From de87c1bee38ee384d22ca3222638716c80735e7c Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Tue, 19 May 2020 03:23:26 +0000 Subject: [PATCH 1/7] Adds two implementations of a BitRunReader, which returns set/not-set and number of bits in a row. Adds benchmarks comparing the two implementations under different distributions. - Makes use of Adds the BitRunReader for use in Parquet Writing - Refactors GetBatchedSpaced and GetBatchedSpacedWithDict: Use a single templated method that adds a template parameter that the code can share. Does all checking for out of bounds indices in one go instead of on each pass through th literal (this is a slight behavior change as the index returned will be different). Makes use of the BitRunReader. With exactly alternating bits this shows a big performance drop, but is generally positive across any random and/or skewered nullability. fix type cast to make appveyor happy add predict false one more cast for windows remove redundant using try to fix builds address some comments inline all methods remove InvertRemainingBits and use LeastSignificantBitMask (renamed from PartialWordMask) remove LoadInitialWord, fix compile error fix lint Pre-rebase work iwyu Fix MSVC warning --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/array/array_primitive.cc | 1 + cpp/src/arrow/util/bit_block_counter.cc | 1 + cpp/src/arrow/util/bit_block_counter.h | 3 - cpp/src/arrow/util/bit_block_counter_test.cc | 1 + cpp/src/arrow/util/bit_run_reader.cc | 52 +++ cpp/src/arrow/util/bit_run_reader.h | 166 ++++++++++ cpp/src/arrow/util/bit_util.h | 6 + cpp/src/arrow/util/bit_util_benchmark.cc | 68 ++++ cpp/src/arrow/util/bit_util_test.cc | 190 ++++++++++- cpp/src/arrow/util/bitmap_builders.h | 1 + cpp/src/arrow/util/bitmap_ops.cc | 2 - cpp/src/arrow/util/bitmap_ops.h | 5 +- cpp/src/arrow/util/bitmap_reader.h | 2 + cpp/src/arrow/util/rle_encoding.h | 314 ++++++++++--------- cpp/src/parquet/arrow/path_internal.cc | 26 +- 16 files changed, 665 insertions(+), 174 deletions(-) create mode 100644 cpp/src/arrow/util/bit_run_reader.cc create mode 100644 cpp/src/arrow/util/bit_run_reader.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 1744a626313..989893e16f2 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -175,6 +175,7 @@ set(ARROW_SRCS testing/util.cc util/basic_decimal.cc util/bit_block_counter.cc + util/bit_run_reader.cc util/bit_util.cc util/bitmap.cc util/bitmap_builders.cc diff --git a/cpp/src/arrow/array/array_primitive.cc b/cpp/src/arrow/array/array_primitive.cc index 519a7f21f43..a1aff933af4 100644 --- a/cpp/src/arrow/array/array_primitive.cc +++ b/cpp/src/arrow/array/array_primitive.cc @@ -23,6 +23,7 @@ #include "arrow/array/array_base.h" #include "arrow/type.h" #include "arrow/util/bit_block_counter.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter.cc b/cpp/src/arrow/util/bit_block_counter.cc index a550eccec1a..381a92eff51 100644 --- a/cpp/src/arrow/util/bit_block_counter.cc +++ b/cpp/src/arrow/util/bit_block_counter.cc @@ -23,6 +23,7 @@ #include "arrow/buffer.h" #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/ubsan.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter.h b/cpp/src/arrow/util/bit_block_counter.h index 3ee777fbabc..9ebcc5e9a90 100644 --- a/cpp/src/arrow/util/bit_block_counter.h +++ b/cpp/src/arrow/util/bit_block_counter.h @@ -22,9 +22,6 @@ #include #include -#include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_ops.h" -#include "arrow/util/ubsan.h" #include "arrow/util/visibility.h" namespace arrow { diff --git a/cpp/src/arrow/util/bit_block_counter_test.cc b/cpp/src/arrow/util/bit_block_counter_test.cc index f1a0f35d386..924222c3810 100644 --- a/cpp/src/arrow/util/bit_block_counter_test.cc +++ b/cpp/src/arrow/util/bit_block_counter_test.cc @@ -30,6 +30,7 @@ #include "arrow/testing/util.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/util/bit_run_reader.cc b/cpp/src/arrow/util/bit_run_reader.cc new file mode 100644 index 00000000000..02d2f5c8bd6 --- /dev/null +++ b/cpp/src/arrow/util/bit_run_reader.cc @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/bit_run_reader.h" + +#include + +#include "arrow/util/bit_util.h" + +namespace arrow { +namespace internal { + +BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : bitmap_(bitmap + (start_offset / 8)), + position_(start_offset % 8), + length_(position_ + length) { + if (ARROW_PREDICT_FALSE(length == 0)) { + word_ = 0; + return; + } + + //// On the initial load if there is an offset we need to account for this when + // loading bytes. Every other call to LoadWord() should only occur when + // position_ is a multiple of 64. + current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset); + int64_t shift_offset = position_ % 8; + int64_t bits_remaining = (length_ - position_) + shift_offset; + bits_remaining += (bits_remaining % 8) == 0 && shift_offset > 0; + + LoadWord(bits_remaining); + + // Prepare for inversion in NextRun. + // Clear out any preceding bits. + word_ = word_ & ~BitUtil::LeastSignficantBitMask(position_); +} + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h new file mode 100644 index 00000000000..03b0da26718 --- /dev/null +++ b/cpp/src/arrow/util/bit_run_reader.h @@ -0,0 +1,166 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include + +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_reader.h" +#include "arrow/util/macros.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace internal { + +struct BitRun { + int64_t length; + // Whether bits are set at this point. + bool set; + + std::string ToString() const { + return std::string("{Length: ") + std::to_string(length) + + ", set=" + std::to_string(set) + "}"; + } +}; + +static inline bool operator==(const BitRun& lhs, const BitRun& rhs) { + return lhs.length == rhs.length && lhs.set == rhs.set; +} + +class BitRunReaderScalar { + public: + BitRunReaderScalar(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : reader_(bitmap, start_offset, length) {} + + BitRun NextRun() { + BitRun rl = {/*length=*/0, reader_.IsSet()}; + // Advance while the values are equal and not at the end of list. + while (reader_.position() < reader_.length() && reader_.IsSet() == rl.set) { + rl.length++; + reader_.Next(); + } + return rl; + } + + private: + BitmapReader reader_; +}; + +#if defined(ARROW_LITTLE_ENDIAN) +/// A convenience class for counting the number of continguous set/unset bits +/// in a bitmap. +class ARROW_EXPORT BitRunReader { + public: + /// \brief Constructs new BitRunReader. + /// + /// \param[in] bitmap source data + /// \param[in] start_offset bit offset into the source data + /// \param[in] length number of bits to copy + BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length); + + /// Returns a new BitRun containing the number of contiguous + /// bits with the same value. length == 0 indicates the + /// end of the bitmap. + BitRun NextRun() { + if (ARROW_PREDICT_FALSE(position_ >= length_)) { + return {/*length=*/0, false}; + } + // This implementation relies on a efficient implementations of + // CountTrailingZeros and assumes that runs are more often then + // not. The logic is to incrementally find the next bit change + // from the current position. This is done by zeroing all + // bits in word_ up to position_ and using the TrailingZeroCount + // to find the index of the next set bit. + + // The runs alternate on each call, so flip the bit. + current_run_bit_set_ = !current_run_bit_set_; + + int64_t start_position = position_; + int64_t start_bit_offset = start_position & 63; + // Invert the word for proper use of CountTrailingZeros and + // clear bits so CountTrailingZeros can do it magic. + word_ = ~word_ & ~BitUtil::LeastSignficantBitMask(start_bit_offset); + + // Go forward until the next change from unset to set. + int64_t new_bits = BitUtil::CountTrailingZeros(word_) - start_bit_offset; + position_ += new_bits; + + if (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) && + ARROW_PREDICT_TRUE(position_ < length_)) { + // Continue extending position while we can advance an entire word. + // (updates position_ accordingly). + AdvanceUntilChange(); + } + + return {/*length=*/position_ - start_position, current_run_bit_set_}; + } + + private: + void AdvanceUntilChange() { + int64_t new_bits = 0; + do { + // Advance the position of the bitmap for loading. + bitmap_ += sizeof(uint64_t); + LoadNextWord(); + new_bits = BitUtil::CountTrailingZeros(word_); + // Continue calculating run length. + position_ += new_bits; + } while (ARROW_PREDICT_FALSE(BitUtil::IsMultipleOf64(position_)) && + ARROW_PREDICT_TRUE(position_ < length_) && new_bits > 0); + } + + void LoadNextWord() { return LoadWord(length_ - position_); } + + // Helper method for Loading the next word. + void LoadWord(int64_t bits_remaining) { + word_ = 0; + // we need at least an extra byte in this case. + if (ARROW_PREDICT_TRUE(bits_remaining >= 64)) { + std::memcpy(&word_, bitmap_, 8); + } else { + int64_t bytes_to_load = BitUtil::BytesForBits(bits_remaining); + auto word_ptr = reinterpret_cast(&word_); + std::memcpy(word_ptr, bitmap_, bytes_to_load); + // Ensure stoppage at last bit in bitmap by reversing the next higher + // order bit. + BitUtil::SetBitTo(word_ptr, bits_remaining, + !BitUtil::GetBit(word_ptr, bits_remaining - 1)); + } + + // Two cases: + // 1. For unset, CountTrailingZeros works natually so we don't + // invert the word. + // 2. Otherwise invert so we can use CountTrailingZeros. + if (current_run_bit_set_) { + word_ = ~word_; + } + } + const uint8_t* bitmap_; + int64_t position_; + int64_t length_; + uint64_t word_; + bool current_run_bit_set_; +}; +#else +using BitRunReader = BitRunReaderScalar; +#endif + +} // namespace internal +} // namespace arrow diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 1ca293bd5fc..3fe24b46412 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -139,6 +139,12 @@ constexpr bool IsMultipleOf64(int64_t n) { return (n & 63) == 0; } constexpr bool IsMultipleOf8(int64_t n) { return (n & 7) == 0; } +// Returns a mask for the bit_index lower order bits. +// Only valid for bit_index in the range [0, 64). +constexpr uint64_t LeastSignficantBitMask(int64_t bit_index) { + return (static_cast(1) << bit_index) - 1; +} + // Returns 'value' rounded up to the nearest multiple of 'factor' constexpr int64_t RoundUp(int64_t value, int64_t factor) { return CeilDiv(value, factor) * factor; diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 10047d23390..2b90d74c250 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -28,9 +28,13 @@ #include "arrow/array/array_base.h" #include "arrow/array/array_primitive.h" #include "arrow/buffer.h" +#include "arrow/builder.h" +#include "arrow/memory_pool.h" #include "arrow/result.h" +#include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" #include "arrow/util/bitmap_generate.h" @@ -195,6 +199,44 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) { state.SetBytesProcessed(2LL * state.iterations() * nbytes); } +template +static void BenchmarkBitRunReader(benchmark::State& state, int64_t set_percentage) { + ::arrow::random::RandomArrayGenerator rag(/*seed=*/23); + constexpr int64_t kNumBits = 4096; + double set_probability = + static_cast(set_percentage == -1 ? 0 : set_percentage) / 100.0; + std::shared_ptr buffer = + rag.Boolean(kNumBits, set_probability)->data()->buffers[1]; + + const uint8_t* bitmap = buffer->data(); + if (set_percentage == -1) { + internal::BitmapWriter writer(buffer->mutable_data(), /*start_offset=*/0, + /*length=*/kNumBits); + for (int x = 0; x < kNumBits; x++) { + if (x % 2 == 0) { + writer.Set(); + } else { + writer.Clear(); + } + writer.Next(); + } + } + + for (auto _ : state) { + { + BitRunReaderType reader(bitmap, 0, kNumBits); + int64_t set_total = 0; + internal::BitRun br; + do { + br = reader.NextRun(); + set_total += br.set ? br.length : 0; + } while (br.length != 0); + benchmark::DoNotOptimize(set_total); + } + } + state.SetBytesProcessed(state.iterations() * (kNumBits / 8)); +} + template static void BenchmarkVisitBits(benchmark::State& state, int64_t nbytes) { std::shared_ptr buffer = CreateRandomBuffer(nbytes); @@ -277,6 +319,14 @@ static void BitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } +static void BitRunReader(benchmark::State& state) { + BenchmarkBitRunReader(state, state.range(0)); +} + +static void BitRunReaderScalar(benchmark::State& state) { + BenchmarkBitRunReader(state, state.range(0)); +} + static void BitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } @@ -409,6 +459,24 @@ BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); #endif BENCHMARK(BitmapReader)->Arg(kBufferSize); +BENCHMARK(BitRunReader) + ->Arg(-1) + ->Arg(0) + ->Arg(10) + ->Arg(25) + ->Arg(50) + ->Arg(60) + ->Arg(75) + ->Arg(99); +BENCHMARK(BitRunReaderScalar) + ->Arg(-1) + ->Arg(0) + ->Arg(10) + ->Arg(25) + ->Arg(50) + ->Arg(60) + ->Arg(75) + ->Arg(99); BENCHMARK(VisitBits)->Arg(kBufferSize); BENCHMARK(VisitBitsUnrolled)->Arg(kBufferSize); BENCHMARK(SetBitsTo)->Arg(2)->Arg(1 << 4)->Arg(1 << 10)->Arg(1 << 17); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 28fa54f29d4..d3ea5b0fc4c 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -20,29 +20,37 @@ #include #include #include -#include #include #include +#include #include +#include #include #include #include +#include "arrow/array/array_base.h" +#include "arrow/array/data.h" #include "arrow/buffer.h" -#include "arrow/memory_pool.h" +#include "arrow/result.h" +#include "arrow/status.h" #include "arrow/testing/gtest_common.h" +#include "arrow/testing/gtest_compat.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/testing/util.h" +#include "arrow/type_fwd.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap.h" -#include "arrow/util/bitmap_builders.h" +#include "arrow/util/bitmap_generate.h" #include "arrow/util/bitmap_ops.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/bitmap_visit.h" #include "arrow/util/bitmap_writer.h" #include "arrow/util/bitset_stack.h" -#include "arrow/util/cpu_info.h" namespace arrow { @@ -193,6 +201,180 @@ TEST(BitmapReader, DoesNotReadOutOfBounds) { internal::BitmapReader r3(nullptr, 0, 0); } +namespace internal { +void PrintTo(const internal::BitRun& run, std::ostream* os) { + *os << run.ToString(); // whatever needed to print bar to os +} +} // namespace internal + +TEST(BitRunReader, ZeroLength) { + internal::BitRunReader reader(nullptr, /*start_offset=*/0, /*length=*/0); + + EXPECT_EQ(reader.NextRun().length, 0); +} + +TEST(BitRunReader, NormalOperation) { + std::vector bm_vector = {1, 0, 1}; // size: 3 + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8 + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); // size: 15 + bm_vector.insert(bm_vector.end(), /*n=*/3, /*val=*/0); // size: 18 + bm_vector.insert(bm_vector.end(), /*n=*/25, /*val=*/1); // size: 43 + bm_vector.insert(bm_vector.end(), /*n=*/21, /*val=*/0); // size: 64 + bm_vector.insert(bm_vector.end(), /*n=*/26, /*val=*/1); // size: 90 + bm_vector.insert(bm_vector.end(), /*n=*/130, /*val=*/0); // size: 220 + bm_vector.insert(bm_vector.end(), /*n=*/65, /*val=*/1); // size: 285 + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/0, /*length=*/length); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray( + std::vector{{/*length=*/1, /*set=*/true}, + {/*length=*/1, /*set=*/false}, + {/*length=*/1, /*set=*/true}, + {/*length=*/5, /*set=*/false}, + {/*length=*/7, /*set=*/true}, + {/*length=*/3, /*set=*/false}, + {/*length=*/25, /*set=*/true}, + {/*length=*/21, /*set=*/false}, + {/*length=*/26, /*set=*/true}, + {/*length=*/130, /*set=*/false}, + {/*length=*/65, /*set=*/true}})); +} + +TEST(BitRunReader, TruncatedAtWord) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); + bm_vector.insert(bm_vector.end(), /*n=*/58, /*val=*/0); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1, + /*length=*/63); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, + ElementsAreArray(std::vector{ + {/*length=*/6, /*set=*/true}, {/*length=*/57, /*set=*/false}})); +} + +TEST(BitRunReader, ScalarComparison) { + ::arrow::random::RandomArrayGenerator rag(/*seed=*/23); + constexpr int64_t kNumBits = 1000000; + std::shared_ptr buffer = + rag.Boolean(kNumBits, /*set_probability=*/.4)->data()->buffers[1]; + + const uint8_t* bitmap = buffer->data(); + + internal::BitRunReader reader(bitmap, 0, kNumBits); + internal::BitRunReaderScalar scalar_reader(bitmap, 0, kNumBits); + internal::BitRun br, brs; + int64_t br_bits = 0; + int64_t brs_bits = 0; + do { + br = reader.NextRun(); + brs = scalar_reader.NextRun(); + br_bits += br.length; + brs_bits += brs.length; + EXPECT_EQ(br.length, brs.length); + if (br.length > 0) { + EXPECT_EQ(br, brs) << internal::Bitmap(bitmap, 0, kNumBits).ToString() << br_bits + << " " << brs_bits; + } + } while (brs.length != 0); + EXPECT_EQ(br_bits, brs_bits); +} + +TEST(BitRunReader, TruncatedWithinWordMultipleOf8Bits) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/1, + /*length=*/7); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray(std::vector{ + {/*length=*/6, /*set=*/true}, {/*length=*/2, /*set=*/false}})); +} + +TEST(BitRunReader, TruncatedWithinWord) { + std::vector bm_vector; + bm_vector.insert(bm_vector.end(), /*n=*/37 + 40, /*val=*/0); + bm_vector.insert(bm_vector.end(), /*n=*/23, /*val=*/1); + + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + constexpr int64_t kOffset = 37; + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset, + /*length=*/53); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, + ElementsAreArray(std::vector{ + {/*length=*/40, /*set=*/false}, {/*length=*/13, /*set=*/true}})); +} + +TEST(BitRunReader, TruncatedMultipleWords) { + std::vector bm_vector = {1, 0, 1}; // size: 3 + bm_vector.insert(bm_vector.end(), /*n=*/5, /*val=*/0); // size: 8 + bm_vector.insert(bm_vector.end(), /*n=*/30, /*val=*/1); // size: 38 + bm_vector.insert(bm_vector.end(), /*n=*/95, /*val=*/0); // size: 133 + std::shared_ptr bitmap; + int64_t length; + BitmapFromVector(bm_vector, /*bit_offset=*/0, &bitmap, &length); + + constexpr int64_t kOffset = 5; + internal::BitRunReader reader(bitmap->data(), /*start_offset=*/kOffset, + /*length=*/length - (kOffset + 3)); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + EXPECT_THAT(results, ElementsAreArray(std::vector{ + {/*length=*/3, /*set=*/false}, + {/*length=*/30, /*set=*/true}, + {/*length=*/92, /*set=*/false}})); +} + TEST(BitmapWriter, NormalOperation) { for (const auto fill_byte_int : {0x00, 0xff}) { const uint8_t fill_byte = static_cast(fill_byte_int); diff --git a/cpp/src/arrow/util/bitmap_builders.h b/cpp/src/arrow/util/bitmap_builders.h index d11e8bc874a..5bd2ad44140 100644 --- a/cpp/src/arrow/util/bitmap_builders.h +++ b/cpp/src/arrow/util/bitmap_builders.h @@ -19,6 +19,7 @@ #include #include +#include #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index c3a02f23e4a..32f21d1e08c 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -17,12 +17,10 @@ #include "arrow/util/bitmap_ops.h" -#include #include #include #include #include -#include #include "arrow/buffer.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index d7a2db61d78..7d2a3ea40fd 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -21,10 +21,13 @@ #include #include "arrow/result.h" -#include "arrow/type_fwd.h" #include "arrow/util/visibility.h" namespace arrow { + +class Buffer; +class MemoryPool; + namespace internal { // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 838781db8fc..5579d95210b 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -57,6 +57,8 @@ class BitmapReader { int64_t position() const { return position_; } + int64_t length() const { return length_; } + private: const uint8_t* bitmap_; int64_t position_; diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding.h index 0eab5b3a9ff..16ccc09d0ac 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding.h @@ -22,11 +22,12 @@ #include #include +#include #include +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_reader.h" #include "arrow/util/macros.h" namespace arrow { @@ -149,6 +150,11 @@ class RleDecoder { /// are no more. template bool NextCounts(); + + /// Utility methods for retrieving spaced values. + template + int GetSpaced(Converter converter, int batch_size, int null_count, + const uint8_t* valid_bits, int64_t valid_bits_offset, T* out); }; /// Class to incrementally build the rle data. This class does not allocate any memory. @@ -301,7 +307,7 @@ inline int RleDecoder::GetBatch(T* values, int batch_size) { while (values_read < batch_size) { int remaining = batch_size - values_read; - if (repeat_count_ > 0) { + if (repeat_count_ > 0) { // Repeated value case. int repeat_batch = std::min(remaining, repeat_count_); std::fill(out, out + repeat_batch, static_cast(current_value_)); @@ -326,98 +332,194 @@ inline int RleDecoder::GetBatch(T* values, int batch_size) { return values_read; } -template -inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, - const uint8_t* valid_bits, - int64_t valid_bits_offset, T* out) { +template +inline int RleDecoder::GetSpaced(Converter converter, int batch_size, int null_count, + const uint8_t* valid_bits, int64_t valid_bits_offset, + T* out) { + if (ARROW_PREDICT_FALSE(null_count == batch_size)) { + converter.FillZero(out, out + batch_size); + return batch_size; + } + DCHECK_GE(bit_width_, 0); int values_read = 0; - int remaining_nulls = null_count; - T zero = {}; - - arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size); + int values_remaining = batch_size - null_count; + // Assume no bits to start. + arrow::internal::BitRunReader bit_reader(valid_bits, valid_bits_offset, + /*length=*/batch_size); + arrow::internal::BitRun valid_run = bit_reader.NextRun(); while (values_read < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - bool is_valid = bit_reader.IsSet(); - bit_reader.Next(); + if (ARROW_PREDICT_FALSE(valid_run.length == 0)) { + valid_run = bit_reader.NextRun(); + } + + DCHECK_GT(batch_size, 0); + DCHECK_GT(valid_run.length, 0); - if (is_valid) { + if (valid_run.set) { if ((repeat_count_ == 0) && (literal_count_ == 0)) { - if (!NextCounts()) return values_read; + if (!NextCounts()) return values_read; + DCHECK((repeat_count_ > 0) ^ (literal_count_ > 0)); } - if (repeat_count_ > 0) { - // The current index is already valid, we don't need to check that again - int repeat_batch = 1; - repeat_count_--; + if (repeat_count_ > 0) { + int repeat_batch = 0; + // Consume the entire repeat counts incrementing repeat_batch to + // be the total of nulls + values consumed, we only need to + // get the total count because we can fill in the same value for + // nulls and non-nulls. This proves to be a big efficiency win. while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - repeat_count_--; + DCHECK_GT(valid_run.length, 0); + if (valid_run.set) { + int update_size = std::min(static_cast(valid_run.length), repeat_count_); + repeat_count_ -= update_size; + repeat_batch += update_size; + valid_run.length -= update_size; + values_remaining -= update_size; } else { - remaining_nulls--; + // We can consume all nulls here because we would do so on + // the next loop anyways. + repeat_batch += static_cast(valid_run.length); + valid_run.length = 0; + } + if (valid_run.length == 0) { + valid_run = bit_reader.NextRun(); } - repeat_batch++; - - bit_reader.Next(); } - std::fill(out, out + repeat_batch, static_cast(current_value_)); + RunType current_value = static_cast(current_value_); + if (ARROW_PREDICT_FALSE(!converter.IsValid(current_value))) { + return values_read; + } + converter.Fill(out, out + repeat_batch, current_value); out += repeat_batch; values_read += repeat_batch; } else if (literal_count_ > 0) { - int literal_batch = - std::min(batch_size - values_read - remaining_nulls, literal_count_); + int literal_batch = std::min(values_remaining, literal_count_); + DCHECK_GT(literal_batch, 0); // Decode the literals constexpr int kBufferSize = 1024; - T indices[kBufferSize]; + RunType indices[kBufferSize]; literal_batch = std::min(literal_batch, kBufferSize); - int actual_read = bit_reader_.GetBatch(bit_width_, &indices[0], literal_batch); - DCHECK_EQ(actual_read, literal_batch); - + int actual_read = bit_reader_.GetBatch(bit_width_, indices, literal_batch); + if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { + return values_read; + } + if (!converter.IsValid(indices, /*length=*/actual_read)) { + return values_read; + } int skipped = 0; - int literals_read = 1; - *out++ = indices[0]; - - // Read the first bitset to the end + int literals_read = 0; while (literals_read < literal_batch) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - *out = indices[literals_read]; - literals_read++; + if (valid_run.set) { + int update_size = std::min(literal_batch - literals_read, + static_cast(valid_run.length)); + converter.Copy(out, indices + literals_read, update_size); + literals_read += update_size; + out += update_size; + valid_run.length -= update_size; } else { - *out = zero; - skipped++; + converter.FillZero(out, out + valid_run.length); + out += valid_run.length; + skipped += static_cast(valid_run.length); + valid_run.length = 0; + } + if (valid_run.length == 0) { + valid_run = bit_reader.NextRun(); } - ++out; - bit_reader.Next(); } literal_count_ -= literal_batch; + values_remaining -= literal_batch; values_read += literal_batch + skipped; - remaining_nulls -= skipped; } } else { - *out = zero; - ++out; - values_read++; - remaining_nulls--; + converter.FillZero(out, out + valid_run.length); + out += valid_run.length; + values_read += static_cast(valid_run.length); + valid_run.length = 0; } } - + DCHECK_EQ(valid_run.length, 0); + DCHECK_EQ(values_remaining, 0); return values_read; } +// Converter for GetSpaced that handles runs that get returned +// directly as output. +template +struct PlainRleConverter { + T kZero = {}; + inline bool IsValid(const T& values) const { return true; } + inline bool IsValid(const T* values, int32_t length) const { return true; } + inline void Fill(T* begin, T* end, const T& run_value) const { + std::fill(begin, end, run_value); + } + inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } + inline void Copy(T* out, const T* values, int length) const { + std::memcpy(out, values, length * sizeof(T)); + } +}; + +template +inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, + const uint8_t* valid_bits, + int64_t valid_bits_offset, T* out) { + if (null_count == 0) { + return GetBatch(out, batch_size); + } + PlainRleConverter converter; + return GetSpaced>( + converter, batch_size, null_count, valid_bits, valid_bits_offset, out); +} + static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) { return idx >= 0 && idx < dictionary_length; } +// Converter for GetSpaced that handles runs of returned dictionary +// indices. +template +struct DictionaryConverter { + T kZero = {}; + const T* dictionary; + int32_t dictionary_length; + + inline bool IsValid(int32_t value) { return IndexInRange(value, dictionary_length); } + + inline bool IsValid(const int32_t* values, int32_t length) const { + using IndexType = int32_t; + IndexType min_index = std::numeric_limits::max(); + IndexType max_index = std::numeric_limits::min(); + for (int x = 0; x < length; x++) { + min_index = std::min(values[x], min_index); + max_index = std::max(values[x], max_index); + } + + return IndexInRange(min_index, dictionary_length) && + IndexInRange(max_index, dictionary_length); + } + inline void Fill(T* begin, T* end, const int32_t& run_value) const { + std::fill(begin, end, dictionary[run_value]); + } + inline void FillZero(T* begin, T* end) { std::fill(begin, end, kZero); } + + inline void Copy(T* out, const int32_t* values, int length) const { + for (int x = 0; x < length; x++) { + out[x] = dictionary[values[x]]; + } + } +}; + template inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_length, T* values, int batch_size) { // Per https://github.com/apache/parquet-format/blob/master/Encodings.md, // the maximum dictionary index width in Parquet is 32 bits. using IndexType = int32_t; + DictionaryConverter converter; + converter.dictionary = dictionary; + converter.dictionary_length = dictionary_length; DCHECK_GE(bit_width_, 0); int values_read = 0; @@ -452,14 +554,10 @@ inline int RleDecoder::GetBatchWithDict(const T* dictionary, int32_t dictionary_ if (ARROW_PREDICT_FALSE(actual_read != literal_batch)) { return values_read; } - - for (int i = 0; i < literal_batch; ++i) { - IndexType index = indices[i]; - if (ARROW_PREDICT_FALSE(!IndexInRange(index, dictionary_length))) { - return values_read; - } - out[i] = dictionary[index]; + if (ARROW_PREDICT_FALSE(!converter.IsValid(indices, /*length=*/literal_batch))) { + return values_read; } + converter.Copy(out, indices, literal_batch); /* Upkeep counters */ literal_count_ -= literal_batch; @@ -479,98 +577,16 @@ inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary, int batch_size, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset) { - using IndexType = int32_t; - - DCHECK_GE(bit_width_, 0); - int values_read = 0; - int remaining_nulls = null_count; - T zero = {}; - - arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, batch_size); - - while (values_read < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - bool is_valid = bit_reader.IsSet(); - bit_reader.Next(); - - if (is_valid) { - if ((repeat_count_ == 0) && (literal_count_ == 0)) { - if (!NextCounts()) return values_read; - } - if (repeat_count_ > 0) { - auto idx = static_cast(current_value_); - if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) { - return values_read; - } - T value = dictionary[idx]; - // The current index is already valid, we don't need to check that again - int repeat_batch = 1; - repeat_count_--; - - while (repeat_count_ > 0 && (values_read + repeat_batch) < batch_size) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - repeat_count_--; - } else { - remaining_nulls--; - } - repeat_batch++; - - bit_reader.Next(); - } - std::fill(out, out + repeat_batch, value); - out += repeat_batch; - values_read += repeat_batch; - } else if (literal_count_ > 0) { - int literal_batch = - std::min(batch_size - values_read - remaining_nulls, literal_count_); - - // Decode the literals - constexpr int kBufferSize = 1024; - IndexType indices[kBufferSize]; - literal_batch = std::min(literal_batch, kBufferSize); - int actual_read = bit_reader_.GetBatch(bit_width_, &indices[0], literal_batch); - if (actual_read != literal_batch) return values_read; - - int skipped = 0; - int literals_read = 1; - - IndexType first_idx = indices[0]; - if (ARROW_PREDICT_FALSE(!IndexInRange(first_idx, dictionary_length))) { - return values_read; - } - *out++ = dictionary[first_idx]; - - // Read the first bitset to the end - while (literals_read < literal_batch) { - DCHECK_LT(bit_reader.position(), batch_size); - if (bit_reader.IsSet()) { - IndexType idx = indices[literals_read]; - if (ARROW_PREDICT_FALSE(!IndexInRange(idx, dictionary_length))) { - return values_read; - } - *out = dictionary[idx]; - literals_read++; - } else { - *out = zero; - skipped++; - } - ++out; - bit_reader.Next(); - } - literal_count_ -= literal_batch; - values_read += literal_batch + skipped; - remaining_nulls -= skipped; - } - } else { - *out = zero; - ++out; - values_read++; - remaining_nulls--; - } + if (null_count == 0) { + return GetBatchWithDict(dictionary, dictionary_length, out, batch_size); } - return values_read; + using IndexType = int32_t; + DictionaryConverter converter; + converter.dictionary = dictionary; + converter.dictionary_length = dictionary_length; + return GetSpaced>( + converter, batch_size, null_count, valid_bits, valid_bits_offset, out); } template @@ -593,7 +609,7 @@ bool RleDecoder::NextCounts() { return false; } repeat_count_ = count; - T value = 0; + T value = {}; if (!bit_reader_.GetAligned(static_cast(BitUtil::CeilDiv(bit_width_, 8)), &value)) { return false; diff --git a/cpp/src/parquet/arrow/path_internal.cc b/cpp/src/parquet/arrow/path_internal.cc index 8ada325e0e5..8d2790aa0d4 100644 --- a/cpp/src/parquet/arrow/path_internal.cc +++ b/cpp/src/parquet/arrow/path_internal.cc @@ -98,6 +98,7 @@ #include "arrow/memory_pool.h" #include "arrow/type.h" #include "arrow/type_traits.h" +#include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_visit.h" #include "arrow/util/logging.h" @@ -464,8 +465,8 @@ class NullableNode { void SetRepLevelIfNull(int16_t rep_level) { rep_level_if_null_ = rep_level; } - ::arrow::internal::BitmapReader MakeReader(const ElementRange& range) { - return ::arrow::internal::BitmapReader(null_bitmap_, entry_offset_ + range.start, + ::arrow::internal::BitRunReader MakeReader(const ElementRange& range) { + return ::arrow::internal::BitRunReader(null_bitmap_, entry_offset_ + range.start, range.Size()); } @@ -478,25 +479,20 @@ class NullableNode { valid_bits_reader_ = MakeReader(*range); } child_range->start = range->start; - while (!range->Empty() && !valid_bits_reader_.IsSet()) { - ++range->start; - valid_bits_reader_.Next(); - } - int64_t null_count = range->start - child_range->start; - if (null_count > 0) { - RETURN_IF_ERROR(FillRepLevels(null_count, rep_level_if_null_, context)); - RETURN_IF_ERROR(context->AppendDefLevels(null_count, def_level_if_null_)); + ::arrow::internal::BitRun run = valid_bits_reader_.NextRun(); + if (!run.set) { + range->start += run.length; + RETURN_IF_ERROR(FillRepLevels(run.length, rep_level_if_null_, context)); + RETURN_IF_ERROR(context->AppendDefLevels(run.length, def_level_if_null_)); + run = valid_bits_reader_.NextRun(); } if (range->Empty()) { new_range_ = true; return kDone; } child_range->end = child_range->start = range->start; + child_range->end += run.length; - while (child_range->end != range->end && valid_bits_reader_.IsSet()) { - ++child_range->end; - valid_bits_reader_.Next(); - } DCHECK(!child_range->Empty()); range->start += child_range->Size(); new_range_ = false; @@ -505,7 +501,7 @@ class NullableNode { const uint8_t* null_bitmap_; int64_t entry_offset_; - ::arrow::internal::BitmapReader valid_bits_reader_; + ::arrow::internal::BitRunReader valid_bits_reader_; int16_t def_level_if_null_; int16_t rep_level_if_null_; From d9d018a32bf2ac86bb8c02f91ed1dfa1d8c562c3 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 05:04:32 +0000 Subject: [PATCH 2/7] fix BitRunReader? --- cpp/src/arrow/util/bit_run_reader.cc | 6 ++---- cpp/src/arrow/util/bit_util_test.cc | 25 ++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/bit_run_reader.cc b/cpp/src/arrow/util/bit_run_reader.cc index 02d2f5c8bd6..7bf8ea127a9 100644 --- a/cpp/src/arrow/util/bit_run_reader.cc +++ b/cpp/src/arrow/util/bit_run_reader.cc @@ -33,13 +33,11 @@ BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t return; } - //// On the initial load if there is an offset we need to account for this when + // On the initial load if there is an offset we need to account for this when // loading bytes. Every other call to LoadWord() should only occur when // position_ is a multiple of 64. current_run_bit_set_ = !BitUtil::GetBit(bitmap, start_offset); - int64_t shift_offset = position_ % 8; - int64_t bits_remaining = (length_ - position_) + shift_offset; - bits_remaining += (bits_remaining % 8) == 0 && shift_offset > 0; + int64_t bits_remaining = length + position_; LoadWord(bits_remaining); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index d3ea5b0fc4c..da87c5fb9f7 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -250,6 +250,29 @@ TEST(BitRunReader, NormalOperation) { {/*length=*/65, /*set=*/true}})); } +TEST(BitRunReader, AllZero) { + for (int offset = 0; offset < 8; offset++) { + for (int64_t x = 0; x < (1 << 8) - 1; x++) { + internal::BitRunReader reader(reinterpret_cast(&x), + /*start_offset=*/offset, + /*length=*/8 - offset); + std::vector results; + internal::BitRun rl; + do { + rl = reader.NextRun(); + results.push_back(rl); + } while (rl.length != 0); + EXPECT_EQ(results.back().length, 0); + results.pop_back(); + int sum = 0; + for (const auto& result : results) { + sum += result.length; + } + ASSERT_EQ(sum, 8 - offset); + } + } +} + TEST(BitRunReader, TruncatedAtWord) { std::vector bm_vector; bm_vector.insert(bm_vector.end(), /*n=*/7, /*val=*/1); @@ -321,7 +344,7 @@ TEST(BitRunReader, TruncatedWithinWordMultipleOf8Bits) { EXPECT_EQ(results.back().length, 0); results.pop_back(); EXPECT_THAT(results, ElementsAreArray(std::vector{ - {/*length=*/6, /*set=*/true}, {/*length=*/2, /*set=*/false}})); + {/*length=*/6, /*set=*/true}, {/*length=*/1, /*set=*/false}})); } TEST(BitRunReader, TruncatedWithinWord) { From 6ab22af4f10db7aa05c8f9406949e0663dfce160 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 05:45:19 +0000 Subject: [PATCH 3/7] incorporate BitBlockCounter --- cpp/src/arrow/util/rle_encoding.h | 60 ++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/rle_encoding.h b/cpp/src/arrow/util/rle_encoding.h index 16ccc09d0ac..68d29930666 100644 --- a/cpp/src/arrow/util/rle_encoding.h +++ b/cpp/src/arrow/util/rle_encoding.h @@ -25,6 +25,7 @@ #include #include +#include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_stream_utils.h" #include "arrow/util/bit_util.h" @@ -468,9 +469,35 @@ inline int RleDecoder::GetBatchSpaced(int batch_size, int null_count, if (null_count == 0) { return GetBatch(out, batch_size); } + PlainRleConverter converter; - return GetSpaced>( - converter, batch_size, null_count, valid_bits, valid_bits_offset, out); + arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, + batch_size); + + int total_processed = 0; + int processed = 0; + arrow::internal::BitBlockCount block; + + do { + block = block_counter.NextFourWords(); + if (block.length == 0) { + break; + } + if (block.AllSet()) { + processed = GetBatch(out, block.length); + } else if (block.NoneSet()) { + converter.FillZero(out, out + block.length); + processed = block.length; + } else { + processed = GetSpaced>( + converter, block.length, block.length - block.popcount, valid_bits, + valid_bits_offset, out); + } + total_processed += processed; + out += block.length; + valid_bits_offset += block.length; + } while (processed == block.length); + return total_processed; } static inline bool IndexInRange(int32_t idx, int32_t dictionary_length) { @@ -580,13 +607,36 @@ inline int RleDecoder::GetBatchWithDictSpaced(const T* dictionary, if (null_count == 0) { return GetBatchWithDict(dictionary, dictionary_length, out, batch_size); } - + arrow::internal::BitBlockCounter block_counter(valid_bits, valid_bits_offset, + batch_size); using IndexType = int32_t; DictionaryConverter converter; converter.dictionary = dictionary; converter.dictionary_length = dictionary_length; - return GetSpaced>( - converter, batch_size, null_count, valid_bits, valid_bits_offset, out); + + int total_processed = 0; + int processed = 0; + arrow::internal::BitBlockCount block; + do { + block = block_counter.NextFourWords(); + if (block.length == 0) { + break; + } + if (block.AllSet()) { + processed = GetBatchWithDict(dictionary, dictionary_length, out, block.length); + } else if (block.NoneSet()) { + converter.FillZero(out, out + block.length); + processed = block.length; + } else { + processed = GetSpaced>( + converter, block.length, block.length - block.popcount, valid_bits, + valid_bits_offset, out); + } + total_processed += processed; + out += block.length; + valid_bits_offset += block.length; + } while (processed == block.length); + return total_processed; } template From 6ddd027a51d9bc94638cce28ff74433c61f5f6d5 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 05:48:36 +0000 Subject: [PATCH 4/7] fix test compilation windows --- cpp/src/arrow/util/bit_util_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index da87c5fb9f7..0956b797f27 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -264,7 +264,7 @@ TEST(BitRunReader, AllZero) { } while (rl.length != 0); EXPECT_EQ(results.back().length, 0); results.pop_back(); - int sum = 0; + int64_t sum = 0; for (const auto& result : results) { sum += result.length; } From 22325a64178b0bd189832c72f52dfe80073da271 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 06:14:23 +0000 Subject: [PATCH 5/7] rename BitRunReaderScalar to BitRunReaderLinear. Fix os390 builds --- cpp/src/arrow/util/bit_run_reader.h | 8 ++++---- cpp/src/arrow/util/bit_util_test.cc | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/bit_run_reader.h b/cpp/src/arrow/util/bit_run_reader.h index 03b0da26718..eaa85bf3d7c 100644 --- a/cpp/src/arrow/util/bit_run_reader.h +++ b/cpp/src/arrow/util/bit_run_reader.h @@ -44,9 +44,9 @@ static inline bool operator==(const BitRun& lhs, const BitRun& rhs) { return lhs.length == rhs.length && lhs.set == rhs.set; } -class BitRunReaderScalar { +class BitRunReaderLinear { public: - BitRunReaderScalar(const uint8_t* bitmap, int64_t start_offset, int64_t length) + BitRunReaderLinear(const uint8_t* bitmap, int64_t start_offset, int64_t length) : reader_(bitmap, start_offset, length) {} BitRun NextRun() { @@ -63,7 +63,7 @@ class BitRunReaderScalar { BitmapReader reader_; }; -#if defined(ARROW_LITTLE_ENDIAN) +#if ARROW_LITTLE_ENDIAN /// A convenience class for counting the number of continguous set/unset bits /// in a bitmap. class ARROW_EXPORT BitRunReader { @@ -159,7 +159,7 @@ class ARROW_EXPORT BitRunReader { bool current_run_bit_set_; }; #else -using BitRunReader = BitRunReaderScalar; +using BitRunReader = BitRunReaderLinear; #endif } // namespace internal diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 0956b797f27..adb1e0f7ef4 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -250,10 +250,11 @@ TEST(BitRunReader, NormalOperation) { {/*length=*/65, /*set=*/true}})); } -TEST(BitRunReader, AllZero) { +TEST(BitRunReader, AllFirstByteCombos) { for (int offset = 0; offset < 8; offset++) { for (int64_t x = 0; x < (1 << 8) - 1; x++) { - internal::BitRunReader reader(reinterpret_cast(&x), + int64_t bits = BitUtil::ToLittleEndian(x); + internal::BitRunReader reader(reinterpret_cast(&bits), /*start_offset=*/offset, /*length=*/8 - offset); std::vector results; @@ -306,7 +307,7 @@ TEST(BitRunReader, ScalarComparison) { const uint8_t* bitmap = buffer->data(); internal::BitRunReader reader(bitmap, 0, kNumBits); - internal::BitRunReaderScalar scalar_reader(bitmap, 0, kNumBits); + internal::BitRunReaderLinear scalar_reader(bitmap, 0, kNumBits); internal::BitRun br, brs; int64_t br_bits = 0; int64_t brs_bits = 0; From 602d8eb3b59258d6539b08668562e398abc96b88 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 06:43:03 +0000 Subject: [PATCH 6/7] missed some renames --- cpp/src/arrow/util/bit_util_benchmark.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 2b90d74c250..63783da77ef 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -323,8 +323,8 @@ static void BitRunReader(benchmark::State& state) { BenchmarkBitRunReader(state, state.range(0)); } -static void BitRunReaderScalar(benchmark::State& state) { - BenchmarkBitRunReader(state, state.range(0)); +static void BitRunReaderLinear(benchmark::State& state) { + BenchmarkBitRunReader(state, state.range(0)); } static void BitmapWriter(benchmark::State& state) { @@ -468,7 +468,7 @@ BENCHMARK(BitRunReader) ->Arg(60) ->Arg(75) ->Arg(99); -BENCHMARK(BitRunReaderScalar) +BENCHMARK(BitRunReaderLinear) ->Arg(-1) ->Arg(0) ->Arg(10) From d4e001b4f9e83747312c1641f08f38a4fc629878 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 25 Jun 2020 07:02:40 +0000 Subject: [PATCH 7/7] add guards in .cc file --- cpp/src/arrow/util/bit_run_reader.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/util/bit_run_reader.cc b/cpp/src/arrow/util/bit_run_reader.cc index 7bf8ea127a9..da5ba649dd8 100644 --- a/cpp/src/arrow/util/bit_run_reader.cc +++ b/cpp/src/arrow/util/bit_run_reader.cc @@ -24,6 +24,8 @@ namespace arrow { namespace internal { +#if ARROW_LITTLE_ENDIAN + BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap + (start_offset / 8)), position_(start_offset % 8), @@ -46,5 +48,7 @@ BitRunReader::BitRunReader(const uint8_t* bitmap, int64_t start_offset, int64_t word_ = word_ & ~BitUtil::LeastSignficantBitMask(position_); } +#endif + } // namespace internal } // namespace arrow