From feedc84b98de9a97c3ccf7c61c66740073eba23a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 18:34:44 -0500 Subject: [PATCH 1/6] BitmapScanner for faster processing of data without many nulls --- cpp/src/arrow/util/bit_util.cc | 44 +++++++++ cpp/src/arrow/util/bit_util.h | 18 ++++ cpp/src/arrow/util/bit_util_benchmark.cc | 116 +++++++++++++++++++++++ cpp/src/arrow/util/bit_util_test.cc | 80 ++++++++++++++++ 4 files changed, 258 insertions(+) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 5ec278acae8..7439f48a424 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -598,5 +598,49 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length return std::move(buffer); } +std::pair BitmapScanner::NextRun() { + auto load_word = [](const uint8_t* bytes) -> uint64_t { + return BitUtil::ToLittleEndian(util::SafeLoadAs(bytes)); + }; + auto shift_word = [](uint64_t current, uint64_t next, int64_t shift) -> uint64_t { + return (current >> shift) | (next << (64 - shift)); + }; + + // When the offset is > 0, we need there to be a word beyond the last aligned + // word in the bitmap for the bit shifting logic. + const int64_t bits_required_to_scan_words = offset_ == 0 ? 256 : 256 + (64 - offset_); + if (bits_remaining_ < bits_required_to_scan_words) { + // End of the bitmap, leave it to the caller to decide how to best check + // these bits, no need to do redundant computation here. + const int64_t run_length = bits_remaining_; + bits_remaining_ -= run_length; + return {run_length, CountSetBits(bitmap_, offset_, run_length)}; + } + + int64_t total_popcount = 0; + if (offset_ == 0) { + total_popcount += __builtin_popcountll(load_word(bitmap_)); + total_popcount += __builtin_popcountll(load_word(bitmap_ + 8)); + total_popcount += __builtin_popcountll(load_word(bitmap_ + 16)); + total_popcount += __builtin_popcountll(load_word(bitmap_ + 24)); + } else { + auto current = load_word(bitmap_); + auto next = load_word(bitmap_ + 8); + total_popcount += __builtin_popcountll(shift_word(current, next, offset_)); + current = next; + next = load_word(bitmap_ + 16); + total_popcount += __builtin_popcountll(shift_word(current, next, offset_)); + current = next; + next = load_word(bitmap_ + 24); + total_popcount += __builtin_popcountll(shift_word(current, next, offset_)); + current = next; + next = load_word(bitmap_ + 32); + total_popcount += __builtin_popcountll(shift_word(current, next, offset_)); + } + bitmap_ += 32; + bits_remaining_ -= 256; + return {256, total_popcount}; +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 12b1a13c59a..375165b7bf9 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -1170,5 +1170,23 @@ class BitsetStack { std::vector offsets_; }; +/// \brief A class that scans through a true/false bitmap to yield runs of +/// all-true or not-all-true runs. This is used to accelerate processing of +/// mostly-not-null array data. 4 words are examined at a time. +struct ARROW_EXPORT BitmapScanner { + BitmapScanner(const uint8_t* bitmap, int64_t start_offset, int64_t length) + : bitmap_(bitmap + start_offset / 8), + bits_remaining_(length), + offset_(start_offset % 8) {} + + /// \brief Return the next run of available bits, up to 256. The returned + /// pair contains the size of run and the number of true values + std::pair NextRun(); + + const uint8_t* bitmap_; + int64_t bits_remaining_; + int64_t offset_; +}; + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 81ccb4726a9..39bf766e1db 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -22,10 +22,12 @@ #include #include +#include "arrow/array/array_base.h" #include "arrow/buffer.h" #include "arrow/builder.h" #include "arrow/memory_pool.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" #include "arrow/testing/util.h" #include "arrow/util/bit_util.h" @@ -387,6 +389,114 @@ static void BitmapEquals(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * buffer_size); } +template +static void BitmapScannerSumNotNull(benchmark::State& state) { + using internal::BitmapScanner; + + random::RandomArrayGenerator rng(/*seed=*/0); + + const int64_t bitmap_length = 1 << 20; + + // State parameter is the average number of total values for each false + // value. So 100 means that 1 out of 100 on average are false. + double true_probability = 1. - 1. / state.range(0); + auto arr = rng.Int8(bitmap_length, 0, 100, true_probability); + + const uint8_t* bitmap = arr->null_bitmap_data(); + + // Compute the expected result + int64_t expected = 0; + const auto& int8_arr = static_cast(*arr); + for (int64_t i = Offset; i < bitmap_length; ++i) { + if (int8_arr.IsValid(i)) { + expected += int8_arr.Value(i); + } + } + for (auto _ : state) { + BitmapScanner scanner(bitmap, Offset, bitmap_length - Offset); + int64_t result = 0; + int64_t position = Offset; + while (true) { + auto bit_run = scanner.NextRun(); + if (bit_run.first == 0) { + break; + } + if (bit_run.first == bit_run.second) { + // All not-null + for (int64_t i = 0; i < bit_run.first; ++i) { + result += int8_arr.Value(position + i); + } + } else if (bit_run.second > 0) { + // Some but not all not-null + for (int64_t i = 0; i < bit_run.first; ++i) { + if (BitUtil::GetBit(bitmap, position + i)) { + result += int8_arr.Value(position + i); + } + } + } + position += bit_run.first; + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); +} + +template +static void BitmapReaderSumNotNull(benchmark::State& state) { + random::RandomArrayGenerator rng(/*seed=*/0); + + const int64_t bitmap_length = 1 << 20; + + // State parameter is the average number of total values for each false + // value. So 100 means that 1 out of 100 on average are false. + double true_probability = 1. - 1. / state.range(0); + auto arr = rng.Int8(bitmap_length, 0, 100, true_probability); + + const uint8_t* bitmap = arr->null_bitmap_data(); + // Compute the expected result + int64_t expected = 0; + const auto& int8_arr = static_cast(*arr); + for (int64_t i = Offset; i < bitmap_length; ++i) { + if (int8_arr.IsValid(i)) { + expected += int8_arr.Value(i); + } + } + for (auto _ : state) { + internal::BitmapReader bit_reader(bitmap, Offset, bitmap_length - Offset); + int64_t result = 0; + for (int64_t i = Offset; i < bitmap_length; ++i) { + if (bit_reader.IsSet()) { + result += int8_arr.Value(i); + } + bit_reader.Next(); + } + // Sanity check + if (result != expected) { + std::abort(); + } + } + state.SetItemsProcessed(state.iterations() * bitmap_length); +} + +static void BitmapScannerSumNotNull(benchmark::State& state) { + BitmapScannerSumNotNull<0>(state); +} + +static void BitmapScannerSumNotNullWithOffset(benchmark::State& state) { + BitmapScannerSumNotNull<4>(state); +} + +static void BitmapReaderSumNotNull(benchmark::State& state) { + BitmapReaderSumNotNull<0>(state); +} + +static void BitmapReaderSumNotNullWithOffset(benchmark::State& state) { + BitmapReaderSumNotNull<4>(state); +} + static void BitmapEqualsWithoutOffset(benchmark::State& state) { BitmapEquals<0>(state); } static void BitmapEqualsWithOffset(benchmark::State& state) { BitmapEquals<4>(state); } @@ -424,6 +534,12 @@ BENCHMARK(CopyBitmapWithOffsetBoth)->Arg(kBufferSize); BENCHMARK(BitmapEqualsWithoutOffset)->Arg(kBufferSize); BENCHMARK(BitmapEqualsWithOffset)->Arg(kBufferSize); +// Range value: average number of total values per null +BENCHMARK(BitmapScannerSumNotNull)->Range(8, 1 << 16); +BENCHMARK(BitmapScannerSumNotNullWithOffset)->Range(8, 1 << 16); +BENCHMARK(BitmapReaderSumNotNull)->Range(8, 1 << 16); +BENCHMARK(BitmapReaderSumNotNullWithOffset)->Range(8, 1 << 16); + #define AND_BENCHMARK_RANGES \ { \ {kBufferSize * 4, kBufferSize * 16}, { 0, 2 } \ diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 8b87e698bc3..b9b4d7d11ae 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1367,5 +1367,85 @@ TEST(Bitmap, VisitWordsAnd) { } } } + +class TestBitmapScanner : public ::testing::Test { + public: + void Create(int64_t nbytes, int64_t offset, int64_t length) { + ASSERT_OK_AND_ASSIGN(buf_, AllocateBuffer(nbytes)); + // Start with data zeroed out + std::memset(buf_->mutable_data(), 0, nbytes); + scanner_.reset(new BitmapScanner(buf_->data(), offset, length)); + } + + protected: + std::shared_ptr buf_; + std::unique_ptr scanner_; +}; + +static constexpr int64_t kWordSize = 64; + +TEST_F(TestBitmapScanner, Basics) { + const int64_t nbytes = 1024; + + Create(nbytes, 0, nbytes * 8); + + int64_t bits_scanned = 0; + while (true) { + auto run = scanner_->NextRun(); + if (run.first == 0) { + break; + } + ASSERT_EQ(4 * kWordSize, run.first); + ASSERT_EQ(0, run.second); + bits_scanned += run.first; + } + ASSERT_EQ(1024 * 8, bits_scanned); + + auto CheckWithOffset = [&](int64_t offset) { + const int64_t nwords = 15; + + // Trim a bit from the end of the bitmap so we can check the remainder bits + // behavior + Create(nwords * 8 + 1, offset, nwords * kWordSize - offset - 1); + + // Start with data all set + std::memset(buf_->mutable_data(), 0xFF, nbytes); + + auto run = scanner_->NextRun(); + ASSERT_EQ(4 * kWordSize, run.first); + ASSERT_EQ(256, run.second); + + // Add some false values to the next 3 shifted words + BitUtil::SetBitTo(buf_->mutable_data(), 4 * kWordSize + offset, false); + BitUtil::SetBitTo(buf_->mutable_data(), 5 * kWordSize + offset, false); + BitUtil::SetBitTo(buf_->mutable_data(), 6 * kWordSize + offset, false); + run = scanner_->NextRun(); + + ASSERT_EQ(256, run.first); + ASSERT_EQ(253, run.second); + + BitUtil::SetBitsTo(buf_->mutable_data(), 8 * kWordSize + offset, 2 * kWordSize, + false); + + run = scanner_->NextRun(); + ASSERT_EQ(256, run.first); + ASSERT_EQ(128, run.second); + + // Last run + run = scanner_->NextRun(); + ASSERT_EQ(3 * kWordSize - offset - 1, run.first); + ASSERT_EQ(run.first, run.second); + + // We can keep calling NextRun safely + run = scanner_->NextRun(); + ASSERT_EQ(0, run.first); + ASSERT_EQ(0, run.second); + }; + + for (int64_t offset_i = 0; offset_i < 7; ++offset_i) { + CheckWithOffset(offset_i); + } +} + } // namespace internal } // namespace arrow From 20e7069b389442b7838930a3ac6e74514d40d39f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 19:38:01 -0500 Subject: [PATCH 2/6] Review comments, update comment to be more accurate --- cpp/src/arrow/util/bit_util.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 375165b7bf9..6ac0dac64cf 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -1170,10 +1170,11 @@ class BitsetStack { std::vector offsets_; }; -/// \brief A class that scans through a true/false bitmap to yield runs of -/// all-true or not-all-true runs. This is used to accelerate processing of -/// mostly-not-null array data. 4 words are examined at a time. -struct ARROW_EXPORT BitmapScanner { +/// \brief A class that scans through a true/false bitmap to yield runs of up +/// to 256 bits at a time along with their popcount. This is used to accelerate +/// processing of mostly-not-null array data. +class ARROW_EXPORT BitmapScanner { + public: BitmapScanner(const uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap + start_offset / 8), bits_remaining_(length), @@ -1183,6 +1184,7 @@ struct ARROW_EXPORT BitmapScanner { /// pair contains the size of run and the number of true values std::pair NextRun(); + private: const uint8_t* bitmap_; int64_t bits_remaining_; int64_t offset_; From 3dbd47d450083c6742cea2a9c4195d29f2ca7db1 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 20:03:10 -0500 Subject: [PATCH 3/6] Fix ASAN error --- cpp/src/arrow/util/bit_util_test.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index b9b4d7d11ae..07d4c98c0a5 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1404,12 +1404,13 @@ TEST_F(TestBitmapScanner, Basics) { auto CheckWithOffset = [&](int64_t offset) { const int64_t nwords = 15; + const int64_t total_bytes = nwords * 8 + 1; // Trim a bit from the end of the bitmap so we can check the remainder bits // behavior - Create(nwords * 8 + 1, offset, nwords * kWordSize - offset - 1); + Create(total_bytes, offset, nwords * kWordSize - offset - 1); // Start with data all set - std::memset(buf_->mutable_data(), 0xFF, nbytes); + std::memset(buf_->mutable_data(), 0xFF, total_bytes); auto run = scanner_->NextRun(); ASSERT_EQ(4 * kWordSize, run.first); From af937663fdb3dd19dbd47e1626fc2de1d3aa404a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 22:44:44 -0500 Subject: [PATCH 4/6] Code review comments. Add another unit test --- cpp/src/arrow/util/bit_util.cc | 10 ++-- cpp/src/arrow/util/bit_util.h | 11 +++- cpp/src/arrow/util/bit_util_benchmark.cc | 14 ++--- cpp/src/arrow/util/bit_util_test.cc | 73 +++++++++++++++--------- 4 files changed, 68 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 7439f48a424..fcb52c85178 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -598,7 +598,7 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length return std::move(buffer); } -std::pair BitmapScanner::NextRun() { +BitmapScanner::Block BitmapScanner::NextBlock() { auto load_word = [](const uint8_t* bytes) -> uint64_t { return BitUtil::ToLittleEndian(util::SafeLoadAs(bytes)); }; @@ -612,9 +612,9 @@ std::pair BitmapScanner::NextRun() { if (bits_remaining_ < bits_required_to_scan_words) { // End of the bitmap, leave it to the caller to decide how to best check // these bits, no need to do redundant computation here. - const int64_t run_length = bits_remaining_; + const int16_t run_length = static_cast(bits_remaining_); bits_remaining_ -= run_length; - return {run_length, CountSetBits(bitmap_, offset_, run_length)}; + return {run_length, static_cast(CountSetBits(bitmap_, offset_, run_length))}; } int64_t total_popcount = 0; @@ -637,9 +637,9 @@ std::pair BitmapScanner::NextRun() { next = load_word(bitmap_ + 32); total_popcount += __builtin_popcountll(shift_word(current, next, offset_)); } - bitmap_ += 32; + bitmap_ += BitUtil::BytesForBits(kTargetBlockLength); bits_remaining_ -= 256; - return {256, total_popcount}; + return {256, static_cast(total_popcount)}; } } // namespace internal diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 6ac0dac64cf..88a0007504b 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -1170,11 +1170,18 @@ class BitsetStack { std::vector offsets_; }; -/// \brief A class that scans through a true/false bitmap to yield runs of up +/// \brief A class that scans through a true/false bitmap to yield blocks of up /// to 256 bits at a time along with their popcount. This is used to accelerate /// processing of mostly-not-null array data. class ARROW_EXPORT BitmapScanner { public: + struct Block { + int16_t length; + int16_t popcount; + }; + + static constexpr int16_t kTargetBlockLength = 256; + BitmapScanner(const uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap + start_offset / 8), bits_remaining_(length), @@ -1182,7 +1189,7 @@ class ARROW_EXPORT BitmapScanner { /// \brief Return the next run of available bits, up to 256. The returned /// pair contains the size of run and the number of true values - std::pair NextRun(); + Block NextBlock(); private: const uint8_t* bitmap_; diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 39bf766e1db..0f262abe353 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -417,24 +417,24 @@ static void BitmapScannerSumNotNull(benchmark::State& state) { int64_t result = 0; int64_t position = Offset; while (true) { - auto bit_run = scanner.NextRun(); - if (bit_run.first == 0) { + BitmapScanner::Block block = scanner.NextBlock(); + if (block.length == 0) { break; } - if (bit_run.first == bit_run.second) { + if (block.length == block.popcount) { // All not-null - for (int64_t i = 0; i < bit_run.first; ++i) { + for (int64_t i = 0; i < block.length; ++i) { result += int8_arr.Value(position + i); } - } else if (bit_run.second > 0) { + } else if (block.popcount > 0) { // Some but not all not-null - for (int64_t i = 0; i < bit_run.first; ++i) { + for (int64_t i = 0; i < block.length; ++i) { if (BitUtil::GetBit(bitmap, position + i)) { result += int8_arr.Value(position + i); } } } - position += bit_run.first; + position += block.length; } // Sanity check if (result != expected) { diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 07d4c98c0a5..aee3c56bd0f 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1390,17 +1390,20 @@ TEST_F(TestBitmapScanner, Basics) { Create(nbytes, 0, nbytes * 8); int64_t bits_scanned = 0; - while (true) { - auto run = scanner_->NextRun(); - if (run.first == 0) { - break; - } - ASSERT_EQ(4 * kWordSize, run.first); - ASSERT_EQ(0, run.second); - bits_scanned += run.first; + for (int64_t i = 0; i < nbytes / 32; ++i) { + BitmapScanner::Block block = scanner_->NextBlock(); + ASSERT_EQ(block.length, 4 * kWordSize); + ASSERT_EQ(block.popcount, 0); + bits_scanned += block.length; } - ASSERT_EQ(1024 * 8, bits_scanned); + ASSERT_EQ(bits_scanned, 1024 * 8); + + auto block = scanner_->NextBlock(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); +} +TEST_F(TestBitmapScanner, Offsets) { auto CheckWithOffset = [&](int64_t offset) { const int64_t nwords = 15; @@ -1412,35 +1415,35 @@ TEST_F(TestBitmapScanner, Basics) { // Start with data all set std::memset(buf_->mutable_data(), 0xFF, total_bytes); - auto run = scanner_->NextRun(); - ASSERT_EQ(4 * kWordSize, run.first); - ASSERT_EQ(256, run.second); + BitmapScanner::Block block = scanner_->NextBlock(); + ASSERT_EQ(4 * kWordSize, block.length); + ASSERT_EQ(block.popcount, 256); // Add some false values to the next 3 shifted words BitUtil::SetBitTo(buf_->mutable_data(), 4 * kWordSize + offset, false); BitUtil::SetBitTo(buf_->mutable_data(), 5 * kWordSize + offset, false); BitUtil::SetBitTo(buf_->mutable_data(), 6 * kWordSize + offset, false); - run = scanner_->NextRun(); + block = scanner_->NextBlock(); - ASSERT_EQ(256, run.first); - ASSERT_EQ(253, run.second); + ASSERT_EQ(block.length, 256); + ASSERT_EQ(block.popcount, 253); BitUtil::SetBitsTo(buf_->mutable_data(), 8 * kWordSize + offset, 2 * kWordSize, false); - run = scanner_->NextRun(); - ASSERT_EQ(256, run.first); - ASSERT_EQ(128, run.second); + block = scanner_->NextBlock(); + ASSERT_EQ(block.length, 256); + ASSERT_EQ(block.popcount, 128); - // Last run - run = scanner_->NextRun(); - ASSERT_EQ(3 * kWordSize - offset - 1, run.first); - ASSERT_EQ(run.first, run.second); + // Last block + block = scanner_->NextBlock(); + ASSERT_EQ(block.length, 3 * kWordSize - offset - 1); + ASSERT_EQ(block.length, block.popcount); - // We can keep calling NextRun safely - run = scanner_->NextRun(); - ASSERT_EQ(0, run.first); - ASSERT_EQ(0, run.second); + // We can keep calling NextBlock safely + block = scanner_->NextBlock(); + ASSERT_EQ(block.length, 0); + ASSERT_EQ(block.popcount, 0); }; for (int64_t offset_i = 0; offset_i < 7; ++offset_i) { @@ -1448,5 +1451,23 @@ TEST_F(TestBitmapScanner, Basics) { } } +TEST_F(TestBitmapScanner, RandomData) { + const int64_t nbytes = 1024; + auto buffer = *AllocateBuffer(nbytes); + random_bytes(nbytes, 0, buffer->mutable_data()); + + auto CheckWithOffset = [&](int64_t offset) { + BitmapScanner scanner(buffer->data(), offset, nbytes * 8); + for (int64_t i = 0; i < nbytes / 32; ++i) { + BitmapScanner::Block block = scanner.NextBlock(); + ASSERT_EQ(block.popcount, + CountSetBits(buffer->data(), i * 256 + offset, block.length)); + } + }; + for (int64_t offset_i = 0; offset_i < 7; ++offset_i) { + CheckWithOffset(offset_i); + } +} + } // namespace internal } // namespace arrow From f9dda29a07cef8bb43fe88bf4330cb745c1aa732 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 22:56:16 -0500 Subject: [PATCH 5/6] Rename class as BitBlockCounter --- cpp/src/arrow/util/bit_util.cc | 2 +- cpp/src/arrow/util/bit_util.h | 4 ++-- cpp/src/arrow/util/bit_util_benchmark.cc | 20 ++++++++++---------- cpp/src/arrow/util/bit_util_test.cc | 20 ++++++++++---------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index fcb52c85178..07951d0c963 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -598,7 +598,7 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length return std::move(buffer); } -BitmapScanner::Block BitmapScanner::NextBlock() { +BitBlockCounter::Block BitBlockCounter::NextBlock() { auto load_word = [](const uint8_t* bytes) -> uint64_t { return BitUtil::ToLittleEndian(util::SafeLoadAs(bytes)); }; diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 88a0007504b..d1d6a52f9bb 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -1173,7 +1173,7 @@ class BitsetStack { /// \brief A class that scans through a true/false bitmap to yield blocks of up /// to 256 bits at a time along with their popcount. This is used to accelerate /// processing of mostly-not-null array data. -class ARROW_EXPORT BitmapScanner { +class ARROW_EXPORT BitBlockCounter { public: struct Block { int16_t length; @@ -1182,7 +1182,7 @@ class ARROW_EXPORT BitmapScanner { static constexpr int16_t kTargetBlockLength = 256; - BitmapScanner(const uint8_t* bitmap, int64_t start_offset, int64_t length) + BitBlockCounter(const uint8_t* bitmap, int64_t start_offset, int64_t length) : bitmap_(bitmap + start_offset / 8), bits_remaining_(length), offset_(start_offset % 8) {} diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 0f262abe353..ae4ddc12d5d 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -390,8 +390,8 @@ static void BitmapEquals(benchmark::State& state) { } template -static void BitmapScannerSumNotNull(benchmark::State& state) { - using internal::BitmapScanner; +static void BitBlockCounterSumNotNull(benchmark::State& state) { + using internal::BitBlockCounter; random::RandomArrayGenerator rng(/*seed=*/0); @@ -413,11 +413,11 @@ static void BitmapScannerSumNotNull(benchmark::State& state) { } } for (auto _ : state) { - BitmapScanner scanner(bitmap, Offset, bitmap_length - Offset); + BitBlockCounter scanner(bitmap, Offset, bitmap_length - Offset); int64_t result = 0; int64_t position = Offset; while (true) { - BitmapScanner::Block block = scanner.NextBlock(); + BitBlockCounter::Block block = scanner.NextBlock(); if (block.length == 0) { break; } @@ -481,12 +481,12 @@ static void BitmapReaderSumNotNull(benchmark::State& state) { state.SetItemsProcessed(state.iterations() * bitmap_length); } -static void BitmapScannerSumNotNull(benchmark::State& state) { - BitmapScannerSumNotNull<0>(state); +static void BitBlockCounterSumNotNull(benchmark::State& state) { + BitBlockCounterSumNotNull<0>(state); } -static void BitmapScannerSumNotNullWithOffset(benchmark::State& state) { - BitmapScannerSumNotNull<4>(state); +static void BitBlockCounterSumNotNullWithOffset(benchmark::State& state) { + BitBlockCounterSumNotNull<4>(state); } static void BitmapReaderSumNotNull(benchmark::State& state) { @@ -535,8 +535,8 @@ BENCHMARK(BitmapEqualsWithoutOffset)->Arg(kBufferSize); BENCHMARK(BitmapEqualsWithOffset)->Arg(kBufferSize); // Range value: average number of total values per null -BENCHMARK(BitmapScannerSumNotNull)->Range(8, 1 << 16); -BENCHMARK(BitmapScannerSumNotNullWithOffset)->Range(8, 1 << 16); +BENCHMARK(BitBlockCounterSumNotNull)->Range(8, 1 << 16); +BENCHMARK(BitBlockCounterSumNotNullWithOffset)->Range(8, 1 << 16); BENCHMARK(BitmapReaderSumNotNull)->Range(8, 1 << 16); BENCHMARK(BitmapReaderSumNotNullWithOffset)->Range(8, 1 << 16); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index aee3c56bd0f..53b2c037bd2 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1368,30 +1368,30 @@ TEST(Bitmap, VisitWordsAnd) { } } -class TestBitmapScanner : public ::testing::Test { +class TestBitBlockCounter : public ::testing::Test { public: void Create(int64_t nbytes, int64_t offset, int64_t length) { ASSERT_OK_AND_ASSIGN(buf_, AllocateBuffer(nbytes)); // Start with data zeroed out std::memset(buf_->mutable_data(), 0, nbytes); - scanner_.reset(new BitmapScanner(buf_->data(), offset, length)); + scanner_.reset(new BitBlockCounter(buf_->data(), offset, length)); } protected: std::shared_ptr buf_; - std::unique_ptr scanner_; + std::unique_ptr scanner_; }; static constexpr int64_t kWordSize = 64; -TEST_F(TestBitmapScanner, Basics) { +TEST_F(TestBitBlockCounter, Basics) { const int64_t nbytes = 1024; Create(nbytes, 0, nbytes * 8); int64_t bits_scanned = 0; for (int64_t i = 0; i < nbytes / 32; ++i) { - BitmapScanner::Block block = scanner_->NextBlock(); + BitBlockCounter::Block block = scanner_->NextBlock(); ASSERT_EQ(block.length, 4 * kWordSize); ASSERT_EQ(block.popcount, 0); bits_scanned += block.length; @@ -1403,7 +1403,7 @@ TEST_F(TestBitmapScanner, Basics) { ASSERT_EQ(block.popcount, 0); } -TEST_F(TestBitmapScanner, Offsets) { +TEST_F(TestBitBlockCounter, Offsets) { auto CheckWithOffset = [&](int64_t offset) { const int64_t nwords = 15; @@ -1415,7 +1415,7 @@ TEST_F(TestBitmapScanner, Offsets) { // Start with data all set std::memset(buf_->mutable_data(), 0xFF, total_bytes); - BitmapScanner::Block block = scanner_->NextBlock(); + BitBlockCounter::Block block = scanner_->NextBlock(); ASSERT_EQ(4 * kWordSize, block.length); ASSERT_EQ(block.popcount, 256); @@ -1451,15 +1451,15 @@ TEST_F(TestBitmapScanner, Offsets) { } } -TEST_F(TestBitmapScanner, RandomData) { +TEST_F(TestBitBlockCounter, RandomData) { const int64_t nbytes = 1024; auto buffer = *AllocateBuffer(nbytes); random_bytes(nbytes, 0, buffer->mutable_data()); auto CheckWithOffset = [&](int64_t offset) { - BitmapScanner scanner(buffer->data(), offset, nbytes * 8); + BitBlockCounter scanner(buffer->data(), offset, nbytes * 8); for (int64_t i = 0; i < nbytes / 32; ++i) { - BitmapScanner::Block block = scanner.NextBlock(); + BitBlockCounter::Block block = scanner.NextBlock(); ASSERT_EQ(block.popcount, CountSetBits(buffer->data(), i * 256 + offset, block.length)); } From ffff8c1c94314c00e4cbc4b7bbe8bf0595ec250e Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 23:35:19 -0500 Subject: [PATCH 6/6] Fix bug causing ASAN failure --- cpp/src/arrow/util/bit_util_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 53b2c037bd2..45f6daff4ba 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1457,7 +1457,7 @@ TEST_F(TestBitBlockCounter, RandomData) { random_bytes(nbytes, 0, buffer->mutable_data()); auto CheckWithOffset = [&](int64_t offset) { - BitBlockCounter scanner(buffer->data(), offset, nbytes * 8); + BitBlockCounter scanner(buffer->data(), offset, nbytes * 8 - offset); for (int64_t i = 0; i < nbytes / 32; ++i) { BitBlockCounter::Block block = scanner.NextBlock(); ASSERT_EQ(block.popcount,