From 02e3d8dcf6794405d6376ba671ccc632f412c3d0 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Tue, 8 Jun 2021 16:36:16 -0400 Subject: [PATCH 01/46] prelim --- cpp/src/arrow/util/bitmap.h | 122 ++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 8562c55e3d5..575e51cf956 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -225,6 +225,128 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return min_offset; } + /// \brief Visit words of bits from each bitmap as array + /// + /// All bitmaps must have identical length. The first bit in a visited bitmap + /// may be offset within the first visited word, but words will otherwise contain + /// densely packed bits loaded from the bitmap. That offset within the first word is + /// returned. + /// + /// TODO(bkietz) allow for early termination + // NOTE: this function is efficient on 3+ sufficiently large bitmaps. + // It also has a large prolog / epilog overhead and should be used + // carefully in other cases. + // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid + // and BitmapUInt64Reader. + template >::type::value_type> + static int64_t VisitWordsNew(const std::array& bitmaps_arg, + Visitor&& visitor, Bitmap* out_bitmap_arg) { + constexpr int64_t kBitWidth = sizeof(Word) * 8; + + // local, mutable variables which will be sliced/decremented to represent consumption: + std::array bitmaps; + std::array offsets; + int64_t bit_length = BitLength(bitmaps_arg, N+ 1); + std::array, N + 1> words; + + for (size_t i = 0; i < N; ++i) { + bitmaps[i] = bitmaps_arg[i]; + offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); + } + bitmaps[N] = *out_bitmap_arg; + offsets[N] = bitmaps[N].template word_offset(); + assert(offsets[N] >= 0 && offsets[N] < kBitWidth); + words[N] = bitmaps[N].template words(); + + auto consume = [&](int64_t consumed_bits) { + for (size_t i = 0; i < N; ++i) { + bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits); + offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); + } + bit_length -= consumed_bits; + }; + + std::array visited_words; + visited_words.fill(0); + + if (bit_length <= kBitWidth * 2) { + // bitmaps fit into one or two words so don't bother with optimization + while (bit_length > 0) { + auto leading_bits = std::min(bit_length, kBitWidth); + SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); + visitor(visited_words); + consume(leading_bits); + } + return 0; + } + + int64_t max_offset = *std::max_element(offsets, offsets + N); + int64_t min_offset = *std::min_element(offsets, offsets + N); + if (max_offset > 0) { + // consume leading bits + auto leading_bits = kBitWidth - min_offset; + SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); + visitor(visited_words); + consume(leading_bits); + } + assert(*std::min_element(offsets, offsets + N) == 0); + + int64_t whole_word_count = bit_length / kBitWidth; + assert(whole_word_count >= 1); + + if (min_offset == max_offset) { + // all offsets were identical, all leading bits have been consumed + assert( + std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; })); + + for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { + for (size_t i = 0; i < N; ++i) { + visited_words[i] = words[i][word_i]; + } + visitor(visited_words); + } + consume(whole_word_count * kBitWidth); + } else { + // leading bits from potentially incomplete words have been consumed + + // word_i such that words[i][word_i] and words[i][word_i + 1] are lie entirely + // within the bitmap for all i + for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { + for (size_t i = 0; i < N; ++i) { + if (offsets[i] == 0) { + visited_words[i] = words[i][word_i]; + } else { + auto words0 = BitUtil::ToLittleEndian(words[i][word_i]); + auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]); + visited_words[i] = BitUtil::FromLittleEndian( + (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i]))); + } + } + visitor(visited_words); + } + consume((whole_word_count - 1) * kBitWidth); + + SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); + + visitor(visited_words); + consume(kBitWidth); + } + + // load remaining bits + if (bit_length > 0) { + SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); + visitor(visited_words); + } + + return min_offset; + } + const std::shared_ptr& buffer() const { return buffer_; } /// offset of first bit relative to buffer().data() From fad03833ef4e4a524939baf061f48f3a9828f3cb Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 9 Jun 2021 00:17:39 -0400 Subject: [PATCH 02/46] working - not tested properly. requires clean up --- .../arrow/compute/kernels/scalar_if_else.cc | 106 ++++++++++------- cpp/src/arrow/util/bitmap.h | 112 +++++++++++++----- 2 files changed, 145 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 7a0defaccd6..83e5501a0f1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -76,15 +76,6 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& // duplicated (probably elided) access to cond_data const Bitmap& _ = cond_data; - // lambda function that will be used inside the visitor - uint64_t* out_validity = nullptr; - int64_t i = 0; - auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid, - uint64_t r_valid) { - out_validity[i] = c_valid & ((c_data & l_valid) | (~c_data & r_valid)); - i++; - }; - // cond.valid & (cond.data & left.valid | ~cond.data & right.valid) // In the following cases, we dont need to allocate out_valid bitmap @@ -110,72 +101,103 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& // following cases requires a separate out_valid buffer ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length)); - out_validity = output->GetMutableValues(0); + + // lambda function that will be used inside the visitor + auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid, + uint64_t r_valid) { + return c_valid & ((c_data & l_valid) | (~c_data & r_valid)); + }; + + Bitmap out_bitmap(output->buffers[0], 0, cond.length); enum { C_VALID, C_DATA, L_VALID, R_VALID }; switch (flag) { case COND_CONST | LEFT_CONST | RIGHT_CONST: { - Bitmap bitmaps[] = {_, cond_data, _, _}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(*cond_const, words[C_DATA], *left_const, *right_const); - }); + std::array bitmaps{_, cond_data, _, _}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(*cond_const, words[C_DATA], *left_const, *right_const); + }, + &out_bitmap); break; } case LEFT_CONST | RIGHT_CONST: { - Bitmap bitmaps[] = {cond_valid, cond_data, _, _}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[C_VALID], words[C_DATA], *left_const, *right_const); - }); + std::array bitmaps{cond_valid, cond_data, _, _}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(words[C_VALID], words[C_DATA], *left_const, *right_const); + }, + &out_bitmap); break; } case COND_CONST | RIGHT_CONST: { // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for // Visit() - Bitmap bitmaps[] = {_, cond_data, left_valid, _}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(*cond_const, words[C_DATA], words[L_VALID], *right_const); - }); + std::array bitmaps{_, cond_data, left_valid, _}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(*cond_const, words[C_DATA], words[L_VALID], *right_const); + }, + &out_bitmap); break; } case RIGHT_CONST: { // bitmaps[R_VALID] might be null; override to make it safe for Visit() - Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, _}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[C_VALID], words[C_DATA], words[L_VALID], *right_const); - }); + std::array bitmaps{cond_valid, cond_data, left_valid, _}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(words[C_VALID], words[C_DATA], words[L_VALID], *right_const); + }, + &out_bitmap); break; } case COND_CONST | LEFT_CONST: { // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for // Visit() - Bitmap bitmaps[] = {_, cond_data, _, right_valid}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(*cond_const, words[C_DATA], *left_const, words[R_VALID]); - }); + std::array bitmaps{_, cond_data, _, right_valid}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(*cond_const, words[C_DATA], *left_const, words[R_VALID]); + }, + &out_bitmap); break; } case LEFT_CONST: { // bitmaps[L_VALID] might be null; override to make it safe for Visit() - Bitmap bitmaps[] = {cond_valid, cond_data, _, right_valid}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[C_VALID], words[C_DATA], *left_const, words[R_VALID]); - }); + std::array bitmaps{cond_valid, cond_data, _, right_valid}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(words[C_VALID], words[C_DATA], *left_const, words[R_VALID]); + }, + &out_bitmap); break; } case COND_CONST: { // bitmaps[C_VALID] might be null; override to make it safe for Visit() - Bitmap bitmaps[] = {_, cond_data, left_valid, right_valid}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(*cond_const, words[C_DATA], words[L_VALID], words[R_VALID]); - }); + std::array bitmaps{_, cond_data, left_valid, right_valid}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(*cond_const, words[C_DATA], words[L_VALID], words[R_VALID]); + }, + &out_bitmap); break; } case 0: { - Bitmap bitmaps[] = {cond_valid, cond_data, left_valid, right_valid}; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[C_VALID], words[C_DATA], words[L_VALID], words[R_VALID]); - }); + std::array bitmaps{cond_valid, cond_data, left_valid, right_valid}; + Bitmap::VisitWordsAndWrite( + bitmaps, + [&](std::array words) { + return apply(words[C_VALID], words[C_DATA], words[L_VALID], words[R_VALID]); + }, + &out_bitmap); break; } } diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 575e51cf956..b73bb3f8c94 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -29,6 +29,7 @@ #include "arrow/buffer.h" #include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_ops.h" #include "arrow/util/compare.h" #include "arrow/util/endian.h" #include "arrow/util/functional.h" @@ -241,34 +242,55 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, template >::type::value_type> - static int64_t VisitWordsNew(const std::array& bitmaps_arg, - Visitor&& visitor, Bitmap* out_bitmap_arg) { + static int64_t VisitWordsAndWrite(const std::array& bitmaps_arg, + Visitor&& visitor, Bitmap* out_bitmap_arg) { constexpr int64_t kBitWidth = sizeof(Word) * 8; // local, mutable variables which will be sliced/decremented to represent consumption: - std::array bitmaps; - std::array offsets; - int64_t bit_length = BitLength(bitmaps_arg, N+ 1); - std::array, N + 1> words; + // todo use std::array here + Bitmap bitmaps[N]; + int64_t word_offsets[N]; + int64_t bit_length = BitLength(bitmaps_arg); + View words[N]; + + struct BitmapHolder { + explicit BitmapHolder(Bitmap bitmap_) + : bitmap(std::move(bitmap_)), + word_offset(BitmapHolder::bitmap.template word_offset()), + words(BitmapHolder::bitmap.template words()) { + assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); + } + + void SliceAndUpdate(int64_t _offset, int64_t _length) { + BitmapHolder::bitmap = bitmap.Slice(_offset, _length); + BitmapHolder::word_offset = bitmap.template word_offset(); + assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); + BitmapHolder::words = bitmap.template words(); + } + + Bitmap bitmap; + int64_t word_offset; + View words; + }; for (size_t i = 0; i < N; ++i) { bitmaps[i] = bitmaps_arg[i]; - offsets[i] = bitmaps[i].template word_offset(); - assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + word_offsets[i] = bitmaps[i].template word_offset(); + assert(word_offsets[i] >= 0 && word_offsets[i] < kBitWidth); words[i] = bitmaps[i].template words(); } - bitmaps[N] = *out_bitmap_arg; - offsets[N] = bitmaps[N].template word_offset(); - assert(offsets[N] >= 0 && offsets[N] < kBitWidth); - words[N] = bitmaps[N].template words(); + + BitmapHolder out_bitmap(*out_bitmap_arg); auto consume = [&](int64_t consumed_bits) { for (size_t i = 0; i < N; ++i) { bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits); - offsets[i] = bitmaps[i].template word_offset(); - assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + word_offsets[i] = bitmaps[i].template word_offset(); + assert(word_offsets[i] >= 0 && word_offsets[i] < kBitWidth); words[i] = bitmaps[i].template words(); } + out_bitmap.SliceAndUpdate(consumed_bits, bit_length - consumed_bits); + bit_length -= consumed_bits; }; @@ -280,37 +302,50 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, while (bit_length > 0) { auto leading_bits = std::min(bit_length, kBitWidth); SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); - visitor(visited_words); + Word visit_out = visitor(visited_words); // outputs a word/ partial word + CopyBitmap(reinterpret_cast(&visit_out), 0, leading_bits, + out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); consume(leading_bits); } return 0; } - int64_t max_offset = *std::max_element(offsets, offsets + N); - int64_t min_offset = *std::min_element(offsets, offsets + N); - if (max_offset > 0) { + int64_t max_word_offset = *std::max_element(word_offsets, word_offsets + N); + int64_t min_word_offset = *std::min_element(word_offsets, word_offsets + N); + if (max_word_offset > 0) { // consume leading bits - auto leading_bits = kBitWidth - min_offset; + auto leading_bits = kBitWidth - min_word_offset; SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); - visitor(visited_words); + Word visit_out = visitor(visited_words); + CopyBitmap(reinterpret_cast(&visit_out), sizeof(Word) * 8 - leading_bits, + leading_bits, out_bitmap.bitmap.buffer_->mutable_data(), + out_bitmap.bitmap.offset_); consume(leading_bits); } - assert(*std::min_element(offsets, offsets + N) == 0); + assert(*std::min_element(word_offsets, word_offsets + N) == 0); + assert(out_bitmap.word_offset == 0); int64_t whole_word_count = bit_length / kBitWidth; assert(whole_word_count >= 1); - if (min_offset == max_offset) { + std::vector visit_outs; + visit_outs.reserve(whole_word_count); + + if (min_word_offset == max_word_offset) { // all offsets were identical, all leading bits have been consumed - assert( - std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; })); + assert(std::all_of(word_offsets, word_offsets + N, + [](int64_t offset) { return offset == 0; })); + assert(out_bitmap.word_offset == 0); for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { for (size_t i = 0; i < N; ++i) { visited_words[i] = words[i][word_i]; } - visitor(visited_words); + visit_outs.template emplace_back(visitor(visited_words)); } + CopyBitmap(reinterpret_cast(visit_outs.data()), 0, + whole_word_count * kBitWidth, out_bitmap.bitmap.buffer_->mutable_data(), + out_bitmap.bitmap.offset_); consume(whole_word_count * kBitWidth); } else { // leading bits from potentially incomplete words have been consumed @@ -319,32 +354,39 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, // within the bitmap for all i for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { for (size_t i = 0; i < N; ++i) { - if (offsets[i] == 0) { + if (word_offsets[i] == 0) { visited_words[i] = words[i][word_i]; } else { auto words0 = BitUtil::ToLittleEndian(words[i][word_i]); auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]); visited_words[i] = BitUtil::FromLittleEndian( - (words0 >> offsets[i]) | (words1 << (kBitWidth - offsets[i]))); + (words0 >> word_offsets[i]) | (words1 << (kBitWidth - word_offsets[i]))); } } - visitor(visited_words); + visit_outs.template emplace_back(visitor(visited_words)); } + CopyBitmap(reinterpret_cast(visit_outs.data()), 0, + (whole_word_count - 1) * kBitWidth, + out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); consume((whole_word_count - 1) * kBitWidth); SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); - visitor(visited_words); + Word visit_out = visitor(visited_words); // outputs a word/ partial word + CopyBitmap(reinterpret_cast(&visit_out), 0, kBitWidth, + out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); consume(kBitWidth); } // load remaining bits if (bit_length > 0) { SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); - visitor(visited_words); + Word visit_out = visitor(visited_words); + CopyBitmap(reinterpret_cast(&visit_out), 0, bit_length, + out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); } - return min_offset; + return min_word_offset; } const std::shared_ptr& buffer() const { return buffer_; } @@ -423,6 +465,14 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, /// assert bitmaps have identical length and return that length static int64_t BitLength(const Bitmap* bitmaps, size_t N); + template + static int64_t BitLength(const std::array& bitmaps) { + for (size_t i = 1; i < bitmaps.size(); ++i) { + DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length()); + } + return bitmaps[0].length(); + } + std::shared_ptr buffer_; int64_t offset_ = 0, length_ = 0; }; From 1ffce3fce469ddf4e8fd92c6b37c364d7a1e55d7 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 9 Jun 2021 09:36:17 -0400 Subject: [PATCH 03/46] adding striding --- cpp/src/arrow/util/bitmap.h | 57 ++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index b73bb3f8c94..d3cca8e46c4 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -74,6 +74,11 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return Bitmap(buffer_, offset_ + offset, length); } + void Stride(int64_t stride) { + this->offset_ += stride; + this->length_ -= stride; + } + std::string ToString() const; bool Equals(const Bitmap& other) const; @@ -254,21 +259,34 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, View words[N]; struct BitmapHolder { - explicit BitmapHolder(Bitmap bitmap_) - : bitmap(std::move(bitmap_)), - word_offset(BitmapHolder::bitmap.template word_offset()), - words(BitmapHolder::bitmap.template words()) { + explicit BitmapHolder(Bitmap* bitmap_) + : bitmap(bitmap_), + word_offset(bitmap_->template word_offset()), + words(bitmap_->template words()) { assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); } - void SliceAndUpdate(int64_t _offset, int64_t _length) { - BitmapHolder::bitmap = bitmap.Slice(_offset, _length); - BitmapHolder::word_offset = bitmap.template word_offset(); + // void SliceAndUpdate(int64_t _offset, int64_t _length) { + // BitmapHolder::bitmap = bitmap.Slice(_offset, _length); + // BitmapHolder::word_offset = bitmap.template word_offset(); + // assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < + // kBitWidth); BitmapHolder::words = bitmap.template words(); + // } + + void StrideAndUpdate(int64_t _stride) { + BitmapHolder::bitmap->Stride(_stride); + BitmapHolder::word_offset = bitmap->template word_offset(); assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); - BitmapHolder::words = bitmap.template words(); + BitmapHolder::words = bitmap->template words(); } - Bitmap bitmap; + inline int64_t offset() const { return bitmap->offset_; } + + inline const uint8_t* data() const { return bitmap->buffer_->data(); } + + inline uint8_t* mutable_data() { return bitmap->buffer_->mutable_data(); } + + Bitmap* bitmap; int64_t word_offset; View words; }; @@ -280,7 +298,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, words[i] = bitmaps[i].template words(); } - BitmapHolder out_bitmap(*out_bitmap_arg); + BitmapHolder out_bitmap(out_bitmap_arg); auto consume = [&](int64_t consumed_bits) { for (size_t i = 0; i < N; ++i) { @@ -289,7 +307,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, assert(word_offsets[i] >= 0 && word_offsets[i] < kBitWidth); words[i] = bitmaps[i].template words(); } - out_bitmap.SliceAndUpdate(consumed_bits, bit_length - consumed_bits); + out_bitmap.StrideAndUpdate(consumed_bits); bit_length -= consumed_bits; }; @@ -304,7 +322,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); Word visit_out = visitor(visited_words); // outputs a word/ partial word CopyBitmap(reinterpret_cast(&visit_out), 0, leading_bits, - out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); + out_bitmap.mutable_data(), out_bitmap.offset()); consume(leading_bits); } return 0; @@ -318,8 +336,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); Word visit_out = visitor(visited_words); CopyBitmap(reinterpret_cast(&visit_out), sizeof(Word) * 8 - leading_bits, - leading_bits, out_bitmap.bitmap.buffer_->mutable_data(), - out_bitmap.bitmap.offset_); + leading_bits, out_bitmap.mutable_data(), out_bitmap.offset()); consume(leading_bits); } assert(*std::min_element(word_offsets, word_offsets + N) == 0); @@ -344,8 +361,8 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, visit_outs.template emplace_back(visitor(visited_words)); } CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - whole_word_count * kBitWidth, out_bitmap.bitmap.buffer_->mutable_data(), - out_bitmap.bitmap.offset_); + whole_word_count * kBitWidth, out_bitmap.mutable_data(), + out_bitmap.offset()); consume(whole_word_count * kBitWidth); } else { // leading bits from potentially incomplete words have been consumed @@ -366,15 +383,15 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, visit_outs.template emplace_back(visitor(visited_words)); } CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - (whole_word_count - 1) * kBitWidth, - out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); + (whole_word_count - 1) * kBitWidth, out_bitmap.mutable_data(), + out_bitmap.offset()); consume((whole_word_count - 1) * kBitWidth); SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); Word visit_out = visitor(visited_words); // outputs a word/ partial word CopyBitmap(reinterpret_cast(&visit_out), 0, kBitWidth, - out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); + out_bitmap.mutable_data(), out_bitmap.offset()); consume(kBitWidth); } @@ -383,7 +400,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); Word visit_out = visitor(visited_words); CopyBitmap(reinterpret_cast(&visit_out), 0, bit_length, - out_bitmap.bitmap.buffer_->mutable_data(), out_bitmap.bitmap.offset_); + out_bitmap.mutable_data(), out_bitmap.offset()); } return min_word_offset; From 35f61788e50bebf7802cceced55f12a0c3af46bd Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 9 Jun 2021 15:41:53 -0400 Subject: [PATCH 04/46] adding tests --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/bitmap.h | 110 +++++++++++++------------ cpp/src/arrow/util/bitmap_test.cc | 132 ++++++++++++++++++++++++++++++ 3 files changed, 191 insertions(+), 52 deletions(-) create mode 100644 cpp/src/arrow/util/bitmap_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index e26a17120cd..571834dfca6 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -44,6 +44,7 @@ add_arrow_test(utility-test async_generator_test.cc bit_block_counter_test.cc bit_util_test.cc + bitmap_test.cc cache_test.cc checked_cast_test.cc compression_test.cc diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index d3cca8e46c4..877811afd31 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -115,6 +115,21 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, } } + /// \brief Visit bits from each bitmap as bitset + /// + /// All bitmaps must have identical length. + template + static void VisitBits(const std::array& bitmaps, Visitor&& visitor) { + int64_t bit_length = BitLength(bitmaps); + std::bitset bits; + for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { + for (size_t i = 0; i < N; ++i) { + bits[i] = bitmaps[i].GetBit(bit_i); + } + visitor(bits); + } + } + /// \brief Visit words of bits from each bitmap as array /// /// All bitmaps must have identical length. The first bit in a visited bitmap @@ -252,13 +267,11 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, constexpr int64_t kBitWidth = sizeof(Word) * 8; // local, mutable variables which will be sliced/decremented to represent consumption: - // todo use std::array here - Bitmap bitmaps[N]; - int64_t word_offsets[N]; + Bitmap bitmaps[N]; // todo use std::array here int64_t bit_length = BitLength(bitmaps_arg); - View words[N]; struct BitmapHolder { + BitmapHolder() = default; explicit BitmapHolder(Bitmap* bitmap_) : bitmap(bitmap_), word_offset(bitmap_->template word_offset()), @@ -266,48 +279,31 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); } - // void SliceAndUpdate(int64_t _offset, int64_t _length) { - // BitmapHolder::bitmap = bitmap.Slice(_offset, _length); - // BitmapHolder::word_offset = bitmap.template word_offset(); - // assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < - // kBitWidth); BitmapHolder::words = bitmap.template words(); - // } - - void StrideAndUpdate(int64_t _stride) { + inline void StrideAndUpdate(int64_t _stride) { BitmapHolder::bitmap->Stride(_stride); BitmapHolder::word_offset = bitmap->template word_offset(); assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); BitmapHolder::words = bitmap->template words(); } - inline int64_t offset() const { return bitmap->offset_; } - - inline const uint8_t* data() const { return bitmap->buffer_->data(); } - - inline uint8_t* mutable_data() { return bitmap->buffer_->mutable_data(); } - - Bitmap* bitmap; - int64_t word_offset; + Bitmap* bitmap{}; + int64_t word_offset = 0; View words; }; + std::array in_bitmaps; + Bitmap out_bitmap = *out_bitmap_arg; // make a copy + for (size_t i = 0; i < N; ++i) { - bitmaps[i] = bitmaps_arg[i]; - word_offsets[i] = bitmaps[i].template word_offset(); - assert(word_offsets[i] >= 0 && word_offsets[i] < kBitWidth); - words[i] = bitmaps[i].template words(); + bitmaps[i] = bitmaps_arg[i]; // make a copy + in_bitmaps[i] = BitmapHolder(&bitmaps[i]); } - BitmapHolder out_bitmap(out_bitmap_arg); - auto consume = [&](int64_t consumed_bits) { for (size_t i = 0; i < N; ++i) { - bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits); - word_offsets[i] = bitmaps[i].template word_offset(); - assert(word_offsets[i] >= 0 && word_offsets[i] < kBitWidth); - words[i] = bitmaps[i].template words(); + in_bitmaps[i].StrideAndUpdate(consumed_bits); } - out_bitmap.StrideAndUpdate(consumed_bits); + out_bitmap.Stride(consumed_bits); bit_length -= consumed_bits; }; @@ -322,25 +318,33 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); Word visit_out = visitor(visited_words); // outputs a word/ partial word CopyBitmap(reinterpret_cast(&visit_out), 0, leading_bits, - out_bitmap.mutable_data(), out_bitmap.offset()); + out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); consume(leading_bits); } return 0; } - int64_t max_word_offset = *std::max_element(word_offsets, word_offsets + N); - int64_t min_word_offset = *std::min_element(word_offsets, word_offsets + N); + auto word_offset_comp = [](const BitmapHolder& l, const BitmapHolder& r) { + return l.word_offset < r.word_offset; + }; + + int64_t max_word_offset = + (*std::max_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) + .word_offset; + int64_t min_word_offset = + (*std::min_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) + .word_offset; if (max_word_offset > 0) { // consume leading bits auto leading_bits = kBitWidth - min_word_offset; SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); Word visit_out = visitor(visited_words); CopyBitmap(reinterpret_cast(&visit_out), sizeof(Word) * 8 - leading_bits, - leading_bits, out_bitmap.mutable_data(), out_bitmap.offset()); + leading_bits, out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); consume(leading_bits); } - assert(*std::min_element(word_offsets, word_offsets + N) == 0); - assert(out_bitmap.word_offset == 0); + assert((*std::min_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) + .word_offset == 0); int64_t whole_word_count = bit_length / kBitWidth; assert(whole_word_count >= 1); @@ -350,18 +354,18 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, if (min_word_offset == max_word_offset) { // all offsets were identical, all leading bits have been consumed - assert(std::all_of(word_offsets, word_offsets + N, - [](int64_t offset) { return offset == 0; })); - assert(out_bitmap.word_offset == 0); + assert(std::all_of( + in_bitmaps.begin(), in_bitmaps.end(), + [](const BitmapHolder& holder) { return holder.word_offset == 0; })); for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { for (size_t i = 0; i < N; ++i) { - visited_words[i] = words[i][word_i]; + visited_words[i] = in_bitmaps[i].words[word_i]; } visit_outs.template emplace_back(visitor(visited_words)); } CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - whole_word_count * kBitWidth, out_bitmap.mutable_data(), + whole_word_count * kBitWidth, out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); consume(whole_word_count * kBitWidth); } else { @@ -371,19 +375,21 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, // within the bitmap for all i for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { for (size_t i = 0; i < N; ++i) { - if (word_offsets[i] == 0) { - visited_words[i] = words[i][word_i]; + const auto ith_words = in_bitmaps[i].words; + const auto ith_word_offset = in_bitmaps[i].word_offset; + if (ith_word_offset == 0) { + visited_words[i] = ith_words[word_i]; } else { - auto words0 = BitUtil::ToLittleEndian(words[i][word_i]); - auto words1 = BitUtil::ToLittleEndian(words[i][word_i + 1]); + auto words0 = BitUtil::ToLittleEndian(ith_words[word_i]); + auto words1 = BitUtil::ToLittleEndian(ith_words[word_i + 1]); visited_words[i] = BitUtil::FromLittleEndian( - (words0 >> word_offsets[i]) | (words1 << (kBitWidth - word_offsets[i]))); + (words0 >> ith_word_offset) | (words1 << (kBitWidth - ith_word_offset))); } } visit_outs.template emplace_back(visitor(visited_words)); } CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - (whole_word_count - 1) * kBitWidth, out_bitmap.mutable_data(), + (whole_word_count - 1) * kBitWidth, out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); consume((whole_word_count - 1) * kBitWidth); @@ -391,7 +397,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, Word visit_out = visitor(visited_words); // outputs a word/ partial word CopyBitmap(reinterpret_cast(&visit_out), 0, kBitWidth, - out_bitmap.mutable_data(), out_bitmap.offset()); + out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); consume(kBitWidth); } @@ -400,7 +406,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); Word visit_out = visitor(visited_words); CopyBitmap(reinterpret_cast(&visit_out), 0, bit_length, - out_bitmap.mutable_data(), out_bitmap.offset()); + out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); } return min_word_offset; @@ -484,8 +490,8 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, template static int64_t BitLength(const std::array& bitmaps) { - for (size_t i = 1; i < bitmaps.size(); ++i) { - DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length()); + for (size_t i = 1; i < N; ++i) { + assert(bitmaps[i].length() == bitmaps[0].length()); } return bitmaps[0].length(); } diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc new file mode 100644 index 00000000000..4dc6d5c0cee --- /dev/null +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/util/bitmap.h" + +#include +#include +#include + +#include +#include + +#include "arrow/buffer.h" + +namespace arrow { +namespace internal { + +void random_bool_vector(std::vector& vec, int64_t size, double p = 0.5) { + vec.reserve(size); + std::random_device rd; + std::mt19937 gen(rd()); + std::bernoulli_distribution d(p); + + for (int n = 0; n < size; ++n) { + vec.push_back(d(gen)); + } +} + +void VerifyBoolOutput(const Bitmap& bitmap, const std::vector& expected) { + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(expected)); + ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish()); + + ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(), + arr->data()->buffers[1]->data(), 0, expected.size())); +} + +class TestBitmapVisit : public ::testing::Test {}; + +TEST_F(TestBitmapVisit, OutputZeroOffset) { + int64_t bits = 1000, part = bits / 4; + + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len + Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + + ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part)); + Bitmap out_bm(out, 0, part); + + // (bm0 & bm1) | bm2 + std::array bms{bm0, bm1, bm2}; + Bitmap::VisitWordsAndWrite( + bms, + [](std::array& words) { return (words[0] & words[1]) | words[2]; }, + &out_bm); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + std::vector v3(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), v3.begin(), std::logical_and()); + // v3 |= v2 + std::transform(v3.begin(), v3.end(), v2.begin(), v3.begin(), std::logical_or()); + + VerifyBoolOutput(out_bm, v3); +} + +TEST_F(TestBitmapVisit, OutputNonZeroOffset) { + int64_t bits = 1000, part = bits / 4; + + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len + Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + + // allocate lager buffer but only use the last `part` + ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 2)); + Bitmap out_bm(out, part, part); + + // (bm0 & bm1) | bm2 + std::array bms{bm0, bm1, bm2}; + Bitmap::VisitWordsAndWrite( + bms, + [](std::array& words) { return (words[0] & words[1]) | words[2]; }, + &out_bm); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + std::vector v3(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), v3.begin(), std::logical_and()); + // v3 |= v2 + std::transform(v3.begin(), v3.end(), v2.begin(), v3.begin(), std::logical_or()); + + VerifyBoolOutput(out_bm, v3); +} + +} // namespace internal +} // namespace arrow \ No newline at end of file From f0f3c83a47fc373a4fac0ac5e50e76bbe62b3f65 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 9 Jun 2021 17:10:10 -0400 Subject: [PATCH 05/46] moving BitmapWordReader and BitmapWordWriter to header files --- cpp/src/arrow/util/bitmap_ops.cc | 218 ----------------------------- cpp/src/arrow/util/bitmap_reader.h | 113 ++++++++++++++- cpp/src/arrow/util/bitmap_writer.h | 101 +++++++++++++ 3 files changed, 213 insertions(+), 219 deletions(-) diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index a27a61cadf3..63c8b008f4a 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -28,9 +28,7 @@ #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_reader.h" #include "arrow/util/bitmap_writer.h" -#include "arrow/util/endian.h" #include "arrow/util/logging.h" -#include "arrow/util/ubsan.h" namespace arrow { namespace internal { @@ -85,222 +83,6 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) { return count; } -namespace { - -// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h) -// on sufficiently large inputs. However, it has a larger prolog / epilog overhead -// and should probably not be used for small bitmaps. - -template -class BitmapWordReader { - public: - BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) { - bitmap_ = bitmap + offset / 8; - offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); - - // decrement word count by one as we may touch two adjacent words in one iteration - nwords_ = length / (sizeof(Word) * 8) - 1; - if (nwords_ < 0) { - nwords_ = 0; - } - trailing_bits_ = static_cast(length - nwords_ * sizeof(Word) * 8); - trailing_bytes_ = static_cast(BitUtil::BytesForBits(trailing_bits_)); - - if (nwords_ > 0) { - current_word_ = load(bitmap_); - } else if (length > 0) { - current_byte_ = load(bitmap_); - } - } - - Word NextWord() { - bitmap_ += sizeof(Word); - const Word next_word = load(bitmap_); - Word word = current_word_; - if (offset_) { - // combine two adjacent words into one word - // |<------ next ----->|<---- current ---->| - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // | | offset - // v v - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // |<------ word ----->| - word >>= offset_; - word |= next_word << (sizeof(Word) * 8 - offset_); - } - current_word_ = next_word; - return word; - } - - uint8_t NextTrailingByte(int& valid_bits) { - uint8_t byte; - DCHECK_GT(trailing_bits_, 0); - - if (trailing_bits_ <= 8) { - // last byte - valid_bits = trailing_bits_; - trailing_bits_ = 0; - byte = 0; - internal::BitmapReader reader(bitmap_, offset_, valid_bits); - for (int i = 0; i < valid_bits; ++i) { - byte >>= 1; - if (reader.IsSet()) { - byte |= 0x80; - } - reader.Next(); - } - byte >>= (8 - valid_bits); - } else { - ++bitmap_; - const uint8_t next_byte = load(bitmap_); - byte = current_byte_; - if (offset_) { - byte >>= offset_; - byte |= next_byte << (8 - offset_); - } - current_byte_ = next_byte; - trailing_bits_ -= 8; - valid_bits = 8; - } - return byte; - } - - int64_t words() const { return nwords_; } - int trailing_bytes() const { return trailing_bytes_; } - - private: - int64_t offset_; - const uint8_t* bitmap_; - - const uint8_t* bitmap_end_; - int64_t nwords_; - int trailing_bits_; - int trailing_bytes_; - union { - Word current_word_; - struct { -#if ARROW_LITTLE_ENDIAN == 0 - uint8_t padding_bytes_[sizeof(Word) - 1]; -#endif - uint8_t current_byte_; - }; - }; - - template - DType load(const uint8_t* bitmap) { - DCHECK_LE(bitmap + sizeof(DType), bitmap_end_); - return BitUtil::ToLittleEndian(util::SafeLoadAs(bitmap)); - } -}; - -template -class BitmapWordWriter { - public: - BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) { - bitmap_ = bitmap + offset / 8; - offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); - mask_ = (1U << offset_) - 1; - - if (offset_) { - if (length >= static_cast(sizeof(Word) * 8)) { - current_word_ = load(bitmap_); - } else if (length > 0) { - current_byte_ = load(bitmap_); - } - } - } - - void PutNextWord(Word word) { - if (offset_) { - // split one word into two adjacent words, don't touch unused bits - // |<------ word ----->| - // +-----+-------------+ - // | A | B | - // +-----+-------------+ - // | | - // v v offset - // +-------------+-----+-------------+-----+ - // | --- | A | B | --- | - // +-------------+-----+-------------+-----+ - // |<------ next ----->|<---- current ---->| - word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_)); - Word next_word = load(bitmap_ + sizeof(Word)); - current_word_ = (current_word_ & mask_) | (word & ~mask_); - next_word = (next_word & ~mask_) | (word & mask_); - store(bitmap_, current_word_); - store(bitmap_ + sizeof(Word), next_word); - current_word_ = next_word; - } else { - store(bitmap_, word); - } - bitmap_ += sizeof(Word); - } - - void PutNextTrailingByte(uint8_t byte, int valid_bits) { - if (valid_bits == 8) { - if (offset_) { - byte = (byte << offset_) | (byte >> (8 - offset_)); - uint8_t next_byte = load(bitmap_ + 1); - current_byte_ = (current_byte_ & mask_) | (byte & ~mask_); - next_byte = (next_byte & ~mask_) | (byte & mask_); - store(bitmap_, current_byte_); - store(bitmap_ + 1, next_byte); - current_byte_ = next_byte; - } else { - store(bitmap_, byte); - } - ++bitmap_; - } else { - DCHECK_GT(valid_bits, 0); - DCHECK_LT(valid_bits, 8); - DCHECK_LE(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits), bitmap_end_); - internal::BitmapWriter writer(bitmap_, offset_, valid_bits); - for (int i = 0; i < valid_bits; ++i) { - (byte & 0x01) ? writer.Set() : writer.Clear(); - writer.Next(); - byte >>= 1; - } - writer.Finish(); - } - } - - private: - int64_t offset_; - uint8_t* bitmap_; - - const uint8_t* bitmap_end_; - uint64_t mask_; - union { - Word current_word_; - struct { -#if ARROW_LITTLE_ENDIAN == 0 - uint8_t padding_bytes_[sizeof(Word) - 1]; -#endif - uint8_t current_byte_; - }; - }; - - template - DType load(const uint8_t* bitmap) { - DCHECK_LE(bitmap + sizeof(DType), bitmap_end_); - return BitUtil::ToLittleEndian(util::SafeLoadAs(bitmap)); - } - - template - void store(uint8_t* bitmap, DType data) { - DCHECK_LE(bitmap + sizeof(DType), bitmap_end_); - util::SafeStore(bitmap, BitUtil::FromLittleEndian(data)); - } -}; - -} // namespace - enum class TransferMode : bool { Copy, Invert }; template diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index cf4f5e7db8b..66c0df35cff 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -142,6 +142,117 @@ class BitmapUInt64Reader { uint64_t carry_bits_; }; +// BitmapWordReader here is faster than BitmapUInt64Reader (in bitmap_reader.h) +// on sufficiently large inputs. However, it has a larger prolog / epilog overhead +// and should probably not be used for small bitmaps. + +template +class BitmapWordReader { + public: + BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) { + bitmap_ = bitmap + offset / 8; + offset_ = offset % 8; + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); + + // decrement word count by one as we may touch two adjacent words in one iteration + nwords_ = length / (sizeof(Word) * 8) - 1; + if (nwords_ < 0) { + nwords_ = 0; + } + trailing_bits_ = static_cast(length - nwords_ * sizeof(Word) * 8); + trailing_bytes_ = static_cast(BitUtil::BytesForBits(trailing_bits_)); + + if (nwords_ > 0) { + current_word_ = load(bitmap_); + } else if (length > 0) { + current_byte_ = load(bitmap_); + } + } + + Word NextWord() { + bitmap_ += sizeof(Word); + const Word next_word = load(bitmap_); + Word word = current_word_; + if (offset_) { + // combine two adjacent words into one word + // |<------ next ----->|<---- current ---->| + // +-------------+-----+-------------+-----+ + // | --- | A | B | --- | + // +-------------+-----+-------------+-----+ + // | | offset + // v v + // +-----+-------------+ + // | A | B | + // +-----+-------------+ + // |<------ word ----->| + word >>= offset_; + word |= next_word << (sizeof(Word) * 8 - offset_); + } + current_word_ = next_word; + return word; + } + + uint8_t NextTrailingByte(int& valid_bits) { + uint8_t byte; + assert(trailing_bits_ > 0); + + if (trailing_bits_ <= 8) { + // last byte + valid_bits = trailing_bits_; + trailing_bits_ = 0; + byte = 0; + internal::BitmapReader reader(bitmap_, offset_, valid_bits); + for (int i = 0; i < valid_bits; ++i) { + byte >>= 1; + if (reader.IsSet()) { + byte |= 0x80; + } + reader.Next(); + } + byte >>= (8 - valid_bits); + } else { + ++bitmap_; + const uint8_t next_byte = load(bitmap_); + byte = current_byte_; + if (offset_) { + byte >>= offset_; + byte |= next_byte << (8 - offset_); + } + current_byte_ = next_byte; + trailing_bits_ -= 8; + valid_bits = 8; + } + return byte; + } + + int64_t words() const { return nwords_; } + int trailing_bytes() const { return trailing_bytes_; } + + private: + int64_t offset_; + const uint8_t* bitmap_; + + const uint8_t* bitmap_end_; + int64_t nwords_; + int trailing_bits_; + int trailing_bytes_; + union { + Word current_word_; + struct { +#if ARROW_LITTLE_ENDIAN == 0 + uint8_t padding_bytes_[sizeof(Word) - 1]; +#endif + uint8_t current_byte_; + }; + }; + + template + DType load(const uint8_t* bitmap) { + assert(bitmap + sizeof(DType) <= bitmap_end_); + return BitUtil::ToLittleEndian(util::SafeLoadAs(bitmap)); + } +}; + /// \brief Index into a possibly non-existent bitmap struct OptionalBitIndexer { const uint8_t* bitmap; @@ -151,7 +262,7 @@ struct OptionalBitIndexer { : bitmap(buffer == NULLPTR ? NULLPTR : buffer->data()), offset(offset) {} bool operator[](int64_t i) const { - return bitmap == NULLPTR ? true : BitUtil::GetBit(bitmap, offset + i); + return bitmap == NULLPTR || BitUtil::GetBit(bitmap, offset + i); } }; diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h index d4f02f37a41..afe0dcea35f 100644 --- a/cpp/src/arrow/util/bitmap_writer.h +++ b/cpp/src/arrow/util/bitmap_writer.h @@ -180,5 +180,106 @@ class FirstTimeBitmapWriter { int64_t byte_offset_; }; +template +class BitmapWordWriter { + public: + BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) { + bitmap_ = bitmap + offset / 8; + offset_ = offset % 8; + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); + mask_ = (1U << offset_) - 1; + + if (offset_) { + if (length >= static_cast(sizeof(Word) * 8)) { + current_word_ = load(bitmap_); + } else if (length > 0) { + current_byte_ = load(bitmap_); + } + } + } + + void PutNextWord(Word word) { + if (offset_) { + // split one word into two adjacent words, don't touch unused bits + // |<------ word ----->| + // +-----+-------------+ + // | A | B | + // +-----+-------------+ + // | | + // v v offset + // +-------------+-----+-------------+-----+ + // | --- | A | B | --- | + // +-------------+-----+-------------+-----+ + // |<------ next ----->|<---- current ---->| + word = (word << offset_) | (word >> (sizeof(Word) * 8 - offset_)); + Word next_word = load(bitmap_ + sizeof(Word)); + current_word_ = (current_word_ & mask_) | (word & ~mask_); + next_word = (next_word & ~mask_) | (word & mask_); + store(bitmap_, current_word_); + store(bitmap_ + sizeof(Word), next_word); + current_word_ = next_word; + } else { + store(bitmap_, word); + } + bitmap_ += sizeof(Word); + } + + void PutNextTrailingByte(uint8_t byte, int valid_bits) { + if (valid_bits == 8) { + if (offset_) { + byte = (byte << offset_) | (byte >> (8 - offset_)); + uint8_t next_byte = load(bitmap_ + 1); + current_byte_ = (current_byte_ & mask_) | (byte & ~mask_); + next_byte = (next_byte & ~mask_) | (byte & mask_); + store(bitmap_, current_byte_); + store(bitmap_ + 1, next_byte); + current_byte_ = next_byte; + } else { + store(bitmap_, byte); + } + ++bitmap_; + } else { + assert(valid_bits > 0); + assert(valid_bits < 8); + assert(bitmap_ + BitUtil::BytesForBits(offset_ + valid_bits) <= bitmap_end_); + internal::BitmapWriter writer(bitmap_, offset_, valid_bits); + for (int i = 0; i < valid_bits; ++i) { + (byte & 0x01) ? writer.Set() : writer.Clear(); + writer.Next(); + byte >>= 1; + } + writer.Finish(); + } + } + + private: + int64_t offset_; + uint8_t* bitmap_; + + const uint8_t* bitmap_end_; + uint64_t mask_; + union { + Word current_word_; + struct { +#if ARROW_LITTLE_ENDIAN == 0 + uint8_t padding_bytes_[sizeof(Word) - 1]; +#endif + uint8_t current_byte_; + }; + }; + + template + DType load(const uint8_t* bitmap) { + assert(bitmap + sizeof(DType) <= bitmap_end_); + return BitUtil::ToLittleEndian(util::SafeLoadAs(bitmap)); + } + + template + void store(uint8_t* bitmap, DType data) { + assert(bitmap + sizeof(DType) <= bitmap_end_); + util::SafeStore(bitmap, BitUtil::FromLittleEndian(data)); + } +}; + } // namespace internal } // namespace arrow From 7c6a4ef29067e0bde006e59f768bb3de0c2ffe6b Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 10 Jun 2021 20:49:08 -0400 Subject: [PATCH 06/46] adding multiple writers and testing w/ offsets --- cpp/src/arrow/util/bitmap.h | 115 +++++++++++++++++++++ cpp/src/arrow/util/bitmap_reader.h | 4 +- cpp/src/arrow/util/bitmap_test.cc | 156 ++++++++++++++++++++++++++--- cpp/src/arrow/util/bitmap_writer.h | 22 +++- 4 files changed, 282 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 877811afd31..7174923fca4 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -30,6 +30,8 @@ #include "arrow/buffer.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/bitmap_reader.h" +#include "arrow/util/bitmap_writer.h" #include "arrow/util/compare.h" #include "arrow/util/endian.h" #include "arrow/util/functional.h" @@ -412,6 +414,119 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return min_word_offset; } + template + using MultiOutputVisitor = std::function& in_words, + std::array& out_words)>; + + template + static void VisitWordsAndWrite(const std::array& bitmaps_arg, + MultiOutputVisitor&& visitor, + std::array& out_bitmaps_arg) { + constexpr int64_t kBitWidth = sizeof(Word) * 8; + + int64_t bit_length = BitLength(bitmaps_arg); + assert(bit_length == BitLength(out_bitmaps_arg)); + + std::array, N> readers; + for (size_t i = 0; i < N; ++i) { + readers[i] = BitmapWordReader(bitmaps_arg[i].buffer_->data(), + bitmaps_arg[i].offset_, bitmaps_arg[i].length_); + } + + std::array, M> writers; + for (size_t i = 0; i < M; ++i) { + writers[i] = + BitmapWordWriter(out_bitmaps_arg[i].buffer_->mutable_data(), + out_bitmaps_arg[i].offset_, out_bitmaps_arg[i].length_); + } + + std::array visited_words; + visited_words.fill(0); + std::array output_words; + output_words.fill(0); + + // every reader will have same number of words, since they are same length'ed + // todo this will be inefficient in some cases. When there are offsets beyond Word + // boundary, every Word would have to be created from 2 adjoining Words + auto n_words = readers[0].words(); + while (n_words--) { + // first collect all words to visited_words array + for (size_t i = 0; i < N; i++) { + visited_words[i] = readers[i].NextWord(); + } + + visitor(visited_words, output_words); + + for (size_t i = 0; i < M; i++) { + writers[i].PutNextWord(output_words[i]); + } + + bit_length -= kBitWidth; + } + + // every reader will have same number of trailing bytes, because of the above reason + // todo when the above issue is resolved, following logic also needs to be fixed! + // tailing portion could be more than one word! (ref: BitmapWordReader constructor) + assert(static_cast(bit_length) < kBitWidth * 2); + if (bit_length / kBitWidth) { + // there's one full word in trailing portion. Cant use NextWord() here because it + // doesn't stride the trailing metadata + for (size_t i = 0; i < N; i++) { + visited_words[i] = 0; + for (size_t b = 0; b < sizeof(Word); b++) { + int dummy; + auto byte = static_cast(readers[i].NextTrailingByte(dummy)); + visited_words[i] |= byte << (b * 8); + } + } + + visitor(visited_words, output_words); + + for (size_t i = 0; i < M; i++) { + writers[i].PutNextWord(output_words[i]); + } + + bit_length -= kBitWidth; + } + + // clean-up last partial word + if (bit_length) { + output_words.fill(0); + for (size_t i = 0; i < N; i++) { + visited_words[i] = 0; + int n_byte = readers[i].trailing_bytes(); + for (int b = 0; b < n_byte; b++) { + int valid_bits; + auto byte = static_cast(readers[i].NextTrailingByte(valid_bits)); + visited_words[i] |= (byte << b * 8); + } + } + + visitor(visited_words, output_words); + + for (size_t i = 0; i < M; i++) { + writers[i].PutNextWord(output_words[i], bit_length); + } + } + } + + template + using SingleOutputVisitor = + std::function& in_words, Word& out_words)>; + + template + static void VisitWordsAndWrite(const std::array& bitmaps_arg, + SingleOutputVisitor&& visitor, + Bitmap& out_bitmap_arg) { + std::array out_bitmaps{out_bitmap_arg}; + VisitWordsAndWrite( + bitmaps_arg, + [&](const std::array& in_words, std::array& out_words) { + visitor(in_words, out_words[0]); + }, + out_bitmaps); + } + const std::shared_ptr& buffer() const { return buffer_; } /// offset of first bit relative to buffer().data() diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 66c0df35cff..a562e9a1294 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -149,10 +149,11 @@ class BitmapUInt64Reader { template class BitmapWordReader { public: + BitmapWordReader() = default; BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) { bitmap_ = bitmap + offset / 8; offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset + length); // decrement word count by one as we may touch two adjacent words in one iteration nwords_ = length / (sizeof(Word) * 8) - 1; @@ -220,6 +221,7 @@ class BitmapWordReader { } current_byte_ = next_byte; trailing_bits_ -= 8; + trailing_bytes_--; valid_bits = 8; } return byte; diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc index 4dc6d5c0cee..0db0fa5854c 100644 --- a/cpp/src/arrow/util/bitmap_test.cc +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -40,19 +40,29 @@ void random_bool_vector(std::vector& vec, int64_t size, double p = 0.5) { } } +std::string VectorToString(const std::vector& v) { + std::string out(v.size() + +((v.size() - 1) / 8), ' '); + for (size_t i = 0; i < v.size(); ++i) { + out[i + (i / 8)] = v[i] ? '1' : '0'; + } + return out; +} + void VerifyBoolOutput(const Bitmap& bitmap, const std::vector& expected) { arrow::BooleanBuilder boolean_builder; ASSERT_OK(boolean_builder.AppendValues(expected)); ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish()); ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(), - arr->data()->buffers[1]->data(), 0, expected.size())); + arr->data()->buffers[1]->data(), 0, expected.size())) + << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString(); } class TestBitmapVisit : public ::testing::Test {}; -TEST_F(TestBitmapVisit, OutputZeroOffset) { - int64_t bits = 1000, part = bits / 4; +TEST_F(TestBitmapVisit, SingleWriterOutputZeroOffset) { + // choosing part = 199, a prime, so that shifts are falling in-between bytes + int64_t part = 199, bits = part * 4; std::vector data; random_bool_vector(data, bits); @@ -70,12 +80,14 @@ TEST_F(TestBitmapVisit, OutputZeroOffset) { ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part)); Bitmap out_bm(out, 0, part); + auto visitor = [](const std::array& in_words, uint64_t& out_words) { + out_words = (in_words[0] & in_words[1]) | in_words[2]; + }; + // (bm0 & bm1) | bm2 - std::array bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - bms, - [](std::array& words) { return (words[0] & words[1]) | words[2]; }, - &out_bm); + {bm0, bm1, bm2}, std::forward>(visitor), + out_bm); std::vector v0(data.begin(), data.begin() + part); std::vector v1(data.begin() + part * 1, data.begin() + part * 2); @@ -89,8 +101,9 @@ TEST_F(TestBitmapVisit, OutputZeroOffset) { VerifyBoolOutput(out_bm, v3); } -TEST_F(TestBitmapVisit, OutputNonZeroOffset) { - int64_t bits = 1000, part = bits / 4; +TEST_F(TestBitmapVisit, SingleWriterOutputNonZeroOffset) { + // choosing part = 199, a prime + int64_t part = 199, bits = part * 4; std::vector data; random_bool_vector(data, bits); @@ -109,12 +122,14 @@ TEST_F(TestBitmapVisit, OutputNonZeroOffset) { ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 2)); Bitmap out_bm(out, part, part); + auto visitor = [](const std::array& in_words, uint64_t& out_words) { + out_words = (in_words[0] & in_words[1]) | in_words[2]; + }; + // (bm0 & bm1) | bm2 - std::array bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - bms, - [](std::array& words) { return (words[0] & words[1]) | words[2]; }, - &out_bm); + {bm0, bm1, bm2}, std::forward>(visitor), + out_bm); std::vector v0(data.begin(), data.begin() + part); std::vector v1(data.begin() + part * 1, data.begin() + part * 2); @@ -128,5 +143,120 @@ TEST_F(TestBitmapVisit, OutputNonZeroOffset) { VerifyBoolOutput(out_bm, v3); } +TEST_F(TestBitmapVisit, MultiWriterOutputZeroOffset) { + // choosing part = 199, a prime + int64_t part = 199, bits = part * 4; + + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len + Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + + std::array out_bms; + ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part)); + ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part)); + out_bms[0] = Bitmap(out0, 0, part); + out_bms[1] = Bitmap(out1, 0, part); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + + // out0 = bm0 & bm1, out1= bm0 | bm2 + auto visitor_func = [](const std::array& in, + std::array& out) { + out[0] = in[0] & in[1]; + out[1] = in[0] | in[2]; + }; + + Bitmap::VisitWordsAndWrite( + {bm0, bm1, bm2}, + std::forward>(visitor_func), out_bms); + + std::vector out_v0(part); + std::vector out_v1(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), + std::logical_and()); + // v3 |= v2 + std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), + std::logical_or()); + + // std::cout << "v0: " << VectorToString(v0)<< "\n"; + // std::cout << "b0: " << bm0.ToString()<< "\n"; + // std::cout << "v1: " << VectorToString(v1)<< "\n"; + // std::cout << "b1: " << bm1.ToString()<< "\n"; + // std::cout << "v2: " << VectorToString(v2) << "\n"; + // std::cout << "b2: " << bm2.ToString() << "\n"; + + VerifyBoolOutput(out_bms[0], out_v0); + VerifyBoolOutput(out_bms[1], out_v1); +} + +TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { + // choosing part = 199, a prime + int64_t part = 199, bits = part * 4; + + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len + Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + + std::array out_bms; + ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); + out_bms[0] = Bitmap(out, part, part); + out_bms[1] = Bitmap(out, part * 2, part); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + + // std::cout << "v0: " << VectorToString(v0)<< "\n"; + // std::cout << "b0: " << bm0.ToString() << "\n"; + // std::cout << "v1: " << VectorToString(v1) << "\n"; + // std::cout << "b1: " << bm1.ToString() << "\n"; + // std::cout << "v2: " << VectorToString(v2) << "\n"; + // std::cout << "b2: " << bm2.ToString() << "\n"; + + // out0 = bm0 & bm1, out1= bm0 | bm2 + auto visitor_func = [](const std::array& in, + std::array& out) { + out[0] = in[0] & in[1]; + out[1] = in[0] | in[2]; + }; + + Bitmap::VisitWordsAndWrite( + {bm0, bm1, bm2}, + std::forward>(visitor_func), out_bms); + + std::vector out_v0(part); + std::vector out_v1(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), + std::logical_and()); + // v3 |= v2 + std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), + std::logical_or()); + + VerifyBoolOutput(out_bms[0], out_v0); + VerifyBoolOutput(out_bms[1], out_v1); +} + } // namespace internal } // namespace arrow \ No newline at end of file diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h index afe0dcea35f..ca75abbf15c 100644 --- a/cpp/src/arrow/util/bitmap_writer.h +++ b/cpp/src/arrow/util/bitmap_writer.h @@ -183,10 +183,11 @@ class FirstTimeBitmapWriter { template class BitmapWordWriter { public: + BitmapWordWriter() = default; BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) { bitmap_ = bitmap + offset / 8; offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset + length); mask_ = (1U << offset_) - 1; if (offset_) { @@ -224,6 +225,25 @@ class BitmapWordWriter { bitmap_ += sizeof(Word); } + void PutNextWord(Word word, int valid_bits) { + assert(static_cast(valid_bits) <= sizeof(Word) * 8); + if (ARROW_PREDICT_FALSE(valid_bits == 0)) { + return; + } else if (ARROW_PREDICT_FALSE(valid_bits == sizeof(Word) * 8)) { + return PutNextWord(word); + } + int i = 0; + for (; i < valid_bits / 8; i++) { + uint8_t byte = *(reinterpret_cast(&word) + i); + PutNextTrailingByte(byte, 8); + } + // cleanup + if (int remainder = valid_bits - i * 8) { + assert(static_cast(remainder) < sizeof(Word) * 8); + PutNextTrailingByte(*(reinterpret_cast(&word) + i), remainder); + } + } + void PutNextTrailingByte(uint8_t byte, int valid_bits) { if (valid_bits == 8) { if (offset_) { From 1e223011f8d5a970be0a2aa14160b12f4e029af2 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 11 Jun 2021 01:10:15 +0000 Subject: [PATCH 07/46] Autoformat/render all the things [automated commit] --- r/man/ChunkedArray.Rd | 22 -------------------- r/man/Field.Rd | 5 ----- r/man/FileFormat.Rd | 15 ------------- r/man/ParquetFileReader.Rd | 12 ----------- r/man/RecordBatch.Rd | 11 ---------- r/man/RecordBatchReader.Rd | 37 --------------------------------- r/man/RecordBatchWriter.Rd | 37 --------------------------------- r/man/Scalar.Rd | 17 --------------- r/man/Schema.Rd | 9 -------- r/man/Table.Rd | 11 ---------- r/man/buffer.Rd | 9 -------- r/man/call_function.Rd | 10 --------- r/man/codec_is_available.Rd | 5 ----- r/man/copy_files.Rd | 10 --------- r/man/data-type.Rd | 8 ------- r/man/hive_partition.Rd | 5 ----- r/man/list_compute_functions.Rd | 7 ------- r/man/load_flight_server.Rd | 5 ----- r/man/match_arrow.Rd | 25 ---------------------- r/man/read_delim_arrow.Rd | 11 ---------- r/man/read_feather.Rd | 11 ---------- r/man/read_json_arrow.Rd | 12 ----------- r/man/read_parquet.Rd | 9 -------- r/man/s3_bucket.Rd | 5 ----- r/man/type.Rd | 10 --------- r/man/unify_schemas.Rd | 7 ------- r/man/value_counts.Rd | 6 ------ r/man/write_csv_arrow.Rd | 7 ------- r/man/write_feather.Rd | 7 ------- r/man/write_ipc_stream.Rd | 7 ------- r/man/write_parquet.Rd | 12 ----------- r/man/write_to_raw.Rd | 7 ------- 32 files changed, 371 deletions(-) diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd index eaae0b3d4b8..486b6222af7 100644 --- a/r/man/ChunkedArray.Rd +++ b/r/man/ChunkedArray.Rd @@ -53,28 +53,6 @@ within the array's internal data. This can be an expensive check, potentially \c } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# Pass items into chunked_array as separate objects to create chunks -class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73)) -class_scores$num_chunks - -# When taking a Slice from a chunked_array, chunks are preserved -class_scores$Slice(2, length = 5) - -# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk -# containing all values, ordered. -class_scores$Take(class_scores$SortIndices(descending = TRUE)) - -# If you pass a list into chunked_array, you get a list of length 1 -list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8))) -list_scores$num_chunks - -# When constructing a ChunkedArray, the first chunk is used to infer type. -doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L)) -doubles$type -\dontshow{\}) # examplesIf} -} \seealso{ \link{Array} } diff --git a/r/man/Field.Rd b/r/man/Field.Rd index 77d31fa637a..03dffd11ca9 100644 --- a/r/man/Field.Rd +++ b/r/man/Field.Rd @@ -28,8 +28,3 @@ field(name, type, metadata) } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -field("x", int32()) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index 5bc9475b408..b8d4dc01bad 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,18 +51,3 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}) } -\examples{ -\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -## Semi-colon delimited files -# Set up directory for examples -tf <- tempfile() -dir.create(tf) -on.exit(unlink(tf)) -write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) - -# Create FileFormat object -format <- FileFormat$create(format = "text", delimiter = ";") - -open_dataset(tf, format = format) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 39146919768..e97cf80ee7a 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -44,15 +44,3 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat } } -\examples{ -\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -f <- system.file("v0.7.1.parquet", package="arrow") -pq <- ParquetFileReader$create(f) -pq$GetSchema() -if (codec_is_available("snappy")) { - # This file has compressed data columns - tab <- pq$ReadTable() - tab$schema -} -\dontshow{\}) # examplesIf} -} diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index ff08c215853..e3024b91b7a 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -79,14 +79,3 @@ All list elements are coerced to string. See \code{schema()} for more informatio } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -batch <- record_batch(name = rownames(mtcars), mtcars) -dim(batch) -dim(head(batch)) -names(batch) -batch$mpg -batch[["cyl"]] -as.data.frame(batch[4:8, c("gear", "hp", "wt")]) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index 90c796a6693..a206c30c8fb 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -43,43 +43,6 @@ are in the file. } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) - -batch <- record_batch(chickwts) - -# This opens a connection to the file in Arrow -file_obj <- FileOutputStream$create(tf) -# Pass that to a RecordBatchWriter to write data conforming to a schema -writer <- RecordBatchFileWriter$create(file_obj, batch$schema) -writer$write(batch) -# You may write additional batches to the stream, provided that they have -# the same schema. -# Call "close" on the writer to indicate end-of-file/stream -writer$close() -# Then, close the connection--closing the IPC message does not close the file -file_obj$close() - -# Now, we have a file we can read from. Same pattern: open file connection, -# then pass it to a RecordBatchReader -read_file_obj <- ReadableFile$create(tf) -reader <- RecordBatchFileReader$create(read_file_obj) -# RecordBatchFileReader knows how many batches it has (StreamReader does not) -reader$num_record_batches -# We could consume the Reader by calling $read_next_batch() until all are, -# consumed, or we can call $read_table() to pull them all into a Table -tab <- reader$read_table() -# Call as.data.frame to turn that Table into an R data.frame -df <- as.data.frame(tab) -# This should be the same data we sent -all.equal(df, chickwts, check.attributes = FALSE) -# Unlike the Writers, we don't have to close RecordBatchReaders, -# but we do still need to close the file connection -read_file_obj$close() -\dontshow{\}) # examplesIf} -} \seealso{ \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface for reading data from these formats and are sufficient for many use cases. diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index 219c150e6a4..cc6d2feb3ac 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -45,43 +45,6 @@ to be closed separately. } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) - -batch <- record_batch(chickwts) - -# This opens a connection to the file in Arrow -file_obj <- FileOutputStream$create(tf) -# Pass that to a RecordBatchWriter to write data conforming to a schema -writer <- RecordBatchFileWriter$create(file_obj, batch$schema) -writer$write(batch) -# You may write additional batches to the stream, provided that they have -# the same schema. -# Call "close" on the writer to indicate end-of-file/stream -writer$close() -# Then, close the connection--closing the IPC message does not close the file -file_obj$close() - -# Now, we have a file we can read from. Same pattern: open file connection, -# then pass it to a RecordBatchReader -read_file_obj <- ReadableFile$create(tf) -reader <- RecordBatchFileReader$create(read_file_obj) -# RecordBatchFileReader knows how many batches it has (StreamReader does not) -reader$num_record_batches -# We could consume the Reader by calling $read_next_batch() until all are, -# consumed, or we can call $read_table() to pull them all into a Table -tab <- reader$read_table() -# Call as.data.frame to turn that Table into an R data.frame -df <- as.data.frame(tab) -# This should be the same data we sent -all.equal(df, chickwts, check.attributes = FALSE) -# Unlike the Writers, we don't have to close RecordBatchReaders, -# but we do still need to close the file connection -read_file_obj$close() -\dontshow{\}) # examplesIf} -} \seealso{ \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler interface for writing data to these formats and are sufficient for many use diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 21e04c12e08..9128988d11c 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -19,20 +19,3 @@ A \code{Scalar} holds a single value of an Arrow type. \verb{$type}: Scalar type } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -Scalar$create(pi) -Scalar$create(404) -# If you pass a vector into Scalar$create, you get a list containing your items -Scalar$create(c(1, 2, 3)) - -# Comparisons -my_scalar <- Scalar$create(99) -my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE -my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE -my_scalar$Equals(Scalar$create(99.000009)) # FALSE -my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) - -my_scalar$ToString() -\dontshow{\}) # examplesIf} -} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 6e385bb804e..0c66e5c2a42 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -74,12 +74,3 @@ Files with compressed metadata are readable by older versions of arrow, but the metadata is dropped. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5)) -tab1 <- Table$create(df) -tab1$schema -tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32())) -tab2$schema -\dontshow{\}) # examplesIf} -} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index 2675943e572..d955b0f5a29 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -79,14 +79,3 @@ All list elements are coerced to string. See \code{schema()} for more informatio } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tab <- Table$create(name = rownames(mtcars), mtcars) -dim(tab) -dim(head(tab)) -names(tab) -tab$mpg -tab[["cyl"]] -as.data.frame(tab[4:8, c("gear", "hp", "wt")]) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index a3ca1fc2fcb..99b636da3c7 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -33,12 +33,3 @@ contiguous memory with a particular size. } } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -my_buffer <- buffer(c(1, 2, 3, 4)) -my_buffer$is_mutable -my_buffer$ZeroPadding() -my_buffer$size -my_buffer$capacity -\dontshow{\}) # examplesIf} -} diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd index f63038442dc..790c4237518 100644 --- a/r/man/call_function.Rd +++ b/r/man/call_function.Rd @@ -35,16 +35,6 @@ are callable with an \code{arrow_} prefix. When passing indices in \code{...}, \code{args}, or \code{options}, express them as 0-based integers (consistent with C++). } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -a <- Array$create(c(1L, 2L, 3L, NA, 5L)) -s <- Scalar$create(4L) -call_function("fill_null", a, s) - -a <- Array$create(rnorm(10000)) -call_function("quantile", a, options = list(q = seq(0, 1, 0.25))) -\dontshow{\}) # examplesIf} -} \seealso{ \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options. } diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index b3238ff1dca..1b5e8278fa9 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,8 +18,3 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -codec_is_available("gzip") -\dontshow{\}) # examplesIf} -} diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd index 1b83703f19f..75cc4405d8a 100644 --- a/r/man/copy_files.Rd +++ b/r/man/copy_files.Rd @@ -23,13 +23,3 @@ Nothing: called for side effects in the file system \description{ Copy files between FileSystems } -\examples{ -\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# Copy an S3 bucket's files to a local directory: -copy_files("s3://your-bucket-name", "local-directory") -# Using a FileSystem object -copy_files(s3_bucket("your-bucket-name"), "local-directory") -# Or go the other way, from local to S3 -copy_files("local-directory", s3_bucket("your-bucket-name")) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index a0631897573..101702a2fb2 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -150,14 +150,6 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -bool() -struct(a = int32(), b = double()) -timestamp("ms", timezone = "CEST") -time64("ns") -\dontshow{\}) # examplesIf} -} \seealso{ \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. } diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index eef9f9157ea..39d5d8d0ae2 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -28,8 +28,3 @@ Hive partitioning embeds field names and values in path segments, such as Because fields are named in the path segments, order of fields passed to \code{hive_partition()} does not matter. } -\examples{ -\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -hive_partition(year = int16(), month = int8()) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd index 668e090c0ca..ba17688d833 100644 --- a/r/man/list_compute_functions.Rd +++ b/r/man/list_compute_functions.Rd @@ -37,10 +37,3 @@ The package includes Arrow methods for many base R functions that can be called directly on Arrow objects, as well as some tidyverse-flavored versions available inside \code{dplyr} verbs. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -list_compute_functions() -list_compute_functions(pattern = "^UTF8", ignore.case = TRUE) -list_compute_functions(pattern = "^is", invert = TRUE) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 66d30f39147..7e2000a9ca2 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,8 +15,3 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } -\examples{ -\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -load_flight_server("demo_flight_server") -\dontshow{\}) # examplesIf} -} diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd index d63ef3eed87..21481af4c6b 100644 --- a/r/man/match_arrow.Rd +++ b/r/man/match_arrow.Rd @@ -26,28 +26,3 @@ per element of \code{x} it it is present in \code{table}. \code{base::match()} is not a generic, so we can't just define Arrow methods for it. This function exposes the analogous functions in the Arrow C++ library. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# note that the returned value is 0-indexed -cars_tbl <- Table$create(name = rownames(mtcars), mtcars) -match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name) - -is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name) - -# Although there are multiple matches, you are returned the index of the first -# match, as with the base R equivalent -match(4, mtcars$cyl) # 1-indexed -match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed - -# If `x` contains multiple values, you are returned the indices of the first -# match for each value. -match(c(4, 6, 8), mtcars$cyl) -match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl) - -# Return type matches type of `x` -is_in(c(4, 6, 8), mtcars$cyl) # returns vector -is_in(Scalar$create(4), mtcars$cyl) # returns Scalar -is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array -is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray -\dontshow{\}) # examplesIf} -} diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index 71394e547c9..d9c80306931 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -205,14 +205,3 @@ Note that if you are specifying column names, whether by \code{schema} or to idenfity column names, you'll need to add \code{skip = 1} to skip that row. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} - tf <- tempfile() - on.exit(unlink(tf)) - write.csv(mtcars, file = tf) - df <- read_csv_arrow(tf) - dim(df) - # Can select columns - df <- read_csv_arrow(tf, col_select = starts_with("d")) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index 95f4d1d12c6..fa18e3f7844 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -34,17 +34,6 @@ and to make sharing data across data analysis languages easy. This function reads both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) -write_feather(mtcars, tf) -df <- read_feather(tf) -dim(df) -# Can select columns -df <- read_feather(tf, col_select = starts_with("d")) -\dontshow{\}) # examplesIf} -} \seealso{ \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data. } diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 4806b4ad1f0..476c99fe4de 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -38,15 +38,3 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}. \description{ Using \link{JsonTableReader} } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} - tf <- tempfile() - on.exit(unlink(tf)) - writeLines(' - { "hello": 3.5, "world": false, "yo": "thing" } - { "hello": 3.25, "world": null } - { "hello": 0.0, "world": true, "yo": null } - ', tf, useBytes=TRUE) - df <- read_json_arrow(tf) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index 056e8644747..ffb2cf7109f 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -39,12 +39,3 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format. This function enables you to read Parquet files into R. } -\examples{ -\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) -write_parquet(mtcars, tf) -df <- read_parquet(tf, col_select = starts_with("d")) -head(df) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd index 95a086deae5..78d527a56c4 100644 --- a/r/man/s3_bucket.Rd +++ b/r/man/s3_bucket.Rd @@ -21,8 +21,3 @@ are authorized to access the bucket's contents. that automatically detects the bucket's AWS region and holding onto the its relative path. } -\examples{ -\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -bucket <- s3_bucket("ursa-labs-taxi-data") -\dontshow{\}) # examplesIf} -} diff --git a/r/man/type.Rd b/r/man/type.Rd index d55bbe24bd5..2f85e4a6ac6 100644 --- a/r/man/type.Rd +++ b/r/man/type.Rd @@ -15,13 +15,3 @@ an arrow logical type \description{ infer the arrow Array type from an R vector } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -type(1:10) -type(1L:10L) -type(c(1, 1.5, 2)) -type(c("A", "B", "C")) -type(mtcars) -type(Sys.Date()) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd index 50c80c2dda9..709e33a5e74 100644 --- a/r/man/unify_schemas.Rd +++ b/r/man/unify_schemas.Rd @@ -18,10 +18,3 @@ A \code{Schema} with the union of fields contained in the inputs, or \description{ Combine and harmonize schemas } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -a <- schema(b = double(), c = bool()) -z <- schema(b = double(), k = utf8()) -unify_schemas(a, z) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd index 6ef77cd4727..139af8edc63 100644 --- a/r/man/value_counts.Rd +++ b/r/man/value_counts.Rd @@ -16,9 +16,3 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts" \description{ This function tabulates the values in the array and returns a table of counts. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -cyl_vals <- Array$create(mtcars$cyl) -value_counts(cyl_vals) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd index 55a239ca998..d6df2bcd08e 100644 --- a/r/man/write_csv_arrow.Rd +++ b/r/man/write_csv_arrow.Rd @@ -23,10 +23,3 @@ the stream will be left open. \description{ Write CSV file to disk } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) -write_csv_arrow(mtcars, tf) -\dontshow{\}) # examplesIf} -} diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index c6273b61be8..0cc8c591369 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -47,13 +47,6 @@ and to make sharing data across data analysis languages easy. This function writes both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) -write_feather(mtcars, tf) -\dontshow{\}) # examplesIf} -} \seealso{ \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data. diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 888d947eb99..4f742ce9178 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,13 +31,6 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } -\examples{ -\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf <- tempfile() -on.exit(unlink(tf)) -write_ipc_stream(mtcars, tf) -\dontshow{\}) # examplesIf} -} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer. diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index d7147f7e8e6..823a6038e84 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -94,15 +94,3 @@ The default "snappy" is used if available, otherwise "uncompressed". To disable compression, set \code{compression = "uncompressed"}. Note that "uncompressed" columns may still have dictionary encoding. } -\examples{ -\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -tf1 <- tempfile(fileext = ".parquet") -write_parquet(data.frame(x = 1:5), tf1) - -# using compression -if (codec_is_available("gzip")) { - tf2 <- tempfile(fileext = ".gz.parquet") - write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) -} -\dontshow{\}) # examplesIf} -} diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd index 1f507e384c3..46af09a96e8 100644 --- a/r/man/write_to_raw.Rd +++ b/r/man/write_to_raw.Rd @@ -20,10 +20,3 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give This function wraps those so that you can serialize data to a buffer and access that buffer as a \code{raw} vector in R. } -\examples{ -\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -# The default format is "stream" -write_to_raw(mtcars) -write_to_raw(mtcars, format = "file") -\dontshow{\}) # examplesIf} -} From 15bdd4b95c299c6f8639f3df6d292bc6aaada1be Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 10 Jun 2021 22:17:19 -0400 Subject: [PATCH 08/46] Revert "Autoformat/render all the things [automated commit]" This reverts commit 1c13c4ca --- r/man/ChunkedArray.Rd | 22 ++++++++++++++++++++ r/man/Field.Rd | 5 +++++ r/man/FileFormat.Rd | 15 +++++++++++++ r/man/ParquetFileReader.Rd | 12 +++++++++++ r/man/RecordBatch.Rd | 11 ++++++++++ r/man/RecordBatchReader.Rd | 37 +++++++++++++++++++++++++++++++++ r/man/RecordBatchWriter.Rd | 37 +++++++++++++++++++++++++++++++++ r/man/Scalar.Rd | 17 +++++++++++++++ r/man/Schema.Rd | 9 ++++++++ r/man/Table.Rd | 11 ++++++++++ r/man/buffer.Rd | 9 ++++++++ r/man/call_function.Rd | 10 +++++++++ r/man/codec_is_available.Rd | 5 +++++ r/man/copy_files.Rd | 10 +++++++++ r/man/data-type.Rd | 8 +++++++ r/man/hive_partition.Rd | 5 +++++ r/man/list_compute_functions.Rd | 7 +++++++ r/man/load_flight_server.Rd | 5 +++++ r/man/match_arrow.Rd | 25 ++++++++++++++++++++++ r/man/read_delim_arrow.Rd | 11 ++++++++++ r/man/read_feather.Rd | 11 ++++++++++ r/man/read_json_arrow.Rd | 12 +++++++++++ r/man/read_parquet.Rd | 9 ++++++++ r/man/s3_bucket.Rd | 5 +++++ r/man/type.Rd | 10 +++++++++ r/man/unify_schemas.Rd | 7 +++++++ r/man/value_counts.Rd | 6 ++++++ r/man/write_csv_arrow.Rd | 7 +++++++ r/man/write_feather.Rd | 7 +++++++ r/man/write_ipc_stream.Rd | 7 +++++++ r/man/write_parquet.Rd | 12 +++++++++++ r/man/write_to_raw.Rd | 7 +++++++ 32 files changed, 371 insertions(+) diff --git a/r/man/ChunkedArray.Rd b/r/man/ChunkedArray.Rd index 486b6222af7..eaae0b3d4b8 100644 --- a/r/man/ChunkedArray.Rd +++ b/r/man/ChunkedArray.Rd @@ -53,6 +53,28 @@ within the array's internal data. This can be an expensive check, potentially \c } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Pass items into chunked_array as separate objects to create chunks +class_scores <- chunked_array(c(87, 88, 89), c(94, 93, 92), c(71, 72, 73)) +class_scores$num_chunks + +# When taking a Slice from a chunked_array, chunks are preserved +class_scores$Slice(2, length = 5) + +# You can combine Take and SortIndices to return a ChunkedArray with 1 chunk +# containing all values, ordered. +class_scores$Take(class_scores$SortIndices(descending = TRUE)) + +# If you pass a list into chunked_array, you get a list of length 1 +list_scores <- chunked_array(list(c(9.9, 9.6, 9.5), c(8.2, 8.3, 8.4), c(10.0, 9.9, 9.8))) +list_scores$num_chunks + +# When constructing a ChunkedArray, the first chunk is used to infer type. +doubles <- chunked_array(c(1, 2, 3), c(5L, 6L, 7L)) +doubles$type +\dontshow{\}) # examplesIf} +} \seealso{ \link{Array} } diff --git a/r/man/Field.Rd b/r/man/Field.Rd index 03dffd11ca9..77d31fa637a 100644 --- a/r/man/Field.Rd +++ b/r/man/Field.Rd @@ -28,3 +28,8 @@ field(name, type, metadata) } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +field("x", int32()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/FileFormat.Rd b/r/man/FileFormat.Rd index b8d4dc01bad..5bc9475b408 100644 --- a/r/man/FileFormat.Rd +++ b/r/man/FileFormat.Rd @@ -51,3 +51,18 @@ From \link{CsvFragmentScanOptions} (these values can be overridden at scan time) It returns the appropriate subclass of \code{FileFormat} (e.g. \code{ParquetFileFormat}) } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +## Semi-colon delimited files +# Set up directory for examples +tf <- tempfile() +dir.create(tf) +on.exit(unlink(tf)) +write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) + +# Create FileFormat object +format <- FileFormat$create(format = "text", delimiter = ";") + +open_dataset(tf, format = format) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index e97cf80ee7a..39146919768 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -44,3 +44,15 @@ The optional \verb{column_indices=} argument is a 0-based integer vector indicat } } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +f <- system.file("v0.7.1.parquet", package="arrow") +pq <- ParquetFileReader$create(f) +pq$GetSchema() +if (codec_is_available("snappy")) { + # This file has compressed data columns + tab <- pq$ReadTable() + tab$schema +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatch.Rd b/r/man/RecordBatch.Rd index e3024b91b7a..ff08c215853 100644 --- a/r/man/RecordBatch.Rd +++ b/r/man/RecordBatch.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more informatio } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +batch <- record_batch(name = rownames(mtcars), mtcars) +dim(batch) +dim(head(batch)) +names(batch) +batch$mpg +batch[["cyl"]] +as.data.frame(batch[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/RecordBatchReader.Rd b/r/man/RecordBatchReader.Rd index a206c30c8fb..90c796a6693 100644 --- a/r/man/RecordBatchReader.Rd +++ b/r/man/RecordBatchReader.Rd @@ -43,6 +43,43 @@ are in the file. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are, +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=read_ipc_stream]{read_ipc_stream()}} and \code{\link[=read_feather]{read_feather()}} provide a much simpler interface for reading data from these formats and are sufficient for many use cases. diff --git a/r/man/RecordBatchWriter.Rd b/r/man/RecordBatchWriter.Rd index cc6d2feb3ac..219c150e6a4 100644 --- a/r/man/RecordBatchWriter.Rd +++ b/r/man/RecordBatchWriter.Rd @@ -45,6 +45,43 @@ to be closed separately. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) + +batch <- record_batch(chickwts) + +# This opens a connection to the file in Arrow +file_obj <- FileOutputStream$create(tf) +# Pass that to a RecordBatchWriter to write data conforming to a schema +writer <- RecordBatchFileWriter$create(file_obj, batch$schema) +writer$write(batch) +# You may write additional batches to the stream, provided that they have +# the same schema. +# Call "close" on the writer to indicate end-of-file/stream +writer$close() +# Then, close the connection--closing the IPC message does not close the file +file_obj$close() + +# Now, we have a file we can read from. Same pattern: open file connection, +# then pass it to a RecordBatchReader +read_file_obj <- ReadableFile$create(tf) +reader <- RecordBatchFileReader$create(read_file_obj) +# RecordBatchFileReader knows how many batches it has (StreamReader does not) +reader$num_record_batches +# We could consume the Reader by calling $read_next_batch() until all are, +# consumed, or we can call $read_table() to pull them all into a Table +tab <- reader$read_table() +# Call as.data.frame to turn that Table into an R data.frame +df <- as.data.frame(tab) +# This should be the same data we sent +all.equal(df, chickwts, check.attributes = FALSE) +# Unlike the Writers, we don't have to close RecordBatchReaders, +# but we do still need to close the file connection +read_file_obj$close() +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_ipc_stream]{write_ipc_stream()}} and \code{\link[=write_feather]{write_feather()}} provide a much simpler interface for writing data to these formats and are sufficient for many use diff --git a/r/man/Scalar.Rd b/r/man/Scalar.Rd index 9128988d11c..21e04c12e08 100644 --- a/r/man/Scalar.Rd +++ b/r/man/Scalar.Rd @@ -19,3 +19,20 @@ A \code{Scalar} holds a single value of an Arrow type. \verb{$type}: Scalar type } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +Scalar$create(pi) +Scalar$create(404) +# If you pass a vector into Scalar$create, you get a list containing your items +Scalar$create(c(1, 2, 3)) + +# Comparisons +my_scalar <- Scalar$create(99) +my_scalar$ApproxEquals(Scalar$create(99.00001)) # FALSE +my_scalar$ApproxEquals(Scalar$create(99.000009)) # TRUE +my_scalar$Equals(Scalar$create(99.000009)) # FALSE +my_scalar$Equals(Scalar$create(99L)) # FALSE (types don't match) + +my_scalar$ToString() +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Schema.Rd b/r/man/Schema.Rd index 0c66e5c2a42..6e385bb804e 100644 --- a/r/man/Schema.Rd +++ b/r/man/Schema.Rd @@ -74,3 +74,12 @@ Files with compressed metadata are readable by older versions of arrow, but the metadata is dropped. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +df <- data.frame(col1 = 2:4, col2 = c(0.1, 0.3, 0.5)) +tab1 <- Table$create(df) +tab1$schema +tab2 <- Table$create(df, schema = schema(col1 = int8(), col2 = float32())) +tab2$schema +\dontshow{\}) # examplesIf} +} diff --git a/r/man/Table.Rd b/r/man/Table.Rd index d955b0f5a29..2675943e572 100644 --- a/r/man/Table.Rd +++ b/r/man/Table.Rd @@ -79,3 +79,14 @@ All list elements are coerced to string. See \code{schema()} for more informatio } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tab <- Table$create(name = rownames(mtcars), mtcars) +dim(tab) +dim(head(tab)) +names(tab) +tab$mpg +tab[["cyl"]] +as.data.frame(tab[4:8, c("gear", "hp", "wt")]) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/buffer.Rd b/r/man/buffer.Rd index 99b636da3c7..a3ca1fc2fcb 100644 --- a/r/man/buffer.Rd +++ b/r/man/buffer.Rd @@ -33,3 +33,12 @@ contiguous memory with a particular size. } } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +my_buffer <- buffer(c(1, 2, 3, 4)) +my_buffer$is_mutable +my_buffer$ZeroPadding() +my_buffer$size +my_buffer$capacity +\dontshow{\}) # examplesIf} +} diff --git a/r/man/call_function.Rd b/r/man/call_function.Rd index 790c4237518..f63038442dc 100644 --- a/r/man/call_function.Rd +++ b/r/man/call_function.Rd @@ -35,6 +35,16 @@ are callable with an \code{arrow_} prefix. When passing indices in \code{...}, \code{args}, or \code{options}, express them as 0-based integers (consistent with C++). } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- Array$create(c(1L, 2L, 3L, NA, 5L)) +s <- Scalar$create(4L) +call_function("fill_null", a, s) + +a <- Array$create(rnorm(10000)) +call_function("quantile", a, options = list(q = seq(0, 1, 0.25))) +\dontshow{\}) # examplesIf} +} \seealso{ \href{https://arrow.apache.org/docs/cpp/compute.html}{Arrow C++ documentation} for the functions and their respective options. } diff --git a/r/man/codec_is_available.Rd b/r/man/codec_is_available.Rd index 1b5e8278fa9..b3238ff1dca 100644 --- a/r/man/codec_is_available.Rd +++ b/r/man/codec_is_available.Rd @@ -18,3 +18,8 @@ Support for compression libraries depends on the build-time settings of the Arrow C++ library. This function lets you know which are available for use. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +codec_is_available("gzip") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/copy_files.Rd b/r/man/copy_files.Rd index 75cc4405d8a..1b83703f19f 100644 --- a/r/man/copy_files.Rd +++ b/r/man/copy_files.Rd @@ -23,3 +23,13 @@ Nothing: called for side effects in the file system \description{ Copy files between FileSystems } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# Copy an S3 bucket's files to a local directory: +copy_files("s3://your-bucket-name", "local-directory") +# Using a FileSystem object +copy_files(s3_bucket("your-bucket-name"), "local-directory") +# Or go the other way, from local to S3 +copy_files("local-directory", s3_bucket("your-bucket-name")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 101702a2fb2..a0631897573 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -150,6 +150,14 @@ are translated to R objects, \code{uint32} and \code{uint64} are converted to \c types, this conversion can be disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by setting \code{options(arrow.int64_downcast = FALSE)}. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bool() +struct(a = int32(), b = double()) +timestamp("ms", timezone = "CEST") +time64("ns") +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type. } diff --git a/r/man/hive_partition.Rd b/r/man/hive_partition.Rd index 39d5d8d0ae2..eef9f9157ea 100644 --- a/r/man/hive_partition.Rd +++ b/r/man/hive_partition.Rd @@ -28,3 +28,8 @@ Hive partitioning embeds field names and values in path segments, such as Because fields are named in the path segments, order of fields passed to \code{hive_partition()} does not matter. } +\examples{ +\dontshow{if (arrow_with_dataset()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +hive_partition(year = int16(), month = int8()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/list_compute_functions.Rd b/r/man/list_compute_functions.Rd index ba17688d833..668e090c0ca 100644 --- a/r/man/list_compute_functions.Rd +++ b/r/man/list_compute_functions.Rd @@ -37,3 +37,10 @@ The package includes Arrow methods for many base R functions that can be called directly on Arrow objects, as well as some tidyverse-flavored versions available inside \code{dplyr} verbs. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +list_compute_functions() +list_compute_functions(pattern = "^UTF8", ignore.case = TRUE) +list_compute_functions(pattern = "^is", invert = TRUE) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/load_flight_server.Rd b/r/man/load_flight_server.Rd index 7e2000a9ca2..66d30f39147 100644 --- a/r/man/load_flight_server.Rd +++ b/r/man/load_flight_server.Rd @@ -15,3 +15,8 @@ to look in the \verb{inst/} directory for included modules.} \description{ Load a Python Flight server } +\examples{ +\dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +load_flight_server("demo_flight_server") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/match_arrow.Rd b/r/man/match_arrow.Rd index 21481af4c6b..d63ef3eed87 100644 --- a/r/man/match_arrow.Rd +++ b/r/man/match_arrow.Rd @@ -26,3 +26,28 @@ per element of \code{x} it it is present in \code{table}. \code{base::match()} is not a generic, so we can't just define Arrow methods for it. This function exposes the analogous functions in the Arrow C++ library. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# note that the returned value is 0-indexed +cars_tbl <- Table$create(name = rownames(mtcars), mtcars) +match_arrow(Scalar$create("Mazda RX4 Wag"), cars_tbl$name) + +is_in(Array$create("Mazda RX4 Wag"), cars_tbl$name) + +# Although there are multiple matches, you are returned the index of the first +# match, as with the base R equivalent +match(4, mtcars$cyl) # 1-indexed +match_arrow(Scalar$create(4), cars_tbl$cyl) # 0-indexed + +# If `x` contains multiple values, you are returned the indices of the first +# match for each value. +match(c(4, 6, 8), mtcars$cyl) +match_arrow(Array$create(c(4, 6, 8)), cars_tbl$cyl) + +# Return type matches type of `x` +is_in(c(4, 6, 8), mtcars$cyl) # returns vector +is_in(Scalar$create(4), mtcars$cyl) # returns Scalar +is_in(Array$create(c(4, 6, 8)), cars_tbl$cyl) # returns Array +is_in(ChunkedArray$create(c(4, 6), 8), cars_tbl$cyl) # returns ChunkedArray +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_delim_arrow.Rd b/r/man/read_delim_arrow.Rd index d9c80306931..71394e547c9 100644 --- a/r/man/read_delim_arrow.Rd +++ b/r/man/read_delim_arrow.Rd @@ -205,3 +205,14 @@ Note that if you are specifying column names, whether by \code{schema} or to idenfity column names, you'll need to add \code{skip = 1} to skip that row. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + tf <- tempfile() + on.exit(unlink(tf)) + write.csv(mtcars, file = tf) + df <- read_csv_arrow(tf) + dim(df) + # Can select columns + df <- read_csv_arrow(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_feather.Rd b/r/man/read_feather.Rd index fa18e3f7844..95f4d1d12c6 100644 --- a/r/man/read_feather.Rd +++ b/r/man/read_feather.Rd @@ -34,6 +34,17 @@ and to make sharing data across data analysis languages easy. This function reads both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +df <- read_feather(tf) +dim(df) +# Can select columns +df <- read_feather(tf, col_select = starts_with("d")) +\dontshow{\}) # examplesIf} +} \seealso{ \link{FeatherReader} and \link{RecordBatchReader} for lower-level access to reading Arrow IPC data. } diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd index 476c99fe4de..4806b4ad1f0 100644 --- a/r/man/read_json_arrow.Rd +++ b/r/man/read_json_arrow.Rd @@ -38,3 +38,15 @@ A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}. \description{ Using \link{JsonTableReader} } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} + tf <- tempfile() + on.exit(unlink(tf)) + writeLines(' + { "hello": 3.5, "world": false, "yo": "thing" } + { "hello": 3.25, "world": null } + { "hello": 0.0, "world": true, "yo": null } + ', tf, useBytes=TRUE) + df <- read_json_arrow(tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index ffb2cf7109f..056e8644747 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -39,3 +39,12 @@ A \link[=Table]{arrow::Table}, or a \code{data.frame} if \code{as_data_frame} is '\href{https://parquet.apache.org/}{Parquet}' is a columnar storage file format. This function enables you to read Parquet files into R. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_parquet(mtcars, tf) +df <- read_parquet(tf, col_select = starts_with("d")) +head(df) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/s3_bucket.Rd b/r/man/s3_bucket.Rd index 78d527a56c4..95a086deae5 100644 --- a/r/man/s3_bucket.Rd +++ b/r/man/s3_bucket.Rd @@ -21,3 +21,8 @@ are authorized to access the bucket's contents. that automatically detects the bucket's AWS region and holding onto the its relative path. } +\examples{ +\dontshow{if (arrow_with_s3()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +bucket <- s3_bucket("ursa-labs-taxi-data") +\dontshow{\}) # examplesIf} +} diff --git a/r/man/type.Rd b/r/man/type.Rd index 2f85e4a6ac6..d55bbe24bd5 100644 --- a/r/man/type.Rd +++ b/r/man/type.Rd @@ -15,3 +15,13 @@ an arrow logical type \description{ infer the arrow Array type from an R vector } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +type(1:10) +type(1L:10L) +type(c(1, 1.5, 2)) +type(c("A", "B", "C")) +type(mtcars) +type(Sys.Date()) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/unify_schemas.Rd b/r/man/unify_schemas.Rd index 709e33a5e74..50c80c2dda9 100644 --- a/r/man/unify_schemas.Rd +++ b/r/man/unify_schemas.Rd @@ -18,3 +18,10 @@ A \code{Schema} with the union of fields contained in the inputs, or \description{ Combine and harmonize schemas } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +a <- schema(b = double(), c = bool()) +z <- schema(b = double(), k = utf8()) +unify_schemas(a, z) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/value_counts.Rd b/r/man/value_counts.Rd index 139af8edc63..6ef77cd4727 100644 --- a/r/man/value_counts.Rd +++ b/r/man/value_counts.Rd @@ -16,3 +16,9 @@ A \code{StructArray} containing "values" (same type as \code{x}) and "counts" \description{ This function tabulates the values in the array and returns a table of counts. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +cyl_vals <- Array$create(mtcars$cyl) +value_counts(cyl_vals) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_csv_arrow.Rd b/r/man/write_csv_arrow.Rd index d6df2bcd08e..55a239ca998 100644 --- a/r/man/write_csv_arrow.Rd +++ b/r/man/write_csv_arrow.Rd @@ -23,3 +23,10 @@ the stream will be left open. \description{ Write CSV file to disk } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_csv_arrow(mtcars, tf) +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_feather.Rd b/r/man/write_feather.Rd index 0cc8c591369..c6273b61be8 100644 --- a/r/man/write_feather.Rd +++ b/r/man/write_feather.Rd @@ -47,6 +47,13 @@ and to make sharing data across data analysis languages easy. This function writes both the original, limited specification of the format and the version 2 specification, which is the Apache Arrow IPC file format. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_feather(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \link{RecordBatchWriter} for lower-level access to writing Arrow IPC data. diff --git a/r/man/write_ipc_stream.Rd b/r/man/write_ipc_stream.Rd index 4f742ce9178..888d947eb99 100644 --- a/r/man/write_ipc_stream.Rd +++ b/r/man/write_ipc_stream.Rd @@ -31,6 +31,13 @@ with some nonstandard behavior, is deprecated. You should explicitly choose the function that will write the desired IPC format (stream or file) since either can be written to a file or \code{OutputStream}. } +\examples{ +\dontshow{if (arrow_available() ) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf <- tempfile() +on.exit(unlink(tf)) +write_ipc_stream(mtcars, tf) +\dontshow{\}) # examplesIf} +} \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \code{\link[=write_to_raw]{write_to_raw()}} to serialize data to a buffer. diff --git a/r/man/write_parquet.Rd b/r/man/write_parquet.Rd index 823a6038e84..d7147f7e8e6 100644 --- a/r/man/write_parquet.Rd +++ b/r/man/write_parquet.Rd @@ -94,3 +94,15 @@ The default "snappy" is used if available, otherwise "uncompressed". To disable compression, set \code{compression = "uncompressed"}. Note that "uncompressed" columns may still have dictionary encoding. } +\examples{ +\dontshow{if (arrow_with_parquet()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +tf1 <- tempfile(fileext = ".parquet") +write_parquet(data.frame(x = 1:5), tf1) + +# using compression +if (codec_is_available("gzip")) { + tf2 <- tempfile(fileext = ".gz.parquet") + write_parquet(data.frame(x = 1:5), tf2, compression = "gzip", compression_level = 5) +} +\dontshow{\}) # examplesIf} +} diff --git a/r/man/write_to_raw.Rd b/r/man/write_to_raw.Rd index 46af09a96e8..1f507e384c3 100644 --- a/r/man/write_to_raw.Rd +++ b/r/man/write_to_raw.Rd @@ -20,3 +20,10 @@ the data (\code{data.frame}, \code{RecordBatch}, or \code{Table}) they were give This function wraps those so that you can serialize data to a buffer and access that buffer as a \code{raw} vector in R. } +\examples{ +\dontshow{if (arrow_available()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +# The default format is "stream" +write_to_raw(mtcars) +write_to_raw(mtcars, format = "file") +\dontshow{\}) # examplesIf} +} From b00a8a04cbcb99ba6db6e3b911e1be6b4872bec0 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 11 Jun 2021 07:44:31 -0400 Subject: [PATCH 09/46] removing std::function visitor and adding direct ref template --- cpp/src/arrow/util/bitmap.h | 25 +++++++-------- cpp/src/arrow/util/bitmap_test.cc | 52 +++++++++++++++---------------- 2 files changed, 37 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 7174923fca4..ec88e7574f1 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -414,13 +414,15 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return min_word_offset; } - template - using MultiOutputVisitor = std::function& in_words, - std::array& out_words)>; + // template + // using MultiOutputVisitor = std::function& in_words, + // std::array& out_words)>; - template + template >::type::value_type> static void VisitWordsAndWrite(const std::array& bitmaps_arg, - MultiOutputVisitor&& visitor, + Visitor&& visitor, std::array& out_bitmaps_arg) { constexpr int64_t kBitWidth = sizeof(Word) * 8; @@ -510,16 +512,13 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, } } - template - using SingleOutputVisitor = - std::function& in_words, Word& out_words)>; - - template + template >::type::value_type> static void VisitWordsAndWrite(const std::array& bitmaps_arg, - SingleOutputVisitor&& visitor, - Bitmap& out_bitmap_arg) { + Visitor&& visitor, Bitmap& out_bitmap_arg) { std::array out_bitmaps{out_bitmap_arg}; - VisitWordsAndWrite( + VisitWordsAndWrite( bitmaps_arg, [&](const std::array& in_words, std::array& out_words) { visitor(in_words, out_words[0]); diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc index 0db0fa5854c..d981cb7611d 100644 --- a/cpp/src/arrow/util/bitmap_test.cc +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -80,13 +80,13 @@ TEST_F(TestBitmapVisit, SingleWriterOutputZeroOffset) { ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part)); Bitmap out_bm(out, 0, part); - auto visitor = [](const std::array& in_words, uint64_t& out_words) { - out_words = (in_words[0] & in_words[1]) | in_words[2]; - }; - // (bm0 & bm1) | bm2 + std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - {bm0, bm1, bm2}, std::forward>(visitor), + in_bms, + [](const std::array& in_words, uint64_t& out_words) { + out_words = (in_words[0] & in_words[1]) | in_words[2]; + }, out_bm); std::vector v0(data.begin(), data.begin() + part); @@ -122,13 +122,13 @@ TEST_F(TestBitmapVisit, SingleWriterOutputNonZeroOffset) { ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 2)); Bitmap out_bm(out, part, part); - auto visitor = [](const std::array& in_words, uint64_t& out_words) { - out_words = (in_words[0] & in_words[1]) | in_words[2]; - }; - // (bm0 & bm1) | bm2 + std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - {bm0, bm1, bm2}, std::forward>(visitor), + in_bms, + [](const std::array& in_words, uint64_t& out_words) { + out_words = (in_words[0] & in_words[1]) | in_words[2]; + }, out_bm); std::vector v0(data.begin(), data.begin() + part); @@ -171,15 +171,14 @@ TEST_F(TestBitmapVisit, MultiWriterOutputZeroOffset) { std::vector v2(data.begin() + part * 2, data.begin() + part * 3); // out0 = bm0 & bm1, out1= bm0 | bm2 - auto visitor_func = [](const std::array& in, - std::array& out) { - out[0] = in[0] & in[1]; - out[1] = in[0] | in[2]; - }; - + std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - {bm0, bm1, bm2}, - std::forward>(visitor_func), out_bms); + in_bms, + [](const std::array& in, std::array& out) { + out[0] = in[0] & in[1]; + out[1] = in[0] | in[2]; + }, + out_bms); std::vector out_v0(part); std::vector out_v1(part); @@ -235,15 +234,14 @@ TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { // std::cout << "b2: " << bm2.ToString() << "\n"; // out0 = bm0 & bm1, out1= bm0 | bm2 - auto visitor_func = [](const std::array& in, - std::array& out) { - out[0] = in[0] & in[1]; - out[1] = in[0] | in[2]; - }; - + std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - {bm0, bm1, bm2}, - std::forward>(visitor_func), out_bms); + in_bms, + [](const std::array& in, std::array& out) { + out[0] = in[0] & in[1]; + out[1] = in[0] | in[2]; + }, + out_bms); std::vector out_v0(part); std::vector out_v1(part); @@ -259,4 +257,4 @@ TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { } } // namespace internal -} // namespace arrow \ No newline at end of file +} // namespace arrow From 2743309595ce62a35aa2f450bd098c224f82c097 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 11 Jun 2021 14:24:19 -0400 Subject: [PATCH 10/46] simplifying impl --- .../arrow/compute/kernels/scalar_if_else.cc | 103 ++++---- cpp/src/arrow/util/bitmap.h | 235 ++---------------- cpp/src/arrow/util/bitmap_reader.h | 17 +- cpp/src/arrow/util/bitmap_test.cc | 168 +++++-------- cpp/src/arrow/util/bitmap_writer.h | 19 +- 5 files changed, 151 insertions(+), 391 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 83e5501a0f1..8a85f61b9a7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -108,96 +108,101 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& return c_valid & ((c_data & l_valid) | (~c_data & r_valid)); }; - Bitmap out_bitmap(output->buffers[0], 0, cond.length); + std::array out_bitmaps{Bitmap{output->buffers[0], 0, cond.length}}; enum { C_VALID, C_DATA, L_VALID, R_VALID }; switch (flag) { case COND_CONST | LEFT_CONST | RIGHT_CONST: { std::array bitmaps{_, cond_data, _, _}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(*cond_const, words[C_DATA], *left_const, *right_const); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = apply(*cond_const, words_in[C_DATA], + *left_const, *right_const); + }); break; } case LEFT_CONST | RIGHT_CONST: { std::array bitmaps{cond_valid, cond_data, _, _}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(words[C_VALID], words[C_DATA], *left_const, *right_const); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = + apply(words_in[C_VALID], words_in[C_DATA], + *left_const, *right_const); + }); break; } case COND_CONST | RIGHT_CONST: { // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for // Visit() std::array bitmaps{_, cond_data, left_valid, _}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(*cond_const, words[C_DATA], words[L_VALID], *right_const); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = apply(*cond_const, words_in[C_DATA], + words_in[L_VALID], *right_const); + }); break; } case RIGHT_CONST: { // bitmaps[R_VALID] might be null; override to make it safe for Visit() std::array bitmaps{cond_valid, cond_data, left_valid, _}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(words[C_VALID], words[C_DATA], words[L_VALID], *right_const); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = + apply(words_in[C_VALID], words_in[C_DATA], + words_in[L_VALID], *right_const); + }); break; } case COND_CONST | LEFT_CONST: { // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for // Visit() std::array bitmaps{_, cond_data, _, right_valid}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(*cond_const, words[C_DATA], *left_const, words[R_VALID]); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = apply(*cond_const, words_in[C_DATA], + *left_const, words_in[R_VALID]); + }); break; } case LEFT_CONST: { // bitmaps[L_VALID] might be null; override to make it safe for Visit() std::array bitmaps{cond_valid, cond_data, _, right_valid}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(words[C_VALID], words[C_DATA], *left_const, words[R_VALID]); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = + apply(words_in[C_VALID], words_in[C_DATA], + *left_const, words_in[R_VALID]); + }); break; } case COND_CONST: { // bitmaps[C_VALID] might be null; override to make it safe for Visit() std::array bitmaps{_, cond_data, left_valid, right_valid}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(*cond_const, words[C_DATA], words[L_VALID], words[R_VALID]); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = + apply(*cond_const, words_in[C_DATA], + words_in[L_VALID], words_in[R_VALID]); + }); break; } case 0: { std::array bitmaps{cond_valid, cond_data, left_valid, right_valid}; - Bitmap::VisitWordsAndWrite( - bitmaps, - [&](std::array words) { - return apply(words[C_VALID], words[C_DATA], words[L_VALID], words[R_VALID]); - }, - &out_bitmap); + Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, + [&](const std::array& words_in, + std::array& word_out) { + word_out[0] = + apply(words_in[C_VALID], words_in[C_DATA], + words_in[L_VALID], words_in[R_VALID]); + }); break; } } diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index ec88e7574f1..613366eb8af 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -248,186 +248,31 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return min_offset; } - /// \brief Visit words of bits from each bitmap as array + /// \brief Visit words of bits from each input bitmap as array and collects + /// outputs to an array, to be written into the output bitmaps accordingly. /// /// All bitmaps must have identical length. The first bit in a visited bitmap /// may be offset within the first visited word, but words will otherwise contain /// densely packed bits loaded from the bitmap. That offset within the first word is /// returned. + /// Visitor is expected to have the following signature + /// [](const std::array& in_words, std::array& out_words){...} /// - /// TODO(bkietz) allow for early termination // NOTE: this function is efficient on 3+ sufficiently large bitmaps. // It also has a large prolog / epilog overhead and should be used // carefully in other cases. // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid // and BitmapUInt64Reader. - template >::type::value_type> - static int64_t VisitWordsAndWrite(const std::array& bitmaps_arg, - Visitor&& visitor, Bitmap* out_bitmap_arg) { - constexpr int64_t kBitWidth = sizeof(Word) * 8; - - // local, mutable variables which will be sliced/decremented to represent consumption: - Bitmap bitmaps[N]; // todo use std::array here - int64_t bit_length = BitLength(bitmaps_arg); - - struct BitmapHolder { - BitmapHolder() = default; - explicit BitmapHolder(Bitmap* bitmap_) - : bitmap(bitmap_), - word_offset(bitmap_->template word_offset()), - words(bitmap_->template words()) { - assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); - } - - inline void StrideAndUpdate(int64_t _stride) { - BitmapHolder::bitmap->Stride(_stride); - BitmapHolder::word_offset = bitmap->template word_offset(); - assert(BitmapHolder::word_offset >= 0 && BitmapHolder::word_offset < kBitWidth); - BitmapHolder::words = bitmap->template words(); - } - - Bitmap* bitmap{}; - int64_t word_offset = 0; - View words; - }; - - std::array in_bitmaps; - Bitmap out_bitmap = *out_bitmap_arg; // make a copy - - for (size_t i = 0; i < N; ++i) { - bitmaps[i] = bitmaps_arg[i]; // make a copy - in_bitmaps[i] = BitmapHolder(&bitmaps[i]); - } - - auto consume = [&](int64_t consumed_bits) { - for (size_t i = 0; i < N; ++i) { - in_bitmaps[i].StrideAndUpdate(consumed_bits); - } - out_bitmap.Stride(consumed_bits); - - bit_length -= consumed_bits; - }; - - std::array visited_words; - visited_words.fill(0); - - if (bit_length <= kBitWidth * 2) { - // bitmaps fit into one or two words so don't bother with optimization - while (bit_length > 0) { - auto leading_bits = std::min(bit_length, kBitWidth); - SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); - Word visit_out = visitor(visited_words); // outputs a word/ partial word - CopyBitmap(reinterpret_cast(&visit_out), 0, leading_bits, - out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); - consume(leading_bits); - } - return 0; - } - - auto word_offset_comp = [](const BitmapHolder& l, const BitmapHolder& r) { - return l.word_offset < r.word_offset; - }; - - int64_t max_word_offset = - (*std::max_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) - .word_offset; - int64_t min_word_offset = - (*std::min_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) - .word_offset; - if (max_word_offset > 0) { - // consume leading bits - auto leading_bits = kBitWidth - min_word_offset; - SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); - Word visit_out = visitor(visited_words); - CopyBitmap(reinterpret_cast(&visit_out), sizeof(Word) * 8 - leading_bits, - leading_bits, out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); - consume(leading_bits); - } - assert((*std::min_element(in_bitmaps.begin(), in_bitmaps.end(), word_offset_comp)) - .word_offset == 0); - - int64_t whole_word_count = bit_length / kBitWidth; - assert(whole_word_count >= 1); - - std::vector visit_outs; - visit_outs.reserve(whole_word_count); - - if (min_word_offset == max_word_offset) { - // all offsets were identical, all leading bits have been consumed - assert(std::all_of( - in_bitmaps.begin(), in_bitmaps.end(), - [](const BitmapHolder& holder) { return holder.word_offset == 0; })); - - for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { - for (size_t i = 0; i < N; ++i) { - visited_words[i] = in_bitmaps[i].words[word_i]; - } - visit_outs.template emplace_back(visitor(visited_words)); - } - CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - whole_word_count * kBitWidth, out_bitmap.buffer_->mutable_data(), - out_bitmap.offset()); - consume(whole_word_count * kBitWidth); - } else { - // leading bits from potentially incomplete words have been consumed - - // word_i such that words[i][word_i] and words[i][word_i + 1] are lie entirely - // within the bitmap for all i - for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { - for (size_t i = 0; i < N; ++i) { - const auto ith_words = in_bitmaps[i].words; - const auto ith_word_offset = in_bitmaps[i].word_offset; - if (ith_word_offset == 0) { - visited_words[i] = ith_words[word_i]; - } else { - auto words0 = BitUtil::ToLittleEndian(ith_words[word_i]); - auto words1 = BitUtil::ToLittleEndian(ith_words[word_i + 1]); - visited_words[i] = BitUtil::FromLittleEndian( - (words0 >> ith_word_offset) | (words1 << (kBitWidth - ith_word_offset))); - } - } - visit_outs.template emplace_back(visitor(visited_words)); - } - CopyBitmap(reinterpret_cast(visit_outs.data()), 0, - (whole_word_count - 1) * kBitWidth, out_bitmap.buffer_->mutable_data(), - out_bitmap.offset()); - consume((whole_word_count - 1) * kBitWidth); - - SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); - - Word visit_out = visitor(visited_words); // outputs a word/ partial word - CopyBitmap(reinterpret_cast(&visit_out), 0, kBitWidth, - out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); - consume(kBitWidth); - } - - // load remaining bits - if (bit_length > 0) { - SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); - Word visit_out = visitor(visited_words); - CopyBitmap(reinterpret_cast(&visit_out), 0, bit_length, - out_bitmap.buffer_->mutable_data(), out_bitmap.offset()); - } - - return min_word_offset; - } - - // template - // using MultiOutputVisitor = std::function& in_words, - // std::array& out_words)>; - template >::type::value_type> static void VisitWordsAndWrite(const std::array& bitmaps_arg, - Visitor&& visitor, - std::array& out_bitmaps_arg) { + std::array* out_bitmaps_arg, + Visitor&& visitor) { constexpr int64_t kBitWidth = sizeof(Word) * 8; int64_t bit_length = BitLength(bitmaps_arg); - assert(bit_length == BitLength(out_bitmaps_arg)); + assert(bit_length == BitLength(*out_bitmaps_arg)); std::array, N> readers; for (size_t i = 0; i < N; ++i) { @@ -437,9 +282,9 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, std::array, M> writers; for (size_t i = 0; i < M; ++i) { - writers[i] = - BitmapWordWriter(out_bitmaps_arg[i].buffer_->mutable_data(), - out_bitmaps_arg[i].offset_, out_bitmaps_arg[i].length_); + const Bitmap& out_bitmap = out_bitmaps_arg->at(i); + writers[i] = BitmapWordWriter(out_bitmap.buffer_->mutable_data(), + out_bitmap.offset_, out_bitmap.length_); } std::array visited_words; @@ -456,76 +301,32 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, for (size_t i = 0; i < N; i++) { visited_words[i] = readers[i].NextWord(); } - visitor(visited_words, output_words); - for (size_t i = 0; i < M; i++) { writers[i].PutNextWord(output_words[i]); } - bit_length -= kBitWidth; } // every reader will have same number of trailing bytes, because of the above reason - // todo when the above issue is resolved, following logic also needs to be fixed! // tailing portion could be more than one word! (ref: BitmapWordReader constructor) - assert(static_cast(bit_length) < kBitWidth * 2); - if (bit_length / kBitWidth) { - // there's one full word in trailing portion. Cant use NextWord() here because it - // doesn't stride the trailing metadata - for (size_t i = 0; i < N; i++) { - visited_words[i] = 0; - for (size_t b = 0; b < sizeof(Word); b++) { - int dummy; - auto byte = static_cast(readers[i].NextTrailingByte(dummy)); - visited_words[i] |= byte << (b * 8); - } - } - - visitor(visited_words, output_words); - - for (size_t i = 0; i < M; i++) { - writers[i].PutNextWord(output_words[i]); - } - - bit_length -= kBitWidth; - } - - // clean-up last partial word - if (bit_length) { + // remaining full/ partial words to write + n_words = (bit_length + kBitWidth - 1) / kBitWidth; + assert(n_words <= 2); + while (n_words--) { + visited_words.fill(0); output_words.fill(0); + int valid_bits; for (size_t i = 0; i < N; i++) { - visited_words[i] = 0; - int n_byte = readers[i].trailing_bytes(); - for (int b = 0; b < n_byte; b++) { - int valid_bits; - auto byte = static_cast(readers[i].NextTrailingByte(valid_bits)); - visited_words[i] |= (byte << b * 8); - } + visited_words[i] = readers[i].NextTrailingWord(valid_bits); } - visitor(visited_words, output_words); - for (size_t i = 0; i < M; i++) { - writers[i].PutNextWord(output_words[i], bit_length); + writers[i].PutTrailingWord(output_words[i], valid_bits); } } } - template >::type::value_type> - static void VisitWordsAndWrite(const std::array& bitmaps_arg, - Visitor&& visitor, Bitmap& out_bitmap_arg) { - std::array out_bitmaps{out_bitmap_arg}; - VisitWordsAndWrite( - bitmaps_arg, - [&](const std::array& in_words, std::array& out_words) { - visitor(in_words, out_words[0]); - }, - out_bitmaps); - } - const std::shared_ptr& buffer() const { return buffer_; } /// offset of first bit relative to buffer().data() diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index a562e9a1294..70584d5dfe9 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -153,7 +153,7 @@ class BitmapWordReader { BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) { bitmap_ = bitmap + offset / 8; offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset + length); + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); // decrement word count by one as we may touch two adjacent words in one iteration nwords_ = length / (sizeof(Word) * 8) - 1; @@ -193,6 +193,21 @@ class BitmapWordReader { return word; } + Word NextTrailingWord(int& valid_bits) { + // safest way to create a word from the trailing bits, is to concatenate bytes + // returned by NextTrailingByte + Word word = 0; // only a partial word may be returned. + valid_bits = 0; + int n_byte = std::min(trailing_bytes_, static_cast(sizeof(Word))); + for (int b = 0; b < n_byte; b++) { + int valid; + auto byte = static_cast(NextTrailingByte(valid)); + word |= byte << (b * 8); + valid_bits += valid; + } + return word; + } + uint8_t NextTrailingByte(int& valid_bits) { uint8_t byte; assert(trailing_bits_ > 0); diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc index d981cb7611d..601cf6f65ad 100644 --- a/cpp/src/arrow/util/bitmap_test.cc +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -58,95 +58,8 @@ void VerifyBoolOutput(const Bitmap& bitmap, const std::vector& expected) { << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString(); } -class TestBitmapVisit : public ::testing::Test {}; - -TEST_F(TestBitmapVisit, SingleWriterOutputZeroOffset) { - // choosing part = 199, a prime, so that shifts are falling in-between bytes - int64_t part = 199, bits = part * 4; - - std::vector data; - random_bool_vector(data, bits); - - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); - - std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; - - Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len - Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len - - ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part)); - Bitmap out_bm(out, 0, part); - - // (bm0 & bm1) | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, - [](const std::array& in_words, uint64_t& out_words) { - out_words = (in_words[0] & in_words[1]) | in_words[2]; - }, - out_bm); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - std::vector v3(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), v3.begin(), std::logical_and()); - // v3 |= v2 - std::transform(v3.begin(), v3.end(), v2.begin(), v3.begin(), std::logical_or()); - - VerifyBoolOutput(out_bm, v3); -} - -TEST_F(TestBitmapVisit, SingleWriterOutputNonZeroOffset) { - // choosing part = 199, a prime - int64_t part = 199, bits = part * 4; - - std::vector data; - random_bool_vector(data, bits); - - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); - - std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; - - Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len - Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len - - // allocate lager buffer but only use the last `part` - ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 2)); - Bitmap out_bm(out, part, part); - - // (bm0 & bm1) | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, - [](const std::array& in_words, uint64_t& out_words) { - out_words = (in_words[0] & in_words[1]) | in_words[2]; - }, - out_bm); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - std::vector v3(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), v3.begin(), std::logical_and()); - // v3 |= v2 - std::transform(v3.begin(), v3.end(), v2.begin(), v3.begin(), std::logical_or()); - - VerifyBoolOutput(out_bm, v3); -} - -TEST_F(TestBitmapVisit, MultiWriterOutputZeroOffset) { - // choosing part = 199, a prime - int64_t part = 199, bits = part * 4; - +void RunOutputNoOffset(int part) { + int64_t bits = 4 * part; std::vector data; random_bool_vector(data, bits); @@ -173,12 +86,11 @@ TEST_F(TestBitmapVisit, MultiWriterOutputZeroOffset) { // out0 = bm0 & bm1, out1= bm0 | bm2 std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( - in_bms, + in_bms, &out_bms, [](const std::array& in, std::array& out) { out[0] = in[0] & in[1]; out[1] = in[0] | in[2]; - }, - out_bms); + }); std::vector out_v0(part); std::vector out_v1(part); @@ -200,10 +112,8 @@ TEST_F(TestBitmapVisit, MultiWriterOutputZeroOffset) { VerifyBoolOutput(out_bms[1], out_v1); } -TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { - // choosing part = 199, a prime - int64_t part = 199, bits = part * 4; - +void RunOutputWithOffset(int64_t part) { + int64_t bits = part * 4; std::vector data; random_bool_vector(data, bits); @@ -226,22 +136,12 @@ TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { std::vector v1(data.begin() + part * 1, data.begin() + part * 2); std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - // std::cout << "v0: " << VectorToString(v0)<< "\n"; - // std::cout << "b0: " << bm0.ToString() << "\n"; - // std::cout << "v1: " << VectorToString(v1) << "\n"; - // std::cout << "b1: " << bm1.ToString() << "\n"; - // std::cout << "v2: " << VectorToString(v2) << "\n"; - // std::cout << "b2: " << bm2.ToString() << "\n"; - - // out0 = bm0 & bm1, out1= bm0 | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, - [](const std::array& in, std::array& out) { - out[0] = in[0] & in[1]; - out[1] = in[0] | in[2]; - }, - out_bms); + std::cout << "v0: " << VectorToString(v0) << "\n"; + std::cout << "b0: " << bm0.ToString() << "\n"; + std::cout << "v1: " << VectorToString(v1) << "\n"; + std::cout << "b1: " << bm1.ToString() << "\n"; + std::cout << "v2: " << VectorToString(v2) << "\n"; + std::cout << "b2: " << bm2.ToString() << "\n"; std::vector out_v0(part); std::vector out_v1(part); @@ -252,9 +152,53 @@ TEST_F(TestBitmapVisit, MultiWriterOutputNonZeroOffset) { std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), std::logical_or()); + std::cout << "out0: " << VectorToString(out_v0) << "\n"; + std::cout << "out1: " << VectorToString(out_v1) << "\n"; + + // out0 = bm0 & bm1, out1= bm0 | bm2 + std::array in_bms{bm0, bm1, bm2}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [](const std::array& in, std::array& out) { + out[0] = in[0] & in[1]; + out[1] = in[0] | in[2]; + }); + VerifyBoolOutput(out_bms[0], out_v0); VerifyBoolOutput(out_bms[1], out_v1); } +class TestBitmapVisitOutputNoOffset : public ::testing::TestWithParam {}; + +TEST_P(TestBitmapVisitOutputNoOffset, Test1) { + auto part = GetParam(); + RunOutputNoOffset(part); +} + +INSTANTIATE_TEST_SUITE_P(General, TestBitmapVisitOutputNoOffset, + testing::Values(199, 256, 1000)); + +INSTANTIATE_TEST_SUITE_P(EdgeCases, TestBitmapVisitOutputNoOffset, + testing::Values(5, 13, 21, 29, 37, 41, 51, 59, 64, 97)); + +INSTANTIATE_TEST_SUITE_P(EdgeCases2, TestBitmapVisitOutputNoOffset, + testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); + +class TestBitmapVisitOutputWithOffset : public ::testing::TestWithParam {}; + +TEST_P(TestBitmapVisitOutputWithOffset, Test2) { + auto part = GetParam(); + RunOutputWithOffset(part); +} + +INSTANTIATE_TEST_SUITE_P(General, TestBitmapVisitOutputWithOffset, + testing::Values(199, 256, 1000)); + +INSTANTIATE_TEST_SUITE_P(EdgeCases, TestBitmapVisitOutputWithOffset, + testing::Values(7, 15, 23, 31, 39, 47, 55, 63, 73, 97)); + +INSTANTIATE_TEST_SUITE_P(EdgeCases2, TestBitmapVisitOutputWithOffset, + testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h index ca75abbf15c..e4f86a269fc 100644 --- a/cpp/src/arrow/util/bitmap_writer.h +++ b/cpp/src/arrow/util/bitmap_writer.h @@ -187,7 +187,7 @@ class BitmapWordWriter { BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) { bitmap_ = bitmap + offset / 8; offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset + length); + bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); mask_ = (1U << offset_) - 1; if (offset_) { @@ -225,22 +225,17 @@ class BitmapWordWriter { bitmap_ += sizeof(Word); } - void PutNextWord(Word word, int valid_bits) { + void PutTrailingWord(Word word, int valid_bits) { assert(static_cast(valid_bits) <= sizeof(Word) * 8); if (ARROW_PREDICT_FALSE(valid_bits == 0)) { return; - } else if (ARROW_PREDICT_FALSE(valid_bits == sizeof(Word) * 8)) { - return PutNextWord(word); } - int i = 0; - for (; i < valid_bits / 8; i++) { + + int n_bytes = (valid_bits + 7) / 8; + for (int i = 0; i < n_bytes; i++) { uint8_t byte = *(reinterpret_cast(&word) + i); - PutNextTrailingByte(byte, 8); - } - // cleanup - if (int remainder = valid_bits - i * 8) { - assert(static_cast(remainder) < sizeof(Word) * 8); - PutNextTrailingByte(*(reinterpret_cast(&word) + i), remainder); + PutNextTrailingByte(byte, std::min(8, valid_bits)); + valid_bits -= 8; } } From 4907fa096e36fda4b0ae590ea2865df0a6c139e6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 11 Jun 2021 16:20:23 -0400 Subject: [PATCH 11/46] adding byte visitor to clean up the code --- .../arrow/compute/kernels/scalar_if_else.cc | 84 +++++++++---------- cpp/src/arrow/util/bitmap.h | 44 ++++++---- cpp/src/arrow/util/bitmap_reader.h | 15 ---- cpp/src/arrow/util/bitmap_test.cc | 32 +++---- cpp/src/arrow/util/bitmap_writer.h | 14 ---- 5 files changed, 83 insertions(+), 106 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 8a85f61b9a7..37db3391996 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -72,9 +72,6 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& Bitmap cond_valid{cond.buffers[0], cond.offset, cond.length}; Bitmap left_valid = GetBitmap(left_d, 0); Bitmap right_valid = GetBitmap(right_d, 0); - // sometimes Bitmaps will be ignored, in which case we replace access to them with - // duplicated (probably elided) access to cond_data - const Bitmap& _ = cond_data; // cond.valid & (cond.data & left.valid | ~cond.data & right.valid) // In the following cases, we dont need to allocate out_valid bitmap @@ -114,83 +111,79 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& switch (flag) { case COND_CONST | LEFT_CONST | RIGHT_CONST: { - std::array bitmaps{_, cond_data, _, _}; + std::array bitmaps{cond_data}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = apply(*cond_const, words_in[C_DATA], - *left_const, *right_const); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(*cond_const, words_in[0], + *left_const, *right_const); }); break; } case LEFT_CONST | RIGHT_CONST: { - std::array bitmaps{cond_valid, cond_data, _, _}; + std::array bitmaps{cond_valid, cond_data}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = - apply(words_in[C_VALID], words_in[C_DATA], - *left_const, *right_const); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(words_in[0], words_in[1], + *left_const, *right_const); }); break; } case COND_CONST | RIGHT_CONST: { // bitmaps[C_VALID], bitmaps[R_VALID] might be null; override to make it safe for // Visit() - std::array bitmaps{_, cond_data, left_valid, _}; + std::array bitmaps{cond_data, left_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = apply(*cond_const, words_in[C_DATA], - words_in[L_VALID], *right_const); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(*cond_const, words_in[0], + words_in[1], *right_const); }); break; } case RIGHT_CONST: { // bitmaps[R_VALID] might be null; override to make it safe for Visit() - std::array bitmaps{cond_valid, cond_data, left_valid, _}; + std::array bitmaps{cond_valid, cond_data, left_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = - apply(words_in[C_VALID], words_in[C_DATA], - words_in[L_VALID], *right_const); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(words_in[0], words_in[1], + words_in[2], *right_const); }); break; } case COND_CONST | LEFT_CONST: { // bitmaps[C_VALID], bitmaps[L_VALID] might be null; override to make it safe for // Visit() - std::array bitmaps{_, cond_data, _, right_valid}; + std::array bitmaps{cond_data, right_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = apply(*cond_const, words_in[C_DATA], - *left_const, words_in[R_VALID]); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(*cond_const, words_in[0], + *left_const, words_in[1]); }); break; } case LEFT_CONST: { // bitmaps[L_VALID] might be null; override to make it safe for Visit() - std::array bitmaps{cond_valid, cond_data, _, right_valid}; + std::array bitmaps{cond_valid, cond_data, right_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = - apply(words_in[C_VALID], words_in[C_DATA], - *left_const, words_in[R_VALID]); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(words_in[0], words_in[1], + *left_const, words_in[2]); }); break; } case COND_CONST: { // bitmaps[C_VALID] might be null; override to make it safe for Visit() - std::array bitmaps{_, cond_data, left_valid, right_valid}; + std::array bitmaps{cond_data, left_valid, right_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, - [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = - apply(*cond_const, words_in[C_DATA], - words_in[L_VALID], words_in[R_VALID]); + [&](const std::array& words_in, + std::array* word_out) { + word_out->at(0) = apply(*cond_const, words_in[0], + words_in[1], words_in[2]); }); break; } @@ -198,10 +191,9 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& std::array bitmaps{cond_valid, cond_data, left_valid, right_valid}; Bitmap::VisitWordsAndWrite(bitmaps, &out_bitmaps, [&](const std::array& words_in, - std::array& word_out) { - word_out[0] = - apply(words_in[C_VALID], words_in[C_DATA], - words_in[L_VALID], words_in[R_VALID]); + std::array* word_out) { + word_out->at(0) = apply(words_in[0], words_in[1], + words_in[2], words_in[3]); }); break; } diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 613366eb8af..05cc7a309f8 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -256,7 +256,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, /// densely packed bits loaded from the bitmap. That offset within the first word is /// returned. /// Visitor is expected to have the following signature - /// [](const std::array& in_words, std::array& out_words){...} + /// [](const std::array& in_words, std::array* out_words){...} /// // NOTE: this function is efficient on 3+ sufficiently large bitmaps. // It also has a large prolog / epilog overhead and should be used @@ -296,33 +296,47 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, // todo this will be inefficient in some cases. When there are offsets beyond Word // boundary, every Word would have to be created from 2 adjoining Words auto n_words = readers[0].words(); + bit_length -= n_words * kBitWidth; while (n_words--) { // first collect all words to visited_words array for (size_t i = 0; i < N; i++) { visited_words[i] = readers[i].NextWord(); } - visitor(visited_words, output_words); + visitor(visited_words, &output_words); for (size_t i = 0; i < M; i++) { writers[i].PutNextWord(output_words[i]); } - bit_length -= kBitWidth; } // every reader will have same number of trailing bytes, because of the above reason // tailing portion could be more than one word! (ref: BitmapWordReader constructor) // remaining full/ partial words to write - n_words = (bit_length + kBitWidth - 1) / kBitWidth; - assert(n_words <= 2); - while (n_words--) { - visited_words.fill(0); - output_words.fill(0); - int valid_bits; - for (size_t i = 0; i < N; i++) { - visited_words[i] = readers[i].NextTrailingWord(valid_bits); - } - visitor(visited_words, output_words); - for (size_t i = 0; i < M; i++) { - writers[i].PutTrailingWord(output_words[i], valid_bits); + + if (bit_length) { + // convert the word visitor lambda to a byte_visitor + auto byte_visitor = [&](const std::array& in, + std::array* out) { + std::array in_words; + std::array out_words; + std::copy(in.begin(), in.end(), in_words.begin()); + visitor(in_words, &out_words); + std::move(out_words.begin(), out_words.end(), out->begin()); + }; + + std::array visited_bytes; + std::array output_bytes; + int n_bytes = readers[0].trailing_bytes(); + while (n_bytes--) { + visited_bytes.fill(0); + output_bytes.fill(0); + int valid_bits; + for (size_t i = 0; i < N; i++) { + visited_bytes[i] = readers[i].NextTrailingByte(valid_bits); + } + byte_visitor(visited_bytes, &output_bytes); + for (size_t i = 0; i < M; i++) { + writers[i].PutNextTrailingByte(output_bytes[i], valid_bits); + } } } } diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index 70584d5dfe9..ce1d5f376bd 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -193,21 +193,6 @@ class BitmapWordReader { return word; } - Word NextTrailingWord(int& valid_bits) { - // safest way to create a word from the trailing bits, is to concatenate bytes - // returned by NextTrailingByte - Word word = 0; // only a partial word may be returned. - valid_bits = 0; - int n_byte = std::min(trailing_bytes_, static_cast(sizeof(Word))); - for (int b = 0; b < n_byte; b++) { - int valid; - auto byte = static_cast(NextTrailingByte(valid)); - word |= byte << (b * 8); - valid_bits += valid; - } - return word; - } - uint8_t NextTrailingByte(int& valid_bits) { uint8_t byte; assert(trailing_bits_ > 0); diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc index 601cf6f65ad..4a782e3ce12 100644 --- a/cpp/src/arrow/util/bitmap_test.cc +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -87,9 +87,9 @@ void RunOutputNoOffset(int part) { std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( in_bms, &out_bms, - [](const std::array& in, std::array& out) { - out[0] = in[0] & in[1]; - out[1] = in[0] | in[2]; + [](const std::array& in, std::array* out) { + out->at(0) = in[0] & in[1]; + out->at(1) = in[0] | in[2]; }); std::vector out_v0(part); @@ -124,8 +124,8 @@ void RunOutputWithOffset(int64_t part) { std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len - Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + Bitmap bm1(arrow_buffer, part * 1, part); + Bitmap bm2(arrow_buffer, part * 2, part); std::array out_bms; ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); @@ -136,12 +136,12 @@ void RunOutputWithOffset(int64_t part) { std::vector v1(data.begin() + part * 1, data.begin() + part * 2); std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - std::cout << "v0: " << VectorToString(v0) << "\n"; - std::cout << "b0: " << bm0.ToString() << "\n"; - std::cout << "v1: " << VectorToString(v1) << "\n"; - std::cout << "b1: " << bm1.ToString() << "\n"; - std::cout << "v2: " << VectorToString(v2) << "\n"; - std::cout << "b2: " << bm2.ToString() << "\n"; + // std::cout << "v0: " << VectorToString(v0) << "\n"; + // std::cout << "b0: " << bm0.ToString() << "\n"; + // std::cout << "v1: " << VectorToString(v1) << "\n"; + // std::cout << "b1: " << bm1.ToString() << "\n"; + // std::cout << "v2: " << VectorToString(v2) << "\n"; + // std::cout << "b2: " << bm2.ToString() << "\n"; std::vector out_v0(part); std::vector out_v1(part); @@ -152,16 +152,16 @@ void RunOutputWithOffset(int64_t part) { std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), std::logical_or()); - std::cout << "out0: " << VectorToString(out_v0) << "\n"; - std::cout << "out1: " << VectorToString(out_v1) << "\n"; + // std::cout << "out0: " << VectorToString(out_v0) << "\n"; + // std::cout << "out1: " << VectorToString(out_v1) << "\n"; // out0 = bm0 & bm1, out1= bm0 | bm2 std::array in_bms{bm0, bm1, bm2}; Bitmap::VisitWordsAndWrite( in_bms, &out_bms, - [](const std::array& in, std::array& out) { - out[0] = in[0] & in[1]; - out[1] = in[0] | in[2]; + [](const std::array& in, std::array* out) { + out->at(0) = in[0] & in[1]; + out->at(1) = in[0] | in[2]; }); VerifyBoolOutput(out_bms[0], out_v0); diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h index e4f86a269fc..b15b036c248 100644 --- a/cpp/src/arrow/util/bitmap_writer.h +++ b/cpp/src/arrow/util/bitmap_writer.h @@ -225,20 +225,6 @@ class BitmapWordWriter { bitmap_ += sizeof(Word); } - void PutTrailingWord(Word word, int valid_bits) { - assert(static_cast(valid_bits) <= sizeof(Word) * 8); - if (ARROW_PREDICT_FALSE(valid_bits == 0)) { - return; - } - - int n_bytes = (valid_bits + 7) / 8; - for (int i = 0; i < n_bytes; i++) { - uint8_t byte = *(reinterpret_cast(&word) + i); - PutNextTrailingByte(byte, std::min(8, valid_bits)); - valid_bits -= 8; - } - } - void PutNextTrailingByte(uint8_t byte, int valid_bits) { if (valid_bits == 8) { if (offset_) { From 3af7137f0db7b1a45fe332a03ba00078835b5279 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 11 Jun 2021 17:22:45 -0400 Subject: [PATCH 12/46] adding changes to kleene kernels --- .../arrow/compute/kernels/scalar_boolean.cc | 54 +++++++++---------- .../arrow/compute/kernels/scalar_if_else.cc | 2 - 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index 89107120fa3..065e01b2780 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -30,60 +30,60 @@ namespace compute { namespace { -enum BitmapIndex { LEFT_VALID, LEFT_DATA, RIGHT_VALID, RIGHT_DATA }; - template void ComputeKleene(ComputeWord&& compute_word, KernelContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) { DCHECK(left.null_count != 0 || right.null_count != 0) << "ComputeKleene is unnecessarily expensive for the non-null case"; - Bitmap bitmaps[4]; - bitmaps[LEFT_VALID] = {left.buffers[0], left.offset, left.length}; - bitmaps[LEFT_DATA] = {left.buffers[1], left.offset, left.length}; + Bitmap left_valid_bm{left.buffers[0], left.offset, left.length}; + Bitmap left_data_bm{left.buffers[1], left.offset, left.length}; - bitmaps[RIGHT_VALID] = {right.buffers[0], right.offset, right.length}; - bitmaps[RIGHT_DATA] = {right.buffers[1], right.offset, right.length}; + Bitmap right_valid_bm{right.buffers[0], right.offset, right.length}; + Bitmap right_data_bm{right.buffers[1], right.offset, right.length}; - auto out_validity = out->GetMutableValues(0); - auto out_data = out->GetMutableValues(1); + std::array out_bms{Bitmap(out->buffers[0], out->offset, out->length), + Bitmap(out->buffers[1], out->offset, out->length)}; - int64_t i = 0; auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid, - uint64_t right_data) { + uint64_t right_data, uint64_t* out_validity, uint64_t* out_data) { auto left_true = left_valid & left_data; auto left_false = left_valid & ~left_data; auto right_true = right_valid & right_data; auto right_false = right_valid & ~right_data; - compute_word(left_true, left_false, right_true, right_false, &out_validity[i], - &out_data[i]); - ++i; + compute_word(left_true, left_false, right_true, right_false, out_validity, out_data); }; if (right.null_count == 0) { - // bitmaps[RIGHT_VALID] might be null; override to make it safe for Visit() - bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA]; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]); - }); + std::array in_bms{left_valid_bm, left_data_bm, right_data_bm}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [&](const std::array& in, std::array* out) { + apply(in[0], in[1], ~uint64_t(0), in[2], &(out->at(0)), &(out->at(1))); + }); return; } if (left.null_count == 0) { - // bitmaps[LEFT_VALID] might be null; override to make it safe for Visit() - bitmaps[LEFT_VALID] = bitmaps[LEFT_DATA]; - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(~uint64_t(0), words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]); - }); + std::array in_bms{left_data_bm, right_valid_bm, right_data_bm}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [&](const std::array& in, std::array* out) { + apply(~uint64_t(0), in[0], in[1], in[2], &(out->at(0)), &(out->at(1))); + }); return; } DCHECK(left.null_count != 0 && right.null_count != 0); - Bitmap::VisitWords(bitmaps, [&](std::array words) { - apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]); - }); + std::array in_bms{left_valid_bm, left_data_bm, right_valid_bm, + right_data_bm}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [&](const std::array& in, std::array* out) { + apply(in[0], in[1], in[2], in[3], &(out->at(0)), &(out->at(1))); + }); } inline BooleanScalar InvertScalar(const Scalar& in) { diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 37db3391996..147b68f4baa 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -107,8 +107,6 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& std::array out_bitmaps{Bitmap{output->buffers[0], 0, cond.length}}; - enum { C_VALID, C_DATA, L_VALID, R_VALID }; - switch (flag) { case COND_CONST | LEFT_CONST | RIGHT_CONST: { std::array bitmaps{cond_data}; From cc659e91cf53f1f268acac4498c7155c6e6e69da Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 12 Jun 2021 00:46:56 -0400 Subject: [PATCH 13/46] fix for kleene test failures with NullHandling::COMPUTED_PREALLOCATE and can_write_into_slices=true --- .../arrow/compute/kernels/scalar_boolean.cc | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index 065e01b2780..cfcad558aed 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -204,7 +204,9 @@ struct KleeneAndOp : Commutative { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - out->buffers[0] = nullptr; + // out->buffers[0] = nullptr; + // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 + std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); return AndOp::Call(ctx, left, right, out); } auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true, @@ -307,7 +309,9 @@ struct KleeneOrOp : Commutative { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - out->buffers[0] = nullptr; + // out->buffers[0] = nullptr; + // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 + std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); return OrOp::Call(ctx, left, right, out); } @@ -437,7 +441,9 @@ struct KleeneAndNotOp { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - out->buffers[0] = nullptr; + // out->buffers[0] = nullptr; + // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 + std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); return AndNotOp::Call(ctx, left, right, out); } @@ -453,9 +459,8 @@ struct KleeneAndNotOp { } }; -void MakeFunction(std::string name, int arity, ArrayKernelExec exec, +void MakeFunction(const std::string& name, int arity, ArrayKernelExec exec, const FunctionDoc* doc, FunctionRegistry* registry, - bool can_write_into_slices = true, NullHandling::type null_handling = NullHandling::INTERSECTION) { auto func = std::make_shared(name, Arity(arity), doc); @@ -463,7 +468,6 @@ void MakeFunction(std::string name, int arity, ArrayKernelExec exec, std::vector in_types(arity, InputType(boolean())); ScalarKernel kernel(std::move(in_types), boolean(), exec); kernel.null_handling = null_handling; - kernel.can_write_into_slices = can_write_into_slices; DCHECK_OK(func->AddKernel(kernel)); DCHECK_OK(registry->AddFunction(std::move(func))); @@ -551,14 +555,11 @@ void RegisterScalarBoolean(FunctionRegistry* registry) { // The Kleene logic kernels cannot write into sliced output bitmaps MakeFunction("and_kleene", 2, applicator::SimpleBinary, &and_kleene_doc, - registry, - /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE); + registry, NullHandling::COMPUTED_PREALLOCATE); MakeFunction("and_not_kleene", 2, applicator::SimpleBinary, - &and_not_kleene_doc, registry, - /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE); + &and_not_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE); MakeFunction("or_kleene", 2, applicator::SimpleBinary, &or_kleene_doc, - registry, - /*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE); + registry, NullHandling::COMPUTED_PREALLOCATE); } } // namespace internal From a0b4b42df009fe8f4c37bff360335552c682719b Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sun, 13 Jun 2021 20:30:26 -0400 Subject: [PATCH 14/46] adding set/clear bitmap methods --- .../arrow/compute/kernels/scalar_boolean.cc | 12 +++--- cpp/src/arrow/util/bitmap_ops.cc | 40 +++++++++++++++++++ cpp/src/arrow/util/bitmap_ops.h | 8 ++++ 3 files changed, 54 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index cfcad558aed..cba07eb057f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -204,9 +204,9 @@ struct KleeneAndOp : Commutative { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - // out->buffers[0] = nullptr; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); + arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, + out->length); return AndOp::Call(ctx, left, right, out); } auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true, @@ -309,9 +309,9 @@ struct KleeneOrOp : Commutative { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - // out->buffers[0] = nullptr; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); + arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, + out->length); return OrOp::Call(ctx, left, right, out); } @@ -441,9 +441,9 @@ struct KleeneAndNotOp { ArrayData* out) { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; - // out->buffers[0] = nullptr; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - std::memset(out->buffers[0]->mutable_data(), UINT8_MAX, out->buffers[0]->size()); + arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, + out->length); return AndNotOp::Call(ctx, left, right, out); } diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index 63c8b008f4a..f657bc8db95 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -383,5 +383,45 @@ void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right, BitmapOp(left, left_offset, right, right_offset, length, out_offset, out); } +template +void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { + int64_t prologue = std::min(((offset + 7) / 8) * 8 - offset, length); + + if (prologue) { // align to a byte boundary + DCHECK_LT(prologue, 8); + BitmapWriter writer(data, offset, prologue); + for (auto i = 0; i < prologue; i++) { + value ? writer.Set() : writer.Clear(); + writer.Next(); + } + writer.Finish(); + offset += prologue; + length -= prologue; + } + + if (length) { // set values per byte + DCHECK_EQ(offset % 8, 0); + std::memset(data + offset / 8, value ? UINT8_MAX : 0, length / 8); + offset += ((length / 8) * 8); + length -= ((length / 8) * 8); + } + + if (length) { // clean up + BitmapWriter writer(data, offset, length); + for (auto i = 0; i < length; i++) { + value ? writer.Set() : writer.Clear(); + writer.Next(); + } + writer.Finish(); + } +} + +void SetBitmap(uint8_t* data, int64_t offset, int64_t length) { + SetBitmapImpl(data, offset, length); +} + +void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) { + SetBitmapImpl(data, offset, length); +} } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index 40a7797a239..ecc8a77f024 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -202,5 +202,13 @@ ARROW_EXPORT void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out); +/// \brief Sets all bits in the bitmap to true +ARROW_EXPORT +void SetBitmap(uint8_t* data, int64_t offset, int64_t length); + +/// \brief Clears all bits in the bitmap (set to false) +ARROW_EXPORT +void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); + } // namespace internal } // namespace arrow From aea1b0f5873bdd91f8e736cf91a5560b47b6b533 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 14 Jun 2021 00:28:58 -0400 Subject: [PATCH 15/46] lint fixes --- cpp/src/arrow/util/bitmap_ops.cc | 7 ++++--- cpp/src/arrow/util/bitmap_test.cc | 3 +-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index f657bc8db95..c1d94b6b588 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -387,7 +387,7 @@ template void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { int64_t prologue = std::min(((offset + 7) / 8) * 8 - offset, length); - if (prologue) { // align to a byte boundary + if (prologue) { // align to a byte boundary DCHECK_LT(prologue, 8); BitmapWriter writer(data, offset, prologue); for (auto i = 0; i < prologue; i++) { @@ -399,14 +399,15 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { length -= prologue; } - if (length) { // set values per byte + if (length) { // set values per byte DCHECK_EQ(offset % 8, 0); std::memset(data + offset / 8, value ? UINT8_MAX : 0, length / 8); offset += ((length / 8) * 8); length -= ((length / 8) * 8); } - if (length) { // clean up + if (length) { // clean up + DCHECK_LT(prologue, 8); BitmapWriter writer(data, offset, length); for (auto i = 0; i < length; i++) { value ? writer.Set() : writer.Clear(); diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc index 4a782e3ce12..4c2958f6432 100644 --- a/cpp/src/arrow/util/bitmap_test.cc +++ b/cpp/src/arrow/util/bitmap_test.cc @@ -18,14 +18,13 @@ #include "arrow/util/bitmap.h" #include +#include #include #include #include #include -#include "arrow/buffer.h" - namespace arrow { namespace internal { From 6f30a986d6ebb8c5950b04a066e657076b5671f6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 14 Jun 2021 13:57:46 -0400 Subject: [PATCH 16/46] adding SpliceWord and refactoring code --- cpp/src/arrow/util/CMakeLists.txt | 1 - cpp/src/arrow/util/bit_util.h | 16 +++ cpp/src/arrow/util/bit_util_test.cc | 195 ++++++++++++++++++++++++++ cpp/src/arrow/util/bitmap_ops.cc | 48 ++++--- cpp/src/arrow/util/bitmap_test.cc | 203 ---------------------------- 5 files changed, 242 insertions(+), 221 deletions(-) delete mode 100644 cpp/src/arrow/util/bitmap_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 571834dfca6..e26a17120cd 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -44,7 +44,6 @@ add_arrow_test(utility-test async_generator_test.cc bit_block_counter_test.cc bit_util_test.cc - bitmap_test.cc cache_test.cc checked_cast_test.cc compression_test.cc diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 01845791faa..60e5ef6543f 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -316,5 +316,21 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) { ARROW_EXPORT void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set); +template +constexpr Word WordBitMask(int i) { + return (static_cast(1) << i) - 1; +} + +/// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits +/// from `high`. +/// Word ret +/// for (i = 0; i < sizeof(Word); i++){ +/// ret[i]= i < n ? low[i]: high[i]; +/// } +template +constexpr Word SpliceWord(int n, Word low, Word high) { + return (high & ~WordBitMask(n)) | (low & WordBitMask(n)); +} + } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index e5a5e4c39be..0fe39fa804b 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1975,6 +1975,37 @@ TEST(BitUtil, BitsetStack) { ASSERT_EQ(stack.TopSize(), 0); } +template +void CheckSplice(int n, Word low, Word high) { + std::bitset ret; + for (size_t i = 0; i < ret.size(); i++) { + ret[i] = i < static_cast(n) + ? BitUtil::GetBit(reinterpret_cast(&low), i) + : BitUtil::GetBit(reinterpret_cast(&high), i); + } + + ASSERT_EQ(static_cast(ret.to_ulong()), BitUtil::SpliceWord(n, low, high)); +} + +TEST(SpliceWord, SpliceWord) { + uint64_t low = 123456789, high = 987654321; + + CheckSplice(0, static_cast(low), static_cast(high)); + CheckSplice(UINT8_MAX, static_cast(low), static_cast(high)); + CheckSplice(sizeof(uint8_t) / 3, static_cast(low), + static_cast(high)); + + CheckSplice(0, static_cast(low), static_cast(high)); + CheckSplice(UINT32_MAX, static_cast(low), + static_cast(high)); + CheckSplice(sizeof(uint32_t) / 3, static_cast(low), + static_cast(high)); + + CheckSplice(0, low, high); + CheckSplice(UINT32_MAX, low, high); + CheckSplice(sizeof(uint32_t) / 3, low, high); +} + // test the basic assumption of word level Bitmap::Visit TEST(Bitmap, ShiftingWordsOptimization) { // single word @@ -2156,5 +2187,169 @@ TEST(Bitmap, VisitWordsAnd) { } } +void random_bool_vector(std::vector& vec, int64_t size, double p = 0.5) { + vec.reserve(size); + std::random_device rd; + std::mt19937 gen(rd()); + std::bernoulli_distribution d(p); + + for (int n = 0; n < size; ++n) { + vec.push_back(d(gen)); + } +} + +std::string VectorToString(const std::vector& v) { + std::string out(v.size() + +((v.size() - 1) / 8), ' '); + for (size_t i = 0; i < v.size(); ++i) { + out[i + (i / 8)] = v[i] ? '1' : '0'; + } + return out; +} + +void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector& expected) { + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(expected)); + ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish()); + + ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(), + arr->data()->buffers[1]->data(), 0, expected.size())) + << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString(); +} + +class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam {}; + +TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) { + auto part = GetParam(); + int64_t bits = 4 * part; + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len + Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + + std::array out_bms; + ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part)); + ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part)); + out_bms[0] = Bitmap(out0, 0, part); + out_bms[1] = Bitmap(out1, 0, part); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + + // out0 = bm0 & bm1, out1= bm0 | bm2 + std::array in_bms{bm0, bm1, bm2}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [](const std::array& in, std::array* out) { + out->at(0) = in[0] & in[1]; + out->at(1) = in[0] | in[2]; + }); + + std::vector out_v0(part); + std::vector out_v1(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), + std::logical_and()); + // v3 |= v2 + std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), + std::logical_or()); + + // std::cout << "v0: " << VectorToString(v0) << "\n" + // << "b0: " << bm0.ToString() << "\n" + // << "v1: " << VectorToString(v1) << "\n" + // << "b1: " << bm1.ToString() << "\n" + // << "v2: " << VectorToString(v2) << "\n" + // << "b2: " << bm2.ToString() << "\n"; + + VerifyBoolVectorAndBitmap(out_bms[0], out_v0); + VerifyBoolVectorAndBitmap(out_bms[1], out_v1); +} + +INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWriteOutputNoOffset, + testing::Values(199, 256, 1000)); + +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWriteOutputNoOffset, + testing::Values(5, 13, 21, 29, 37, 41, 51, 59, 64, 97)); + +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWriteOutputNoOffset, + testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); + +class TestBitmapVisitAndWriteOutputWithOffset : public ::testing::TestWithParam { +}; + +TEST_P(TestBitmapVisitAndWriteOutputWithOffset, Test2) { + auto part = GetParam(); + int64_t bits = part * 4; + std::vector data; + random_bool_vector(data, bits); + + arrow::BooleanBuilder boolean_builder; + ASSERT_OK(boolean_builder.AppendValues(data)); + ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + + std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; + + Bitmap bm0(arrow_buffer, 0, part); + Bitmap bm1(arrow_buffer, part * 1, part); + Bitmap bm2(arrow_buffer, part * 2, part); + + std::array out_bms; + ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); + out_bms[0] = Bitmap(out, part, part); + out_bms[1] = Bitmap(out, part * 2, part); + + std::vector v0(data.begin(), data.begin() + part); + std::vector v1(data.begin() + part * 1, data.begin() + part * 2); + std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + + // std::cout << "v0: " << VectorToString(v0) << "\n" + // << "b0: " << bm0.ToString() << "\n" + // << "v1: " << VectorToString(v1) << "\n" + // << "b1: " << bm1.ToString() << "\n" + // << "v2: " << VectorToString(v2) << "\n" + // << "b2: " << bm2.ToString() << "\n"; + + std::vector out_v0(part); + std::vector out_v1(part); + // v3 = v0 & v1 + std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), + std::logical_and()); + // v3 |= v2 + std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), + std::logical_or()); + + // std::cout << "out0: " << VectorToString(out_v0) << "\n" + // << "out1: " << VectorToString(out_v1) << "\n"; + + // out0 = bm0 & bm1, out1= bm0 | bm2 + std::array in_bms{bm0, bm1, bm2}; + Bitmap::VisitWordsAndWrite( + in_bms, &out_bms, + [](const std::array& in, std::array* out) { + out->at(0) = in[0] & in[1]; + out->at(1) = in[0] | in[2]; + }); + + VerifyBoolVectorAndBitmap(out_bms[0], out_v0); + VerifyBoolVectorAndBitmap(out_bms[1], out_v1); +} + +INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWriteOutputWithOffset, + testing::Values(199, 256, 1000)); + +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWriteOutputWithOffset, + testing::Values(7, 15, 23, 31, 39, 47, 55, 63, 73, 97)); + +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWriteOutputWithOffset, + testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index c1d94b6b588..fce23de35d8 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -385,35 +385,49 @@ void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right, template void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { - int64_t prologue = std::min(((offset + 7) / 8) * 8 - offset, length); + // offset length + // data |<------------->| + // |--------|...|--------|...|--------| + // |<--->| |<--->| + // pro epi + if (ARROW_PREDICT_FALSE(length == 0)) { + return; + } + + constexpr uint8_t set_byte = value ? UINT8_MAX : 0; + + int prologue = static_cast(((offset + 7) / 8) * 8 - offset); + DCHECK_LT(prologue, 8); + + if (length < prologue) { // special case where a mask is required + // offset length + // data |<->| + // |--------|...|--------|... + // mask |111| + // |<---->| + // pro + uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^ + BitUtil::kPrecedingBitmask[8 - prologue + length]; + data[offset / 8] |= mask; + return; + } if (prologue) { // align to a byte boundary - DCHECK_LT(prologue, 8); - BitmapWriter writer(data, offset, prologue); - for (auto i = 0; i < prologue; i++) { - value ? writer.Set() : writer.Clear(); - writer.Next(); - } - writer.Finish(); + data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte); offset += prologue; length -= prologue; } - if (length) { // set values per byte + if (length / 8) { // set values per byte DCHECK_EQ(offset % 8, 0); - std::memset(data + offset / 8, value ? UINT8_MAX : 0, length / 8); + std::memset(data + offset / 8, set_byte, length / 8); offset += ((length / 8) * 8); length -= ((length / 8) * 8); } if (length) { // clean up - DCHECK_LT(prologue, 8); - BitmapWriter writer(data, offset, length); - for (auto i = 0; i < length; i++) { - value ? writer.Set() : writer.Clear(); - writer.Next(); - } - writer.Finish(); + DCHECK_LT(length, 8); + data[offset / 8] = BitUtil::SpliceWord(length, set_byte, data[offset / 8]); } } diff --git a/cpp/src/arrow/util/bitmap_test.cc b/cpp/src/arrow/util/bitmap_test.cc deleted file mode 100644 index 4c2958f6432..00000000000 --- a/cpp/src/arrow/util/bitmap_test.cc +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/util/bitmap.h" - -#include -#include -#include -#include - -#include -#include - -namespace arrow { -namespace internal { - -void random_bool_vector(std::vector& vec, int64_t size, double p = 0.5) { - vec.reserve(size); - std::random_device rd; - std::mt19937 gen(rd()); - std::bernoulli_distribution d(p); - - for (int n = 0; n < size; ++n) { - vec.push_back(d(gen)); - } -} - -std::string VectorToString(const std::vector& v) { - std::string out(v.size() + +((v.size() - 1) / 8), ' '); - for (size_t i = 0; i < v.size(); ++i) { - out[i + (i / 8)] = v[i] ? '1' : '0'; - } - return out; -} - -void VerifyBoolOutput(const Bitmap& bitmap, const std::vector& expected) { - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(expected)); - ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish()); - - ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(), - arr->data()->buffers[1]->data(), 0, expected.size())) - << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString(); -} - -void RunOutputNoOffset(int part) { - int64_t bits = 4 * part; - std::vector data; - random_bool_vector(data, bits); - - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); - - std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; - - Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len - Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len - - std::array out_bms; - ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part)); - ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part)); - out_bms[0] = Bitmap(out0, 0, part); - out_bms[1] = Bitmap(out1, 0, part); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - - // out0 = bm0 & bm1, out1= bm0 | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, &out_bms, - [](const std::array& in, std::array* out) { - out->at(0) = in[0] & in[1]; - out->at(1) = in[0] | in[2]; - }); - - std::vector out_v0(part); - std::vector out_v1(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), - std::logical_and()); - // v3 |= v2 - std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), - std::logical_or()); - - // std::cout << "v0: " << VectorToString(v0)<< "\n"; - // std::cout << "b0: " << bm0.ToString()<< "\n"; - // std::cout << "v1: " << VectorToString(v1)<< "\n"; - // std::cout << "b1: " << bm1.ToString()<< "\n"; - // std::cout << "v2: " << VectorToString(v2) << "\n"; - // std::cout << "b2: " << bm2.ToString() << "\n"; - - VerifyBoolOutput(out_bms[0], out_v0); - VerifyBoolOutput(out_bms[1], out_v1); -} - -void RunOutputWithOffset(int64_t part) { - int64_t bits = part * 4; - std::vector data; - random_bool_vector(data, bits); - - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); - - std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; - - Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1(arrow_buffer, part * 1, part); - Bitmap bm2(arrow_buffer, part * 2, part); - - std::array out_bms; - ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); - out_bms[0] = Bitmap(out, part, part); - out_bms[1] = Bitmap(out, part * 2, part); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - - // std::cout << "v0: " << VectorToString(v0) << "\n"; - // std::cout << "b0: " << bm0.ToString() << "\n"; - // std::cout << "v1: " << VectorToString(v1) << "\n"; - // std::cout << "b1: " << bm1.ToString() << "\n"; - // std::cout << "v2: " << VectorToString(v2) << "\n"; - // std::cout << "b2: " << bm2.ToString() << "\n"; - - std::vector out_v0(part); - std::vector out_v1(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), - std::logical_and()); - // v3 |= v2 - std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), - std::logical_or()); - - // std::cout << "out0: " << VectorToString(out_v0) << "\n"; - // std::cout << "out1: " << VectorToString(out_v1) << "\n"; - - // out0 = bm0 & bm1, out1= bm0 | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, &out_bms, - [](const std::array& in, std::array* out) { - out->at(0) = in[0] & in[1]; - out->at(1) = in[0] | in[2]; - }); - - VerifyBoolOutput(out_bms[0], out_v0); - VerifyBoolOutput(out_bms[1], out_v1); -} - -class TestBitmapVisitOutputNoOffset : public ::testing::TestWithParam {}; - -TEST_P(TestBitmapVisitOutputNoOffset, Test1) { - auto part = GetParam(); - RunOutputNoOffset(part); -} - -INSTANTIATE_TEST_SUITE_P(General, TestBitmapVisitOutputNoOffset, - testing::Values(199, 256, 1000)); - -INSTANTIATE_TEST_SUITE_P(EdgeCases, TestBitmapVisitOutputNoOffset, - testing::Values(5, 13, 21, 29, 37, 41, 51, 59, 64, 97)); - -INSTANTIATE_TEST_SUITE_P(EdgeCases2, TestBitmapVisitOutputNoOffset, - testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); - -class TestBitmapVisitOutputWithOffset : public ::testing::TestWithParam {}; - -TEST_P(TestBitmapVisitOutputWithOffset, Test2) { - auto part = GetParam(); - RunOutputWithOffset(part); -} - -INSTANTIATE_TEST_SUITE_P(General, TestBitmapVisitOutputWithOffset, - testing::Values(199, 256, 1000)); - -INSTANTIATE_TEST_SUITE_P(EdgeCases, TestBitmapVisitOutputWithOffset, - testing::Values(7, 15, 23, 31, 39, 47, 55, 63, 73, 97)); - -INSTANTIATE_TEST_SUITE_P(EdgeCases2, TestBitmapVisitOutputWithOffset, - testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); - -} // namespace internal -} // namespace arrow From 40ba1c7628a41f5e6381278c6bf42c23276f0b76 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 14 Jun 2021 14:25:08 -0400 Subject: [PATCH 17/46] refactor --- .../arrow/compute/kernels/scalar_boolean.cc | 9 +-- cpp/src/arrow/util/bit_util.cc | 55 +++++++++++++++++++ cpp/src/arrow/util/bit_util.h | 8 +++ cpp/src/arrow/util/bitmap_ops.cc | 55 ------------------- cpp/src/arrow/util/bitmap_ops.h | 8 --- 5 files changed, 66 insertions(+), 69 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index cba07eb057f..6de4ef16031 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -205,8 +205,7 @@ struct KleeneAndOp : Commutative { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, - out->length); + BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length); return AndOp::Call(ctx, left, right, out); } auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true, @@ -310,8 +309,7 @@ struct KleeneOrOp : Commutative { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, - out->length); + BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length); return OrOp::Call(ctx, left, right, out); } @@ -442,8 +440,7 @@ struct KleeneAndNotOp { if (left.GetNullCount() == 0 && right.GetNullCount() == 0) { out->null_count = 0; // Kleene kernels have validity bitmap pre-allocated. Therefore, set it to 1 - arrow::internal::SetBitmap(out->buffers[0]->mutable_data(), out->offset, - out->length); + BitUtil::SetBitmap(out->buffers[0]->mutable_data(), out->offset, out->length); return AndNotOp::Call(ctx, left, right, out); } diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 6e23678ddf9..9c0ef6bc9bf 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -20,6 +20,8 @@ #include #include +#include "arrow/util/logging.h" + namespace arrow { namespace BitUtil { @@ -67,5 +69,58 @@ void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_ar bits[bytes_end - 1] |= static_cast(fill_byte & ~last_byte_mask); } +template +void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { + // offset length + // data |<------------->| + // |--------|...|--------|...|--------| + // |<--->| |<--->| + // pro epi + if (ARROW_PREDICT_FALSE(length == 0)) { + return; + } + + constexpr uint8_t set_byte = value ? UINT8_MAX : 0; + + int prologue = static_cast(((offset + 7) / 8) * 8 - offset); + DCHECK_LT(prologue, 8); + + if (length < prologue) { // special case where a mask is required + // offset length + // data |<->| + // |--------|...|--------|... + // mask --> |111| + // |<---->| + // pro + uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^ + BitUtil::kPrecedingBitmask[8 - prologue + length]; + data[offset / 8] |= mask; + return; + } + + // align to a byte boundary + data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte); + offset += prologue; + length -= prologue; + + // set values per byte + DCHECK_EQ(offset % 8, 0); + std::memset(data + offset / 8, set_byte, length / 8); + offset += ((length / 8) * 8); + length -= ((length / 8) * 8); + + // clean up + DCHECK_LT(length, 8); + data[offset / 8] = BitUtil::SpliceWord(length, set_byte, data[offset / 8]); +} + +void SetBitmap(uint8_t* data, int64_t offset, int64_t length) { + SetBitmapImpl(data, offset, length); +} + +void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) { + SetBitmapImpl(data, offset, length); +} + } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 60e5ef6543f..f0a556e21b1 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -316,6 +316,14 @@ static inline void SetBitTo(uint8_t* bits, int64_t i, bool bit_is_set) { ARROW_EXPORT void SetBitsTo(uint8_t* bits, int64_t start_offset, int64_t length, bool bits_are_set); +/// \brief Sets all bits in the bitmap to true +ARROW_EXPORT +void SetBitmap(uint8_t* data, int64_t offset, int64_t length); + +/// \brief Clears all bits in the bitmap (set to false) +ARROW_EXPORT +void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); + template constexpr Word WordBitMask(int i) { return (static_cast(1) << i) - 1; diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc index fce23de35d8..63c8b008f4a 100644 --- a/cpp/src/arrow/util/bitmap_ops.cc +++ b/cpp/src/arrow/util/bitmap_ops.cc @@ -383,60 +383,5 @@ void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right, BitmapOp(left, left_offset, right, right_offset, length, out_offset, out); } -template -void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { - // offset length - // data |<------------->| - // |--------|...|--------|...|--------| - // |<--->| |<--->| - // pro epi - if (ARROW_PREDICT_FALSE(length == 0)) { - return; - } - - constexpr uint8_t set_byte = value ? UINT8_MAX : 0; - - int prologue = static_cast(((offset + 7) / 8) * 8 - offset); - DCHECK_LT(prologue, 8); - - if (length < prologue) { // special case where a mask is required - // offset length - // data |<->| - // |--------|...|--------|... - // mask |111| - // |<---->| - // pro - uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^ - BitUtil::kPrecedingBitmask[8 - prologue + length]; - data[offset / 8] |= mask; - return; - } - - if (prologue) { // align to a byte boundary - data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte); - offset += prologue; - length -= prologue; - } - - if (length / 8) { // set values per byte - DCHECK_EQ(offset % 8, 0); - std::memset(data + offset / 8, set_byte, length / 8); - offset += ((length / 8) * 8); - length -= ((length / 8) * 8); - } - - if (length) { // clean up - DCHECK_LT(length, 8); - data[offset / 8] = BitUtil::SpliceWord(length, set_byte, data[offset / 8]); - } -} - -void SetBitmap(uint8_t* data, int64_t offset, int64_t length) { - SetBitmapImpl(data, offset, length); -} - -void ClearBitmap(uint8_t* data, int64_t offset, int64_t length) { - SetBitmapImpl(data, offset, length); -} } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap_ops.h b/cpp/src/arrow/util/bitmap_ops.h index ecc8a77f024..40a7797a239 100644 --- a/cpp/src/arrow/util/bitmap_ops.h +++ b/cpp/src/arrow/util/bitmap_ops.h @@ -202,13 +202,5 @@ ARROW_EXPORT void BitmapOrNot(const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t length, int64_t out_offset, uint8_t* out); -/// \brief Sets all bits in the bitmap to true -ARROW_EXPORT -void SetBitmap(uint8_t* data, int64_t offset, int64_t length); - -/// \brief Clears all bits in the bitmap (set to false) -ARROW_EXPORT -void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); - } // namespace internal } // namespace arrow From ae38c47d5e67c08c3ac99ec6c9e16a3908afa1ee Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 16 Jun 2021 18:06:18 -0400 Subject: [PATCH 18/46] adding benchmark --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/bit_util_benchmark_temp.cc | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 cpp/src/arrow/util/bit_util_benchmark_temp.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index e26a17120cd..1851a9afa5e 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -93,3 +93,4 @@ add_arrow_benchmark(trie_benchmark) add_arrow_benchmark(utf8_util_benchmark) add_arrow_benchmark(value_parsing_benchmark) add_arrow_benchmark(variant_benchmark) +add_arrow_benchmark(bit_util_benchmark_temp) diff --git a/cpp/src/arrow/util/bit_util_benchmark_temp.cc b/cpp/src/arrow/util/bit_util_benchmark_temp.cc new file mode 100644 index 00000000000..d0b67dea701 --- /dev/null +++ b/cpp/src/arrow/util/bit_util_benchmark_temp.cc @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/util.h" +#include "arrow/util/bit_block_counter.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_reader.h" +#include "benchmark/benchmark.h" + +namespace arrow { +namespace BitUtil { + +using internal::BitBlockCount; +using internal::BitBlockCounter; +using internal::BitmapWordReader; + +const int64_t kBufferSize = 1024 * (std::rand() % 25 + 1000); + +// const int seed = std::rand(); + +static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { + auto buffer = *AllocateBuffer(nbytes); + memset(buffer->mutable_data(), 0, nbytes); + random_bytes(nbytes, /*seed=*/0, buffer->mutable_data()); + return std::move(buffer); +} + +static void BitBlockCounterBench(benchmark::State& state) { + int64_t nbytes = state.range(0); + std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); + for (auto _ : state) { + BitBlockCounter counter(cond_buf->data(), 0, nbytes * 8); + + int64_t offset = 0; + int64_t set_bits = 0; + + while (offset < nbytes * 8) { + const BitBlockCount& word = counter.NextWord(); + // if (word.AllSet()) { + // set_bits += word.length; + // } else if (word.popcount) { + // set_bits += word.popcount; + // } + set_bits += word.popcount; + offset += word.length; + } + benchmark::ClobberMemory(); + } + + state.SetBytesProcessed(state.iterations() * nbytes); +} + +static void BitmapWordReaderBench(benchmark::State& state) { + int64_t nbytes = state.range(0); + std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); + for (auto _ : state) { + BitmapWordReader counter(cond_buf->data(), 0, nbytes * 8); + + int64_t set_bits = 0; + + int64_t cnt = counter.words(); + while (cnt--) { + const auto& word = counter.NextWord(); + // if (word == UINT64_MAX) { + // set_bits += sizeof(uint64_t) * 8; + // } else if (word) { + // set_bits += PopCount(word); + // } + set_bits += PopCount(word); + } + + cnt = counter.trailing_bytes(); + while (cnt--) { + int valid_bits; + const auto& byte = static_cast(counter.NextTrailingByte(valid_bits)); + set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte); + } + benchmark::ClobberMemory(); + } + state.SetBytesProcessed(state.iterations() * nbytes); +} + +BENCHMARK(BitBlockCounterBench)->Arg(kBufferSize); +BENCHMARK(BitmapWordReaderBench)->Arg(kBufferSize); + +} // namespace BitUtil +} // namespace arrow From e25a0d2883cae4d3ac5bd505327700a0d2bac87f Mon Sep 17 00:00:00 2001 From: niranda perera Date: Wed, 16 Jun 2021 19:09:37 -0400 Subject: [PATCH 19/46] adding benchmark1 --- cpp/src/arrow/util/bit_util_benchmark_temp.cc | 65 +++++++++++++------ 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/util/bit_util_benchmark_temp.cc b/cpp/src/arrow/util/bit_util_benchmark_temp.cc index d0b67dea701..2230f2c6dfb 100644 --- a/cpp/src/arrow/util/bit_util_benchmark_temp.cc +++ b/cpp/src/arrow/util/bit_util_benchmark_temp.cc @@ -37,9 +37,7 @@ using internal::BitBlockCount; using internal::BitBlockCounter; using internal::BitmapWordReader; -const int64_t kBufferSize = 1024 * (std::rand() % 25 + 1000); - -// const int seed = std::rand(); +const int64_t kBufferSize = 1024 * 1024; static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); @@ -51,20 +49,27 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { static void BitBlockCounterBench(benchmark::State& state) { int64_t nbytes = state.range(0); std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); + std::shared_ptr data_buf = CreateRandomBuffer(nbytes * 8 * 8); + std::shared_ptr dest_buf = CreateRandomBuffer(nbytes * 8 * 8); for (auto _ : state) { BitBlockCounter counter(cond_buf->data(), 0, nbytes * 8); - int64_t offset = 0; - int64_t set_bits = 0; + const uint8_t* cond_ptr = cond_buf->data(); + const uint64_t* data_ptr = reinterpret_cast(data_buf->data()); + uint64_t* dest_ptr = reinterpret_cast(dest_buf->mutable_data()); + int64_t offset = 0; while (offset < nbytes * 8) { const BitBlockCount& word = counter.NextWord(); - // if (word.AllSet()) { - // set_bits += word.length; - // } else if (word.popcount) { - // set_bits += word.popcount; - // } - set_bits += word.popcount; + if (word.AllSet()) { + std::memcpy(dest_ptr + offset, data_ptr + offset, word.length * 8); + } else if (word.popcount) { + for (int64_t i = 0; i < word.length; i++) { + if (GetBit(cond_ptr, offset + i)) { + dest_ptr[offset + i] = data_ptr[offset + i]; + } + } + } offset += word.length; } benchmark::ClobberMemory(); @@ -76,27 +81,47 @@ static void BitBlockCounterBench(benchmark::State& state) { static void BitmapWordReaderBench(benchmark::State& state) { int64_t nbytes = state.range(0); std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); + std::shared_ptr data_buf = CreateRandomBuffer(nbytes * 8 * 8); + std::shared_ptr dest_buf = CreateRandomBuffer(nbytes * 8 * 8); + for (auto _ : state) { BitmapWordReader counter(cond_buf->data(), 0, nbytes * 8); - int64_t set_bits = 0; + const uint8_t* cond_ptr = cond_buf->data(); + const auto* data_ptr = reinterpret_cast(data_buf->data()); + auto* dest_ptr = reinterpret_cast(dest_buf->mutable_data()); + int64_t offset = 0; int64_t cnt = counter.words(); while (cnt--) { const auto& word = counter.NextWord(); - // if (word == UINT64_MAX) { - // set_bits += sizeof(uint64_t) * 8; - // } else if (word) { - // set_bits += PopCount(word); - // } - set_bits += PopCount(word); + if (word == UINT64_MAX) { + std::memcpy(dest_ptr + offset, data_ptr + offset, 64 * 8); + } else if (word) { + for (int64_t i = 0; i < 8; i++) { + if (GetBit(cond_ptr, offset + i)) { + dest_ptr[offset + i] = data_ptr[offset + i]; + } + } + } + offset += 8; } cnt = counter.trailing_bytes(); while (cnt--) { int valid_bits; - const auto& byte = static_cast(counter.NextTrailingByte(valid_bits)); - set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte); + const auto& byte = counter.NextTrailingByte(valid_bits); + if (byte == UINT8_MAX && valid_bits == 8) { + std::memcpy(dest_ptr, data_ptr, 8 * 8); + } else { + for (int64_t i = 0; i < valid_bits; i++) { + if (GetBit(cond_ptr, offset + i)) { + dest_ptr[offset + i] = data_ptr[offset + i]; + } + } + } + + offset += valid_bits; } benchmark::ClobberMemory(); } From bd7463a3e50db47b906ff4f15297554a0445da72 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 09:19:54 -0400 Subject: [PATCH 20/46] Revert "adding benchmark1" This reverts commit 67d60872 --- cpp/src/arrow/util/bit_util_benchmark_temp.cc | 65 ++++++------------- 1 file changed, 20 insertions(+), 45 deletions(-) diff --git a/cpp/src/arrow/util/bit_util_benchmark_temp.cc b/cpp/src/arrow/util/bit_util_benchmark_temp.cc index 2230f2c6dfb..d0b67dea701 100644 --- a/cpp/src/arrow/util/bit_util_benchmark_temp.cc +++ b/cpp/src/arrow/util/bit_util_benchmark_temp.cc @@ -37,7 +37,9 @@ using internal::BitBlockCount; using internal::BitBlockCounter; using internal::BitmapWordReader; -const int64_t kBufferSize = 1024 * 1024; +const int64_t kBufferSize = 1024 * (std::rand() % 25 + 1000); + +// const int seed = std::rand(); static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); @@ -49,27 +51,20 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { static void BitBlockCounterBench(benchmark::State& state) { int64_t nbytes = state.range(0); std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); - std::shared_ptr data_buf = CreateRandomBuffer(nbytes * 8 * 8); - std::shared_ptr dest_buf = CreateRandomBuffer(nbytes * 8 * 8); for (auto _ : state) { BitBlockCounter counter(cond_buf->data(), 0, nbytes * 8); - const uint8_t* cond_ptr = cond_buf->data(); - const uint64_t* data_ptr = reinterpret_cast(data_buf->data()); - uint64_t* dest_ptr = reinterpret_cast(dest_buf->mutable_data()); - int64_t offset = 0; + int64_t set_bits = 0; + while (offset < nbytes * 8) { const BitBlockCount& word = counter.NextWord(); - if (word.AllSet()) { - std::memcpy(dest_ptr + offset, data_ptr + offset, word.length * 8); - } else if (word.popcount) { - for (int64_t i = 0; i < word.length; i++) { - if (GetBit(cond_ptr, offset + i)) { - dest_ptr[offset + i] = data_ptr[offset + i]; - } - } - } + // if (word.AllSet()) { + // set_bits += word.length; + // } else if (word.popcount) { + // set_bits += word.popcount; + // } + set_bits += word.popcount; offset += word.length; } benchmark::ClobberMemory(); @@ -81,47 +76,27 @@ static void BitBlockCounterBench(benchmark::State& state) { static void BitmapWordReaderBench(benchmark::State& state) { int64_t nbytes = state.range(0); std::shared_ptr cond_buf = CreateRandomBuffer(nbytes); - std::shared_ptr data_buf = CreateRandomBuffer(nbytes * 8 * 8); - std::shared_ptr dest_buf = CreateRandomBuffer(nbytes * 8 * 8); - for (auto _ : state) { BitmapWordReader counter(cond_buf->data(), 0, nbytes * 8); - const uint8_t* cond_ptr = cond_buf->data(); - const auto* data_ptr = reinterpret_cast(data_buf->data()); - auto* dest_ptr = reinterpret_cast(dest_buf->mutable_data()); + int64_t set_bits = 0; - int64_t offset = 0; int64_t cnt = counter.words(); while (cnt--) { const auto& word = counter.NextWord(); - if (word == UINT64_MAX) { - std::memcpy(dest_ptr + offset, data_ptr + offset, 64 * 8); - } else if (word) { - for (int64_t i = 0; i < 8; i++) { - if (GetBit(cond_ptr, offset + i)) { - dest_ptr[offset + i] = data_ptr[offset + i]; - } - } - } - offset += 8; + // if (word == UINT64_MAX) { + // set_bits += sizeof(uint64_t) * 8; + // } else if (word) { + // set_bits += PopCount(word); + // } + set_bits += PopCount(word); } cnt = counter.trailing_bytes(); while (cnt--) { int valid_bits; - const auto& byte = counter.NextTrailingByte(valid_bits); - if (byte == UINT8_MAX && valid_bits == 8) { - std::memcpy(dest_ptr, data_ptr, 8 * 8); - } else { - for (int64_t i = 0; i < valid_bits; i++) { - if (GetBit(cond_ptr, offset + i)) { - dest_ptr[offset + i] = data_ptr[offset + i]; - } - } - } - - offset += valid_bits; + const auto& byte = static_cast(counter.NextTrailingByte(valid_bits)); + set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte); } benchmark::ClobberMemory(); } From 83904223d81580b89fd2588df38332f72cbf6a55 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 09:20:18 -0400 Subject: [PATCH 21/46] adding do not optimize --- cpp/src/arrow/util/bit_util_benchmark_temp.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/bit_util_benchmark_temp.cc b/cpp/src/arrow/util/bit_util_benchmark_temp.cc index d0b67dea701..359653c9644 100644 --- a/cpp/src/arrow/util/bit_util_benchmark_temp.cc +++ b/cpp/src/arrow/util/bit_util_benchmark_temp.cc @@ -55,7 +55,7 @@ static void BitBlockCounterBench(benchmark::State& state) { BitBlockCounter counter(cond_buf->data(), 0, nbytes * 8); int64_t offset = 0; - int64_t set_bits = 0; + uint64_t set_bits = 0; while (offset < nbytes * 8) { const BitBlockCount& word = counter.NextWord(); @@ -65,6 +65,7 @@ static void BitBlockCounterBench(benchmark::State& state) { // set_bits += word.popcount; // } set_bits += word.popcount; + benchmark::DoNotOptimize(set_bits); offset += word.length; } benchmark::ClobberMemory(); @@ -90,6 +91,7 @@ static void BitmapWordReaderBench(benchmark::State& state) { // set_bits += PopCount(word); // } set_bits += PopCount(word); + benchmark::DoNotOptimize(set_bits); } cnt = counter.trailing_bytes(); @@ -97,6 +99,7 @@ static void BitmapWordReaderBench(benchmark::State& state) { int valid_bits; const auto& byte = static_cast(counter.NextTrailingByte(valid_bits)); set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte); + benchmark::DoNotOptimize(set_bits); } benchmark::ClobberMemory(); } From 952015a4fdc5a4708123393dd09f16e8f935da96 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 11:11:19 -0400 Subject: [PATCH 22/46] adding ifelse bench --- cpp/src/arrow/compute/kernels/CMakeLists.txt | 1 + .../kernels/scalar_if_else_benchmark.cc | 64 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 326578588a7..3362d91cbe8 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -37,6 +37,7 @@ add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute") +add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute") add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc new file mode 100644 index 00000000000..09336d93091 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -0,0 +1,64 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +namespace arrow { +namespace compute { + +const int64_t elems = 1024 * 1024; + +template +static void IfElseBench(benchmark::State& state) { + using CType = typename Type::c_type; + auto type = TypeTraits::type_singleton(); + using ArrayType = typename TypeTraits::ArrayType; + + int64_t len = state.range(0); + + random::RandomArrayGenerator rand(/*seed=*/0); + + auto cond = std::static_pointer_cast( + rand.ArrayOf(boolean(), len, /*null_probability=*/0.01)); + auto left = std::static_pointer_cast( + rand.ArrayOf(type, len, /*null_probability=*/0.01)); + auto right = std::static_pointer_cast( + rand.ArrayOf(type, len, /*null_probability=*/0.01)); + + for (auto _ : state) { + ABORT_NOT_OK(IfElse(cond, left, right)); + } + + state.SetBytesProcessed(state.iterations() * (len / 8 + 2 * len * sizeof(CType))); +} + +static void IfElseBench64Wide(benchmark::State& state) { + return IfElseBench(state); +} + +static void IfElseBench32Wide(benchmark::State& state) { + return IfElseBench(state); +} + +BENCHMARK(IfElseBench32Wide)->Arg(elems); +BENCHMARK(IfElseBench64Wide)->Arg(elems); + +} // namespace compute +} // namespace arrow \ No newline at end of file From 588373bf4fb98ccbdf0a7a204a45124635098e65 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 11:29:52 -0400 Subject: [PATCH 23/46] adding offset bench --- .../compute/kernels/scalar_if_else_benchmark.cc | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index 09336d93091..c3afa94da5f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -32,6 +32,7 @@ static void IfElseBench(benchmark::State& state) { using ArrayType = typename TypeTraits::ArrayType; int64_t len = state.range(0); + int64_t offset = state.range(1); random::RandomArrayGenerator rand(/*seed=*/0); @@ -43,10 +44,11 @@ static void IfElseBench(benchmark::State& state) { rand.ArrayOf(type, len, /*null_probability=*/0.01)); for (auto _ : state) { - ABORT_NOT_OK(IfElse(cond, left, right)); + ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset))); } - state.SetBytesProcessed(state.iterations() * (len / 8 + 2 * len * sizeof(CType))); + state.SetBytesProcessed(state.iterations() * + ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType))); } static void IfElseBench64Wide(benchmark::State& state) { @@ -57,8 +59,11 @@ static void IfElseBench32Wide(benchmark::State& state) { return IfElseBench(state); } -BENCHMARK(IfElseBench32Wide)->Arg(elems); -BENCHMARK(IfElseBench64Wide)->Arg(elems); +BENCHMARK(IfElseBench32Wide)->Args({elems, 0}); +BENCHMARK(IfElseBench64Wide)->Args({elems, 0}); + +BENCHMARK(IfElseBench32Wide)->Args({elems, 99}); +BENCHMARK(IfElseBench64Wide)->Args({elems, 99}); } // namespace compute } // namespace arrow \ No newline at end of file From ec127b7ee1563d104b51c45ab4783575f39b7ed6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 12:54:24 -0400 Subject: [PATCH 24/46] replacing bitblockcounter in ifelse --- .../arrow/compute/kernels/scalar_if_else.cc | 66 +++++++++++++++---- 1 file changed, 53 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 147b68f4baa..e9da2b2942f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -21,11 +21,13 @@ #include #include #include +#include namespace arrow { using internal::BitBlockCount; using internal::BitBlockCounter; using internal::Bitmap; +using internal::BitmapWordReader; namespace compute { @@ -223,28 +225,66 @@ struct IfElseFunctor> { std::memcpy(out_values, right_data, right.length * sizeof(T)); const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); // selectively copy values from left data const T* left_data = left.GetValues(1); - int64_t offset = cond.offset; + int64_t offset = 0; + int64_t bit_offset = cond.offset; + + int64_t cnt = cond_reader.words(); // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - while (offset < cond.offset + cond.length) { - const BitBlockCount& block = bit_counter.NextWord(); - if (block.AllSet()) { // all from left - std::memcpy(out_values, left_data, block.length * sizeof(T)); - } else if (block.popcount) { // selectively copy from left - for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(cond_data, offset + i)) { - out_values[i] = left_data[i]; + + // while (offset < cond.offset + cond.length) { + // const BitBlockCount& block = bit_counter.NextWord(); + // if (block.AllSet()) { // all from left + // std::memcpy(out_values, left_data, block.length * sizeof(T)); + // } else if (block.popcount) { // selectively copy from left + // for (int64_t i = 0; i < block.length; ++i) { + // if (BitUtil::GetBit(cond_data, offset + i)) { + // out_values[i] = left_data[i]; + // } + // } + // } + // + // offset += block.length; + // out_values += block.length; + // left_data += block.length; + // } + + constexpr int64_t WordBitsSize = sizeof(uint64_t) * 8; + while (cnt--) { + uint64_t word = cond_reader.NextWord(); + if (word == UINT64_MAX) { + std::memcpy(out_values + offset, left_data + offset, WordBitsSize * sizeof(T)); + } else if (word) { + for (int64_t i = 0; i < WordBitsSize; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[i + offset] = left_data[i + offset]; } } } + offset += WordBitsSize; + bit_offset += WordBitsSize; + } - offset += block.length; - out_values += block.length; - left_data += block.length; + cnt = cond_reader.trailing_bytes(); + while (cnt--) { + int valid_bits; + uint8_t byte = cond_reader.NextTrailingByte(valid_bits); + if (byte == UINT8_MAX && valid_bits == 8) { + std::memcpy(out_values + offset, left_data + offset, 8 * sizeof(T)); + } else if (byte) { + for (int i = 0; i < valid_bits; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[i + offset] = left_data[i + offset]; + } + } + } + offset += 8; + bit_offset += 8; } out->buffers[1] = std::move(out_buf); From 8d9023c43ed36bffe2825bdde0d03c86f4fd79b5 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 14:43:45 -0400 Subject: [PATCH 25/46] replacing bitblockcounter in ifelse --- .../arrow/compute/kernels/scalar_if_else.cc | 177 +++++++++++------- 1 file changed, 108 insertions(+), 69 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index e9da2b2942f..4d31cdbcf5d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -212,6 +212,8 @@ struct IfElseFunctor> { using T = typename TypeTraits::CType; // A - Array // S - Scalar + using Word = uint64_t ; + static constexpr int64_t word_len = sizeof(Word) * 8; // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, @@ -225,49 +227,29 @@ struct IfElseFunctor> { std::memcpy(out_values, right_data, right.length * sizeof(T)); const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - // BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); // selectively copy values from left data const T* left_data = left.GetValues(1); - int64_t offset = 0; + int64_t data_offset = 0; int64_t bit_offset = cond.offset; - int64_t cnt = cond_reader.words(); - // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - - // while (offset < cond.offset + cond.length) { - // const BitBlockCount& block = bit_counter.NextWord(); - // if (block.AllSet()) { // all from left - // std::memcpy(out_values, left_data, block.length * sizeof(T)); - // } else if (block.popcount) { // selectively copy from left - // for (int64_t i = 0; i < block.length; ++i) { - // if (BitUtil::GetBit(cond_data, offset + i)) { - // out_values[i] = left_data[i]; - // } - // } - // } - // - // offset += block.length; - // out_values += block.length; - // left_data += block.length; - // } - - constexpr int64_t WordBitsSize = sizeof(uint64_t) * 8; + int64_t cnt = cond_reader.words(); while (cnt--) { - uint64_t word = cond_reader.NextWord(); + Word word = cond_reader.NextWord(); if (word == UINT64_MAX) { - std::memcpy(out_values + offset, left_data + offset, WordBitsSize * sizeof(T)); + std::memcpy(out_values + data_offset, left_data + data_offset, + word_len * sizeof(T)); } else if (word) { - for (int64_t i = 0; i < WordBitsSize; ++i) { + for (int64_t i = 0; i < word_len; ++i) { if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[i + offset] = left_data[i + offset]; + out_values[data_offset + i] = left_data[data_offset + i]; } } } - offset += WordBitsSize; - bit_offset += WordBitsSize; + data_offset += word_len; + bit_offset += word_len; } cnt = cond_reader.trailing_bytes(); @@ -275,15 +257,15 @@ struct IfElseFunctor> { int valid_bits; uint8_t byte = cond_reader.NextTrailingByte(valid_bits); if (byte == UINT8_MAX && valid_bits == 8) { - std::memcpy(out_values + offset, left_data + offset, 8 * sizeof(T)); + std::memcpy(out_values + data_offset, left_data + data_offset, 8 * sizeof(T)); } else if (byte) { for (int i = 0; i < valid_bits; ++i) { if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[i + offset] = left_data[i + offset]; + out_values[data_offset + i] = left_data[data_offset + i]; } } } - offset += 8; + data_offset += 8; bit_offset += 8; } @@ -303,27 +285,46 @@ struct IfElseFunctor> { std::memcpy(out_values, right_data, right.length * sizeof(T)); const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); // selectively copy values from left data T left_data = internal::UnboxScalar::Unbox(left); - int64_t offset = cond.offset; + int64_t data_offset = 0; + int64_t bit_offset = cond.offset; // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - while (offset < cond.offset + cond.length) { - const BitBlockCount& block = bit_counter.NextWord(); - if (block.AllSet()) { // all from left - std::fill(out_values, out_values + block.length, left_data); - } else if (block.popcount) { // selectively copy from left - for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(cond_data, offset + i)) { - out_values[i] = left_data; + int64_t cnt = cond_reader.words(); + while (cnt--) { + Word word = cond_reader.NextWord(); + if (word == UINT64_MAX) { + std::fill(out_values + data_offset, out_values + data_offset + word_len, + left_data); + } else if (word) { + for (int64_t i = 0; i < word_len; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = left_data; } } } + data_offset += word_len; + bit_offset += word_len; + } - offset += block.length; - out_values += block.length; + cnt = cond_reader.trailing_bytes(); + while (cnt--) { + int valid_bits; + uint8_t byte = cond_reader.NextTrailingByte(valid_bits); + if (byte == UINT8_MAX && valid_bits == 8) { + std::fill(out_values + data_offset, out_values + data_offset + 8, left_data); + } else if (byte) { + for (int i = 0; i < valid_bits; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = left_data; + } + } + } + data_offset += 8; + bit_offset += 8; } out->buffers[1] = std::move(out_buf); @@ -342,28 +343,47 @@ struct IfElseFunctor> { std::memcpy(out_values, left_data, left.length * sizeof(T)); const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); // selectively copy values from left data T right_data = internal::UnboxScalar::Unbox(right); - int64_t offset = cond.offset; + int64_t data_offset = 0; + int64_t bit_offset = cond.offset; // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) // left data is already in the output buffer. Therefore, mask needs to be inverted - while (offset < cond.offset + cond.length) { - const BitBlockCount& block = bit_counter.NextWord(); - if (block.NoneSet()) { // all from right - std::fill(out_values, out_values + block.length, right_data); - } else if (block.popcount) { // selectively copy from right - for (int64_t i = 0; i < block.length; ++i) { - if (!BitUtil::GetBit(cond_data, offset + i)) { - out_values[i] = right_data; + int64_t cnt = cond_reader.words(); + while (cnt--) { + Word word = cond_reader.NextWord(); + if (word == 0) { // all from right + std::fill(out_values + data_offset, out_values + data_offset + word_len, + right_data); + } else if (word != UINT64_MAX) { // selectively copy from right + for (int64_t i = 0; i < word_len; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = right_data; } } } + data_offset += word_len; + bit_offset += word_len; + } - offset += block.length; - out_values += block.length; + cnt = cond_reader.trailing_bytes(); + while (cnt--) { + int valid_bits; + uint8_t byte = cond_reader.NextTrailingByte(valid_bits); + if (byte == 0 && valid_bits == 8) { + std::fill(out_values + data_offset, out_values + data_offset + 8, right_data); + } else if (byte != UINT8_MAX) { + for (int i = 0; i < valid_bits; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = right_data; + } + } + } + data_offset += 8; + bit_offset += 8; } out->buffers[1] = std::move(out_buf); @@ -382,27 +402,46 @@ struct IfElseFunctor> { std::fill(out_values, out_values + cond.length, right_data); const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitBlockCounter bit_counter(cond_data, cond.offset, cond.length); + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); // selectively copy values from left data T left_data = internal::UnboxScalar::Unbox(left); - int64_t offset = cond.offset; + int64_t data_offset = 0; + int64_t bit_offset = cond.offset; // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - while (offset < cond.offset + cond.length) { - const BitBlockCount& block = bit_counter.NextWord(); - if (block.AllSet()) { // all from left - std::fill(out_values, out_values + block.length, left_data); - } else if (block.popcount) { // selectively copy from left - for (int64_t i = 0; i < block.length; ++i) { - if (BitUtil::GetBit(cond_data, offset + i)) { - out_values[i] = left_data; + int64_t cnt = cond_reader.words(); + while (cnt--) { + Word word = cond_reader.NextWord(); + if (word == UINT64_MAX) { // all from left + std::fill(out_values + data_offset, out_values + data_offset + word_len, + left_data); + } else if (word) { // selectively copy from left + for (int64_t i = 0; i < word_len; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = left_data; } } } + data_offset += word_len; + bit_offset += word_len; + } - offset += block.length; - out_values += block.length; + cnt = cond_reader.trailing_bytes(); + while (cnt--) { + int valid_bits; + uint8_t byte = cond_reader.NextTrailingByte(valid_bits); + if (byte == UINT8_MAX && valid_bits == 8) { + std::fill(out_values + data_offset, out_values + data_offset + 8, left_data); + } else if (byte) { + for (int i = 0; i < valid_bits; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + out_values[data_offset + i] = left_data; + } + } + } + data_offset += 8; + bit_offset += 8; } out->buffers[1] = std::move(out_buf); From 4e640b1c225368d4df332a4f82d747c568f2781c Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 15:14:18 -0400 Subject: [PATCH 26/46] extending bench suite --- .../kernels/scalar_if_else_benchmark.cc | 56 +++++++++++++++++-- .../compute/kernels/scalar_if_else_test.cc | 9 ++- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index c3afa94da5f..937921a05b2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include #include #include @@ -51,19 +52,62 @@ static void IfElseBench(benchmark::State& state) { ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType))); } -static void IfElseBench64Wide(benchmark::State& state) { +template +static void IfElseBenchContiguous(benchmark::State& state) { + using CType = typename Type::c_type; + auto type = TypeTraits::type_singleton(); + using ArrayType = typename TypeTraits::ArrayType; + + int64_t len = state.range(0); + int64_t offset = state.range(1); + + ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), len / 2)); + ASSERT_OK_AND_ASSIGN(auto temp2, + MakeArrayFromScalar(BooleanScalar(false), len - len / 2)); + ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2})); + auto cond = std::static_pointer_cast(concat); + + random::RandomArrayGenerator rand(/*seed=*/0); + auto left = std::static_pointer_cast( + rand.ArrayOf(type, len, /*null_probability=*/0.01)); + auto right = std::static_pointer_cast( + rand.ArrayOf(type, len, /*null_probability=*/0.01)); + + for (auto _ : state) { + ABORT_NOT_OK(IfElse(cond->Slice(offset), left->Slice(offset), right->Slice(offset))); + } + + state.SetBytesProcessed(state.iterations() * + ((len - offset) / 8 + 2 * (len - offset) * sizeof(CType))); +} + +static void IfElseBench64(benchmark::State& state) { return IfElseBench(state); } -static void IfElseBench32Wide(benchmark::State& state) { +static void IfElseBench32(benchmark::State& state) { return IfElseBench(state); } -BENCHMARK(IfElseBench32Wide)->Args({elems, 0}); -BENCHMARK(IfElseBench64Wide)->Args({elems, 0}); +static void IfElseBench64Contiguous(benchmark::State& state) { + return IfElseBenchContiguous(state); +} + +static void IfElseBench32Contiguous(benchmark::State& state) { + return IfElseBenchContiguous(state); +} + +BENCHMARK(IfElseBench32)->Args({elems, 0}); +BENCHMARK(IfElseBench64)->Args({elems, 0}); + +BENCHMARK(IfElseBench32)->Args({elems, 99}); +BENCHMARK(IfElseBench64)->Args({elems, 99}); + +BENCHMARK(IfElseBench32Contiguous)->Args({elems, 0}); +BENCHMARK(IfElseBench64Contiguous)->Args({elems, 0}); -BENCHMARK(IfElseBench32Wide)->Args({elems, 99}); -BENCHMARK(IfElseBench64Wide)->Args({elems, 99}); +BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99}); +BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99}); } // namespace compute } // namespace arrow \ No newline at end of file diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 2b63af2f26f..c9347bc6a4b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include #include @@ -56,8 +57,12 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) { random::RandomArrayGenerator rand(/*seed=*/0); int64_t len = 1000; - auto cond = std::static_pointer_cast( - rand.ArrayOf(boolean(), len, /*null_probability=*/0.01)); + ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64)); + ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64)); + auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01); + ASSERT_OK_AND_ASSIGN(auto concat, Concatenate({temp1, temp2, temp3})); + auto cond = std::static_pointer_cast(concat); + auto left = std::static_pointer_cast( rand.ArrayOf(type, len, /*null_probability=*/0.01)); auto right = std::static_pointer_cast( From d9ee399b4185c15bb2fb52be36823a6a003bd9e8 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 15:34:13 -0400 Subject: [PATCH 27/46] Update cpp/src/arrow/compute/kernels/scalar_boolean.cc Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/compute/kernels/scalar_boolean.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc index 6de4ef16031..7a0e3654edb 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc @@ -550,7 +550,6 @@ void RegisterScalarBoolean(FunctionRegistry* registry) { MakeFunction("or", 2, applicator::SimpleBinary, &or_doc, registry); MakeFunction("xor", 2, applicator::SimpleBinary, &xor_doc, registry); - // The Kleene logic kernels cannot write into sliced output bitmaps MakeFunction("and_kleene", 2, applicator::SimpleBinary, &and_kleene_doc, registry, NullHandling::COMPUTED_PREALLOCATE); MakeFunction("and_not_kleene", 2, applicator::SimpleBinary, From 197e1c4663f8e9dc058d8a7b096761df7d137ecc Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 15:51:34 -0400 Subject: [PATCH 28/46] Apply suggestions from code review Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/util/bit_util.cc | 6 +++--- cpp/src/arrow/util/bit_util.h | 6 +++--- cpp/src/arrow/util/bit_util_test.cc | 9 +-------- cpp/src/arrow/util/bitmap.h | 9 ++------- 4 files changed, 9 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 9c0ef6bc9bf..47bf1563150 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -82,7 +82,7 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { constexpr uint8_t set_byte = value ? UINT8_MAX : 0; - int prologue = static_cast(((offset + 7) / 8) * 8 - offset); + auto prologue = BitUtil::RoundUp(offset, 8) - offset; DCHECK_LT(prologue, 8); if (length < prologue) { // special case where a mask is required @@ -106,8 +106,8 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { // set values per byte DCHECK_EQ(offset % 8, 0); std::memset(data + offset / 8, set_byte, length / 8); - offset += ((length / 8) * 8); - length -= ((length / 8) * 8); + offset += BitUtil::RoundDown(length, 8); + length -= BitUtil::RoundDown(length, 8); // clean up DCHECK_LT(length, 8); diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index f0a556e21b1..adce96308b1 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -324,9 +324,9 @@ void SetBitmap(uint8_t* data, int64_t offset, int64_t length); ARROW_EXPORT void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); -template -constexpr Word WordBitMask(int i) { - return (static_cast(1) << i) - 1; +template (~static_cast(0))> +constexpr Word TrailingWordBitmask(int i) { + return ARROW_PREDICT_FALSE(i >= sizeof(Word) * 8) ? 0 : all << i; } /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 0fe39fa804b..bbd06d3cbbb 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1984,7 +1984,7 @@ void CheckSplice(int n, Word low, Word high) { : BitUtil::GetBit(reinterpret_cast(&high), i); } - ASSERT_EQ(static_cast(ret.to_ulong()), BitUtil::SpliceWord(n, low, high)); + ASSERT_EQ(BitUtil::SpliceWord(n, low, high), static_cast(ret.to_ulong()); } TEST(SpliceWord, SpliceWord) { @@ -2262,13 +2262,6 @@ TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) { std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), std::logical_or()); - // std::cout << "v0: " << VectorToString(v0) << "\n" - // << "b0: " << bm0.ToString() << "\n" - // << "v1: " << VectorToString(v1) << "\n" - // << "b1: " << bm1.ToString() << "\n" - // << "v2: " << VectorToString(v2) << "\n" - // << "b2: " << bm2.ToString() << "\n"; - VerifyBoolVectorAndBitmap(out_bms[0], out_v0); VerifyBoolVectorAndBitmap(out_bms[1], out_v1); } diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 05cc7a309f8..78bfca8d408 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -76,11 +76,6 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return Bitmap(buffer_, offset_ + offset, length); } - void Stride(int64_t stride) { - this->offset_ += stride; - this->length_ -= stride; - } - std::string ToString() const; bool Equals(const Bitmap& other) const; @@ -293,8 +288,8 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, output_words.fill(0); // every reader will have same number of words, since they are same length'ed - // todo this will be inefficient in some cases. When there are offsets beyond Word - // boundary, every Word would have to be created from 2 adjoining Words + // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond Word + // boundary, every Word would have to be created from 2 adjoining Words auto n_words = readers[0].words(); bit_length -= n_words * kBitWidth; while (n_words--) { From 4c7f445aab1fbe14777776cb1bf5dcac60c9c18d Mon Sep 17 00:00:00 2001 From: niranda perera Date: Thu, 17 Jun 2021 19:42:13 -0400 Subject: [PATCH 29/46] adding PR comments --- cpp/src/arrow/util/bit_util.cc | 4 +- cpp/src/arrow/util/bit_util.h | 16 ++- cpp/src/arrow/util/bit_util_test.cc | 198 +++++++++------------------- cpp/src/arrow/util/bitmap.h | 4 +- 4 files changed, 76 insertions(+), 146 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 47bf1563150..b1ac21e8e41 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -94,12 +94,12 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { // pro uint8_t mask = BitUtil::kPrecedingBitmask[8 - prologue] ^ BitUtil::kPrecedingBitmask[8 - prologue + length]; - data[offset / 8] |= mask; + data[offset / 8] = value ? data[offset / 8] | mask : data[offset / 8] & ~mask; return; } // align to a byte boundary - data[offset / 8] = BitUtil::SpliceWord(offset, data[offset / 8], set_byte); + data[offset / 8] = BitUtil::SpliceWord(prologue, data[offset / 8], set_byte); offset += prologue; length -= prologue; diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index adce96308b1..a9775552c7b 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -324,9 +324,17 @@ void SetBitmap(uint8_t* data, int64_t offset, int64_t length); ARROW_EXPORT void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); -template (~static_cast(0))> -constexpr Word TrailingWordBitmask(int i) { - return ARROW_PREDICT_FALSE(i >= sizeof(Word) * 8) ? 0 : all << i; +/// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be +/// returned +/// ex: +/// PrecedingWordBitmask(0)= 0x00 +/// PrecedingWordBitmask(4)= 0x0f +/// PrecedingWordBitmask(8)= 0xff +/// PrecedingWordBitmask(8)= 0x00ff +/// ref: https://stackoverflow.com/a/59523400 +template +constexpr Word PrecedingWordBitmask(unsigned int const i) { + return (static_cast(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1; } /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits @@ -337,7 +345,7 @@ constexpr Word TrailingWordBitmask(int i) { /// } template constexpr Word SpliceWord(int n, Word low, Word high) { - return (high & ~WordBitMask(n)) | (low & WordBitMask(n)); + return (high & ~PrecedingWordBitmask(n)) | (low & PrecedingWordBitmask(n)); } } // namespace BitUtil diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index bbd06d3cbbb..316a52de087 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1984,26 +1984,36 @@ void CheckSplice(int n, Word low, Word high) { : BitUtil::GetBit(reinterpret_cast(&high), i); } - ASSERT_EQ(BitUtil::SpliceWord(n, low, high), static_cast(ret.to_ulong()); + Word res = BitUtil::SpliceWord(n, low, high); + Word exp = static_cast(ret.to_ulong()); + assert(res == exp); } TEST(SpliceWord, SpliceWord) { uint64_t low = 123456789, high = 987654321; + static_assert( + BitUtil::PrecedingWordBitmask(0) == BitUtil::kPrecedingBitmask[0], ""); + static_assert( + BitUtil::PrecedingWordBitmask(5) == BitUtil::kPrecedingBitmask[5], ""); + static_assert(BitUtil::PrecedingWordBitmask(8) == UINT8_MAX, ""); + + static_assert(BitUtil::PrecedingWordBitmask(0) == uint64_t(0), ""); + static_assert(BitUtil::PrecedingWordBitmask(33) == 8589934591, ""); + static_assert(BitUtil::PrecedingWordBitmask(64) == UINT64_MAX, ""); + static_assert(BitUtil::PrecedingWordBitmask(65) == UINT64_MAX, ""); + CheckSplice(0, static_cast(low), static_cast(high)); - CheckSplice(UINT8_MAX, static_cast(low), static_cast(high)); - CheckSplice(sizeof(uint8_t) / 3, static_cast(low), - static_cast(high)); + CheckSplice(8, static_cast(low), static_cast(high)); + CheckSplice(8 / 3, static_cast(low), static_cast(high)); CheckSplice(0, static_cast(low), static_cast(high)); - CheckSplice(UINT32_MAX, static_cast(low), - static_cast(high)); - CheckSplice(sizeof(uint32_t) / 3, static_cast(low), - static_cast(high)); + CheckSplice(32, static_cast(low), static_cast(high)); + CheckSplice(32 / 3, static_cast(low), static_cast(high)); CheckSplice(0, low, high); - CheckSplice(UINT32_MAX, low, high); - CheckSplice(sizeof(uint32_t) / 3, low, high); + CheckSplice(64, low, high); + CheckSplice(64 / 3, low, high); } // test the basic assumption of word level Bitmap::Visit @@ -2187,62 +2197,29 @@ TEST(Bitmap, VisitWordsAnd) { } } -void random_bool_vector(std::vector& vec, int64_t size, double p = 0.5) { - vec.reserve(size); - std::random_device rd; - std::mt19937 gen(rd()); - std::bernoulli_distribution d(p); - - for (int n = 0; n < size; ++n) { - vec.push_back(d(gen)); - } -} - -std::string VectorToString(const std::vector& v) { - std::string out(v.size() + +((v.size() - 1) / 8), ' '); - for (size_t i = 0; i < v.size(); ++i) { - out[i + (i / 8)] = v[i] ? '1' : '0'; - } - return out; -} - -void VerifyBoolVectorAndBitmap(const Bitmap& bitmap, const std::vector& expected) { - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(expected)); - ASSERT_OK_AND_ASSIGN(auto arr, boolean_builder.Finish()); - - ASSERT_TRUE(BitmapEquals(bitmap.buffer()->data(), bitmap.offset(), - arr->data()->buffers[1]->data(), 0, expected.size())) - << "exp: " << VectorToString(expected) << "\ngot: " << bitmap.ToString(); -} - -class TestBitmapVisitAndWriteOutputNoOffset : public ::testing::TestWithParam {}; - -TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) { - auto part = GetParam(); - int64_t bits = 4 * part; - std::vector data; - random_bool_vector(data, bits); +void DoBitmapVisitAndWrite(int64_t part, bool with_offset) { + int64_t bits = part * 4; - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); + random::RandomArrayGenerator rand(/*seed=*/0); + auto arrow_data = rand.ArrayOf(boolean(), bits, 0); std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1 = bm0.Slice(part * 1, part); // this goes beyond bm0's len - Bitmap bm2 = bm0.Slice(part * 2, part); // this goes beyond bm0's len + Bitmap bm1(arrow_buffer, part * 1, part); + Bitmap bm2(arrow_buffer, part * 2, part); std::array out_bms; - ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part)); - ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part)); - out_bms[0] = Bitmap(out0, 0, part); - out_bms[1] = Bitmap(out1, 0, part); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); + if (with_offset) { + ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); + out_bms[0] = Bitmap(out, part, part); + out_bms[1] = Bitmap(out, part * 2, part); + } else { + ASSERT_OK_AND_ASSIGN(auto out0, AllocateBitmap(part)); + ASSERT_OK_AND_ASSIGN(auto out1, AllocateBitmap(part)); + out_bms[0] = Bitmap(out0, 0, part); + out_bms[1] = Bitmap(out1, 0, part); + } // out0 = bm0 & bm1, out1= bm0 | bm2 std::array in_bms{bm0, bm1, bm2}; @@ -2253,96 +2230,39 @@ TEST_P(TestBitmapVisitAndWriteOutputNoOffset, Test1) { out->at(1) = in[0] | in[2]; }); - std::vector out_v0(part); - std::vector out_v1(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), - std::logical_and()); - // v3 |= v2 - std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), - std::logical_or()); + auto pool = MemoryPool::CreateDefault(); + ASSERT_OK_AND_ASSIGN(auto exp_0, + BitmapAnd(pool.get(), bm0.buffer()->data(), bm0.offset(), + bm1.buffer()->data(), bm1.offset(), part, 0)); + ASSERT_OK_AND_ASSIGN(auto exp_1, + BitmapOr(pool.get(), bm0.buffer()->data(), bm0.offset(), + bm2.buffer()->data(), bm2.offset(), part, 0)); - VerifyBoolVectorAndBitmap(out_bms[0], out_v0); - VerifyBoolVectorAndBitmap(out_bms[1], out_v1); + ASSERT_TRUE(BitmapEquals(exp_0->data(), 0, out_bms[0].buffer()->data(), + out_bms[0].offset(), part)) + << "exp: " << Bitmap(exp_0->data(), 0, part).ToString() << std::endl + << "got: " << out_bms[0].ToString(); + + ASSERT_TRUE(BitmapEquals(exp_1->data(), 0, out_bms[1].buffer()->data(), + out_bms[1].offset(), part)) + << "exp: " << Bitmap(exp_1->data(), 0, part).ToString() << std::endl + << "got: " << out_bms[1].ToString(); } -INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWriteOutputNoOffset, +class TestBitmapVisitAndWrite : public ::testing::TestWithParam {}; + +INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWrite, testing::Values(199, 256, 1000)); -INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWriteOutputNoOffset, +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWrite, testing::Values(5, 13, 21, 29, 37, 41, 51, 59, 64, 97)); -INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWriteOutputNoOffset, +INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWrite, testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); -class TestBitmapVisitAndWriteOutputWithOffset : public ::testing::TestWithParam { -}; - -TEST_P(TestBitmapVisitAndWriteOutputWithOffset, Test2) { - auto part = GetParam(); - int64_t bits = part * 4; - std::vector data; - random_bool_vector(data, bits); - - arrow::BooleanBuilder boolean_builder; - ASSERT_OK(boolean_builder.AppendValues(data)); - ASSERT_OK_AND_ASSIGN(auto arrow_data, boolean_builder.Finish()); +TEST_P(TestBitmapVisitAndWrite, NoOffset) { DoBitmapVisitAndWrite(GetParam(), false); } - std::shared_ptr& arrow_buffer = arrow_data->data()->buffers[1]; - - Bitmap bm0(arrow_buffer, 0, part); - Bitmap bm1(arrow_buffer, part * 1, part); - Bitmap bm2(arrow_buffer, part * 2, part); - - std::array out_bms; - ASSERT_OK_AND_ASSIGN(auto out, AllocateBitmap(part * 4)); - out_bms[0] = Bitmap(out, part, part); - out_bms[1] = Bitmap(out, part * 2, part); - - std::vector v0(data.begin(), data.begin() + part); - std::vector v1(data.begin() + part * 1, data.begin() + part * 2); - std::vector v2(data.begin() + part * 2, data.begin() + part * 3); - - // std::cout << "v0: " << VectorToString(v0) << "\n" - // << "b0: " << bm0.ToString() << "\n" - // << "v1: " << VectorToString(v1) << "\n" - // << "b1: " << bm1.ToString() << "\n" - // << "v2: " << VectorToString(v2) << "\n" - // << "b2: " << bm2.ToString() << "\n"; - - std::vector out_v0(part); - std::vector out_v1(part); - // v3 = v0 & v1 - std::transform(v0.begin(), v0.end(), v1.begin(), out_v0.begin(), - std::logical_and()); - // v3 |= v2 - std::transform(v0.begin(), v0.end(), v2.begin(), out_v1.begin(), - std::logical_or()); - - // std::cout << "out0: " << VectorToString(out_v0) << "\n" - // << "out1: " << VectorToString(out_v1) << "\n"; - - // out0 = bm0 & bm1, out1= bm0 | bm2 - std::array in_bms{bm0, bm1, bm2}; - Bitmap::VisitWordsAndWrite( - in_bms, &out_bms, - [](const std::array& in, std::array* out) { - out->at(0) = in[0] & in[1]; - out->at(1) = in[0] | in[2]; - }); - - VerifyBoolVectorAndBitmap(out_bms[0], out_v0); - VerifyBoolVectorAndBitmap(out_bms[1], out_v1); -} - -INSTANTIATE_TEST_SUITE_P(VisitWriteGeneral, TestBitmapVisitAndWriteOutputWithOffset, - testing::Values(199, 256, 1000)); - -INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases, TestBitmapVisitAndWriteOutputWithOffset, - testing::Values(7, 15, 23, 31, 39, 47, 55, 63, 73, 97)); - -INSTANTIATE_TEST_SUITE_P(VisitWriteEdgeCases2, TestBitmapVisitAndWriteOutputWithOffset, - testing::Values(8, 16, 24, 32, 40, 48, 56, 64)); +TEST_P(TestBitmapVisitAndWrite, WithOffset) { DoBitmapVisitAndWrite(GetParam(), true); } } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 78bfca8d408..619135bab2d 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -315,7 +315,9 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, std::array out_words; std::copy(in.begin(), in.end(), in_words.begin()); visitor(in_words, &out_words); - std::move(out_words.begin(), out_words.end(), out->begin()); + for (size_t i = 0; i < M; i++) { + out->at(i) = static_cast(out_words[i]); + } }; std::array visited_bytes; From b12a20d87605bf84a07759c5c16e034c0caa8f53 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 18 Jun 2021 08:50:14 -0400 Subject: [PATCH 30/46] fixing errors --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 4 ++-- cpp/src/arrow/util/bit_util.cc | 5 +++-- cpp/src/arrow/util/bit_util_test.cc | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 4d31cdbcf5d..a71705816b8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -212,7 +212,7 @@ struct IfElseFunctor> { using T = typename TypeTraits::CType; // A - Array // S - Scalar - using Word = uint64_t ; + using Word = uint64_t; static constexpr int64_t word_len = sizeof(Word) * 8; // AAA @@ -413,7 +413,7 @@ struct IfElseFunctor> { int64_t cnt = cond_reader.words(); while (cnt--) { Word word = cond_reader.NextWord(); - if (word == UINT64_MAX) { // all from left + if (word == UINT64_MAX) { // all from left std::fill(out_values + data_offset, out_values + data_offset + word_len, left_data); } else if (word) { // selectively copy from left diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index b1ac21e8e41..e33b65b841a 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -82,7 +82,7 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { constexpr uint8_t set_byte = value ? UINT8_MAX : 0; - auto prologue = BitUtil::RoundUp(offset, 8) - offset; + auto prologue = static_cast(BitUtil::RoundUp(offset, 8) - offset); DCHECK_LT(prologue, 8); if (length < prologue) { // special case where a mask is required @@ -111,7 +111,8 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { // clean up DCHECK_LT(length, 8); - data[offset / 8] = BitUtil::SpliceWord(length, set_byte, data[offset / 8]); + data[offset / 8] = + BitUtil::SpliceWord(static_cast(length), set_byte, data[offset / 8]); } void SetBitmap(uint8_t* data, int64_t offset, int64_t length) { diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 316a52de087..4568dc4219d 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1986,7 +1986,7 @@ void CheckSplice(int n, Word low, Word high) { Word res = BitUtil::SpliceWord(n, low, high); Word exp = static_cast(ret.to_ulong()); - assert(res == exp); + ASSERT_EQ(exp, res) << "exp: " << exp << " got: " << res << std::endl; } TEST(SpliceWord, SpliceWord) { From a20e2959440ca54c1a8ee61a0a690ed806041ee6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 18 Jun 2021 15:24:34 -0400 Subject: [PATCH 31/46] simplifying if-else --- .../arrow/compute/kernels/scalar_if_else.cc | 148 ++++++------------ 1 file changed, 47 insertions(+), 101 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index a71705816b8..e59b9828ee0 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -215,36 +215,24 @@ struct IfElseFunctor> { using Word = uint64_t; static constexpr int64_t word_len = sizeof(Word) * 8; - // AAA - static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, - const ArrayData& right, ArrayData* out) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - ctx->Allocate(cond.length * sizeof(T))); - T* out_values = reinterpret_cast(out_buf->mutable_data()); - - // copy right data to out_buff - const T* right_data = right.GetValues(1); - std::memcpy(out_values, right_data, right.length * sizeof(T)); - - const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - - // selectively copy values from left data - const T* left_data = left.GetValues(1); + template + static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk, + HandleEach handle_each) { int64_t data_offset = 0; int64_t bit_offset = cond.offset; + const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray + + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) int64_t cnt = cond_reader.words(); while (cnt--) { Word word = cond_reader.NextWord(); if (word == UINT64_MAX) { - std::memcpy(out_values + data_offset, left_data + data_offset, - word_len * sizeof(T)); + handle_bulk(data_offset, word_len); } else if (word) { for (int64_t i = 0; i < word_len; ++i) { if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data[data_offset + i]; + handle_each(data_offset + i); } } } @@ -257,17 +245,40 @@ struct IfElseFunctor> { int valid_bits; uint8_t byte = cond_reader.NextTrailingByte(valid_bits); if (byte == UINT8_MAX && valid_bits == 8) { - std::memcpy(out_values + data_offset, left_data + data_offset, 8 * sizeof(T)); + handle_bulk(data_offset, 8); } else if (byte) { for (int i = 0; i < valid_bits; ++i) { if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data[data_offset + i]; + handle_each(data_offset + i); } } } data_offset += 8; bit_offset += 8; } + } + + // AAA + static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, + const ArrayData& right, ArrayData* out) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, + ctx->Allocate(cond.length * sizeof(T))); + T* out_values = reinterpret_cast(out_buf->mutable_data()); + + // copy right data to out_buff + const T* right_data = right.GetValues(1); + std::memcpy(out_values, right_data, right.length * sizeof(T)); + + // selectively copy values from left data + const T* left_data = left.GetValues(1); + + RunIfElseLoop( + cond, + [&](int64_t data_offset, int64_t num_elems) { + std::memcpy(out_values + data_offset, left_data + data_offset, + num_elems * sizeof(T)); + }, + [&](int64_t data_offset) { out_values[data_offset] = left_data[data_offset]; }); out->buffers[1] = std::move(out_buf); return Status::OK(); @@ -284,48 +295,16 @@ struct IfElseFunctor> { const T* right_data = right.GetValues(1); std::memcpy(out_values, right_data, right.length * sizeof(T)); - const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - // selectively copy values from left data T left_data = internal::UnboxScalar::Unbox(left); - int64_t data_offset = 0; - int64_t bit_offset = cond.offset; - // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - int64_t cnt = cond_reader.words(); - while (cnt--) { - Word word = cond_reader.NextWord(); - if (word == UINT64_MAX) { - std::fill(out_values + data_offset, out_values + data_offset + word_len, - left_data); - } else if (word) { - for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data; - } - } - } - data_offset += word_len; - bit_offset += word_len; - } - - cnt = cond_reader.trailing_bytes(); - while (cnt--) { - int valid_bits; - uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (byte == UINT8_MAX && valid_bits == 8) { - std::fill(out_values + data_offset, out_values + data_offset + 8, left_data); - } else if (byte) { - for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data; - } - } - } - data_offset += 8; - bit_offset += 8; - } + RunIfElseLoop( + cond, + [&](int64_t data_offset, int64_t num_elems) { + std::fill(out_values + data_offset, out_values + data_offset + num_elems, + left_data); + }, + [&](int64_t data_offset) { out_values[data_offset] = left_data; }); out->buffers[1] = std::move(out_buf); return Status::OK(); @@ -401,48 +380,15 @@ struct IfElseFunctor> { T right_data = internal::UnboxScalar::Unbox(right); std::fill(out_values, out_values + cond.length, right_data); - const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - // selectively copy values from left data T left_data = internal::UnboxScalar::Unbox(left); - int64_t data_offset = 0; - int64_t bit_offset = cond.offset; - - // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - int64_t cnt = cond_reader.words(); - while (cnt--) { - Word word = cond_reader.NextWord(); - if (word == UINT64_MAX) { // all from left - std::fill(out_values + data_offset, out_values + data_offset + word_len, - left_data); - } else if (word) { // selectively copy from left - for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data; - } - } - } - data_offset += word_len; - bit_offset += word_len; - } - - cnt = cond_reader.trailing_bytes(); - while (cnt--) { - int valid_bits; - uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (byte == UINT8_MAX && valid_bits == 8) { - std::fill(out_values + data_offset, out_values + data_offset + 8, left_data); - } else if (byte) { - for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = left_data; - } - } - } - data_offset += 8; - bit_offset += 8; - } + RunIfElseLoop( + cond, + [&](int64_t data_offset, int64_t num_elems) { + std::fill(out_values + data_offset, out_values + data_offset + num_elems, + left_data); + }, + [&](int64_t data_offset) { out_values[data_offset] = left_data; }); out->buffers[1] = std::move(out_buf); return Status::OK(); From e37be50942c1e47bd46ec2c71b406f1c3b70de27 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 18 Jun 2021 16:07:27 -0400 Subject: [PATCH 32/46] simplifying if-else --- .../arrow/compute/kernels/scalar_if_else.cc | 79 ++++++++----------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index e59b9828ee0..69426bb630a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -215,7 +215,15 @@ struct IfElseFunctor> { using Word = uint64_t; static constexpr int64_t word_len = sizeof(Word) * 8; - template + /// Runs the main if_else loop. Here, it is expected that the right data has already + /// been copied to the output. + /// If invert_mask is meant to invert the cond.data. If is set to ~Word(0), then the + /// buffer will be inverted before calling the handle_bulk or handle_each functions. + /// This is useful, when left is an array and right is scalar. Then rather than + /// copying data from the right to output, we can copy left data to the output and + /// invert the cond data to fill right values. Filling out with a scalar is presumed to + /// be more efficient than filling with an array + template static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk, HandleEach handle_each) { int64_t data_offset = 0; @@ -227,11 +235,12 @@ struct IfElseFunctor> { int64_t cnt = cond_reader.words(); while (cnt--) { Word word = cond_reader.NextWord(); - if (word == UINT64_MAX) { + if ((word ^ invert_mask) == UINT64_MAX) { handle_bulk(data_offset, word_len); - } else if (word) { + } else if (word ^ invert_mask) { for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { + if (BitUtil::GetBit(cond_data, bit_offset + i) ^ + static_cast(invert_mask)) { handle_each(data_offset + i); } } @@ -244,11 +253,12 @@ struct IfElseFunctor> { while (cnt--) { int valid_bits; uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (byte == UINT8_MAX && valid_bits == 8) { + if (((byte ^ static_cast(invert_mask)) == UINT8_MAX) && valid_bits == 8) { handle_bulk(data_offset, 8); - } else if (byte) { + } else if (byte ^ static_cast(invert_mask)) { for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { + if (BitUtil::GetBit(cond_data, bit_offset + i) ^ + static_cast(invert_mask)) { handle_each(data_offset + i); } } @@ -258,6 +268,13 @@ struct IfElseFunctor> { } } + template + static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk, + HandleEach handle_each) { + return RunIfElseLoop(cond, handle_bulk, + handle_each); + } + // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const ArrayData& right, ArrayData* out) { @@ -321,49 +338,15 @@ struct IfElseFunctor> { const T* left_data = left.GetValues(1); std::memcpy(out_values, left_data, left.length * sizeof(T)); - const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - - // selectively copy values from left data T right_data = internal::UnboxScalar::Unbox(right); - int64_t data_offset = 0; - int64_t bit_offset = cond.offset; - - // todo this can be improved by intrinsics. ex: _mm*_mask_store_e* (vmovdqa*) - // left data is already in the output buffer. Therefore, mask needs to be inverted - int64_t cnt = cond_reader.words(); - while (cnt--) { - Word word = cond_reader.NextWord(); - if (word == 0) { // all from right - std::fill(out_values + data_offset, out_values + data_offset + word_len, - right_data); - } else if (word != UINT64_MAX) { // selectively copy from right - for (int64_t i = 0; i < word_len; ++i) { - if (!BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = right_data; - } - } - } - data_offset += word_len; - bit_offset += word_len; - } - cnt = cond_reader.trailing_bytes(); - while (cnt--) { - int valid_bits; - uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (byte == 0 && valid_bits == 8) { - std::fill(out_values + data_offset, out_values + data_offset + 8, right_data); - } else if (byte != UINT8_MAX) { - for (int i = 0; i < valid_bits; ++i) { - if (!BitUtil::GetBit(cond_data, bit_offset + i)) { - out_values[data_offset + i] = right_data; - } - } - } - data_offset += 8; - bit_offset += 8; - } + RunIfElseLoopInverted( + cond, + [&](int64_t data_offset, int64_t num_elems) { + std::fill(out_values + data_offset, out_values + data_offset + num_elems, + right_data); + }, + [&](int64_t data_offset) { out_values[data_offset] = right_data; }); out->buffers[1] = std::move(out_buf); return Status::OK(); From 6c71c36c4f1141c8a728d0d5a0641f3b9740d147 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Fri, 18 Jun 2021 18:09:35 -0400 Subject: [PATCH 33/46] fixing errors --- cpp/src/arrow/util/bit_util.h | 2 +- cpp/src/arrow/util/bit_util_test.cc | 37 ++++++++++------------------- 2 files changed, 13 insertions(+), 26 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index a9775552c7b..95969dbd2da 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -340,7 +340,7 @@ constexpr Word PrecedingWordBitmask(unsigned int const i) { /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits /// from `high`. /// Word ret -/// for (i = 0; i < sizeof(Word); i++){ +/// for (i = 0; i < sizeof(Word)*8; i++){ /// ret[i]= i < n ? low[i]: high[i]; /// } template diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 4568dc4219d..2b42e3b34e4 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1975,23 +1975,7 @@ TEST(BitUtil, BitsetStack) { ASSERT_EQ(stack.TopSize(), 0); } -template -void CheckSplice(int n, Word low, Word high) { - std::bitset ret; - for (size_t i = 0; i < ret.size(); i++) { - ret[i] = i < static_cast(n) - ? BitUtil::GetBit(reinterpret_cast(&low), i) - : BitUtil::GetBit(reinterpret_cast(&high), i); - } - - Word res = BitUtil::SpliceWord(n, low, high); - Word exp = static_cast(ret.to_ulong()); - ASSERT_EQ(exp, res) << "exp: " << exp << " got: " << res << std::endl; -} - TEST(SpliceWord, SpliceWord) { - uint64_t low = 123456789, high = 987654321; - static_assert( BitUtil::PrecedingWordBitmask(0) == BitUtil::kPrecedingBitmask[0], ""); static_assert( @@ -2003,17 +1987,20 @@ TEST(SpliceWord, SpliceWord) { static_assert(BitUtil::PrecedingWordBitmask(64) == UINT64_MAX, ""); static_assert(BitUtil::PrecedingWordBitmask(65) == UINT64_MAX, ""); - CheckSplice(0, static_cast(low), static_cast(high)); - CheckSplice(8, static_cast(low), static_cast(high)); - CheckSplice(8 / 3, static_cast(low), static_cast(high)); + ASSERT_EQ(BitUtil::SpliceWord(0, 0x12, 0xef), 0xef); + ASSERT_EQ(BitUtil::SpliceWord(8, 0x12, 0xef), 0x12); + ASSERT_EQ(BitUtil::SpliceWord(3, 0x12, 0xef), 0xea); - CheckSplice(0, static_cast(low), static_cast(high)); - CheckSplice(32, static_cast(low), static_cast(high)); - CheckSplice(32 / 3, static_cast(low), static_cast(high)); + ASSERT_EQ(BitUtil::SpliceWord(0, 0x12345678, 0xfedcba98), 0xfedcba98); + ASSERT_EQ(BitUtil::SpliceWord(32, 0x12345678, 0xfedcba98), 0x12345678); + ASSERT_EQ(BitUtil::SpliceWord(24, 0x12345678, 0xfedcba98), 0xfe345678); - CheckSplice(0, low, high); - CheckSplice(64, low, high); - CheckSplice(64 / 3, low, high); + ASSERT_EQ(BitUtil::SpliceWord(0, 0x0123456789abcdef, 0xfedcba9876543210), + 0xfedcba9876543210); + ASSERT_EQ(BitUtil::SpliceWord(64, 0x0123456789abcdef, 0xfedcba9876543210), + 0x0123456789abcdef); + ASSERT_EQ(BitUtil::SpliceWord(48, 0x0123456789abcdef, 0xfedcba9876543210), + 0xfedc456789abcdef); } // test the basic assumption of word level Bitmap::Visit From aeb48ae03562b398c19728babfc95f026eb2bcac Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 19 Jun 2021 11:10:50 -0400 Subject: [PATCH 34/46] attempting to fix msvc error --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 4 ++-- cpp/src/arrow/util/bitmap.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 69426bb630a..7b4be7ed46c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -239,7 +239,7 @@ struct IfElseFunctor> { handle_bulk(data_offset, word_len); } else if (word ^ invert_mask) { for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i) ^ + if (BitUtil::GetBit(cond_data, bit_offset + i) != static_cast(invert_mask)) { handle_each(data_offset + i); } @@ -257,7 +257,7 @@ struct IfElseFunctor> { handle_bulk(data_offset, 8); } else if (byte ^ static_cast(invert_mask)) { for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i) ^ + if (BitUtil::GetBit(cond_data, bit_offset + i) != static_cast(invert_mask)) { handle_each(data_offset + i); } diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 619135bab2d..674ff96ca5d 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -288,8 +288,8 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, output_words.fill(0); // every reader will have same number of words, since they are same length'ed - // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond Word - // boundary, every Word would have to be created from 2 adjoining Words + // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond + // Word boundary, every Word would have to be created from 2 adjoining Words auto n_words = readers[0].words(); bit_length -= n_words * kBitWidth; while (n_words--) { From 4519dd3585fecef8645267a2ae457554aa9be0a6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 19 Jun 2021 14:14:58 -0400 Subject: [PATCH 35/46] lint fix --- cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index 937921a05b2..98fb675da40 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -110,4 +110,4 @@ BENCHMARK(IfElseBench32Contiguous)->Args({elems, 99}); BENCHMARK(IfElseBench64Contiguous)->Args({elems, 99}); } // namespace compute -} // namespace arrow \ No newline at end of file +} // namespace arrow From 30ec72ec7aa7e6744521168b84790fbf238501b9 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 19 Jun 2021 14:51:20 -0400 Subject: [PATCH 36/46] fixing the down casting issue --- .../arrow/compute/kernels/scalar_if_else.cc | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 7b4be7ed46c..f83b01df913 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -223,7 +223,7 @@ struct IfElseFunctor> { /// copying data from the right to output, we can copy left data to the output and /// invert the cond data to fill right values. Filling out with a scalar is presumed to /// be more efficient than filling with an array - template + template static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk, HandleEach handle_each) { int64_t data_offset = 0; @@ -235,13 +235,24 @@ struct IfElseFunctor> { int64_t cnt = cond_reader.words(); while (cnt--) { Word word = cond_reader.NextWord(); - if ((word ^ invert_mask) == UINT64_MAX) { - handle_bulk(data_offset, word_len); - } else if (word ^ invert_mask) { - for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i) != - static_cast(invert_mask)) { - handle_each(data_offset + i); + if (invert) { + if (word == 0) { + handle_bulk(data_offset, word_len); + } else if (word != UINT64_MAX) { + for (int64_t i = 0; i < word_len; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); + } + } + } + } else { + if (word == UINT64_MAX) { + handle_bulk(data_offset, word_len); + } else if (word) { + for (int64_t i = 0; i < word_len; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); + } } } } @@ -253,13 +264,24 @@ struct IfElseFunctor> { while (cnt--) { int valid_bits; uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (((byte ^ static_cast(invert_mask)) == UINT8_MAX) && valid_bits == 8) { - handle_bulk(data_offset, 8); - } else if (byte ^ static_cast(invert_mask)) { - for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i) != - static_cast(invert_mask)) { - handle_each(data_offset + i); + if (invert) { + if (byte == 0 && valid_bits == 8) { + handle_bulk(data_offset, 8); + } else if (byte != UINT8_MAX) { + for (int i = 0; i < valid_bits; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); + } + } + } + } else { + if (byte == UINT8_MAX && valid_bits == 8) { + handle_bulk(data_offset, 8); + } else if (byte) { + for (int i = 0; i < valid_bits; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); + } } } } @@ -271,8 +293,7 @@ struct IfElseFunctor> { template static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk, HandleEach handle_each) { - return RunIfElseLoop(cond, handle_bulk, - handle_each); + return RunIfElseLoop(cond, handle_bulk, handle_each); } // AAA From 0e4f1a050a0c2bcb7727bf51104b28b82b3fd4a8 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 19 Jun 2021 14:52:30 -0400 Subject: [PATCH 37/46] fixing the down casting issue --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index f83b01df913..aa1962e7919 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -217,7 +217,7 @@ struct IfElseFunctor> { /// Runs the main if_else loop. Here, it is expected that the right data has already /// been copied to the output. - /// If invert_mask is meant to invert the cond.data. If is set to ~Word(0), then the + /// If `invert` is meant to invert the cond.data. If is set to `true`, then the /// buffer will be inverted before calling the handle_bulk or handle_each functions. /// This is useful, when left is an array and right is scalar. Then rather than /// copying data from the right to output, we can copy left data to the output and From f5a14c0709f6ac58a8ce1b4a648daa9676160393 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Sat, 19 Jun 2021 17:22:24 -0400 Subject: [PATCH 38/46] refactor --- cpp/src/arrow/util/CMakeLists.txt | 2 +- .../{bit_util_benchmark_temp.cc => bitmap_reader_benchmark.cc} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/src/arrow/util/{bit_util_benchmark_temp.cc => bitmap_reader_benchmark.cc} (100%) diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 1851a9afa5e..660fb2657b6 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -79,6 +79,7 @@ add_arrow_test(threading-utility-test add_arrow_benchmark(bit_block_counter_benchmark) add_arrow_benchmark(bit_util_benchmark) +add_arrow_benchmark(bitmap_reader_benchmark) add_arrow_benchmark(cache_benchmark) add_arrow_benchmark(compression_benchmark) add_arrow_benchmark(decimal_benchmark) @@ -93,4 +94,3 @@ add_arrow_benchmark(trie_benchmark) add_arrow_benchmark(utf8_util_benchmark) add_arrow_benchmark(value_parsing_benchmark) add_arrow_benchmark(variant_benchmark) -add_arrow_benchmark(bit_util_benchmark_temp) diff --git a/cpp/src/arrow/util/bit_util_benchmark_temp.cc b/cpp/src/arrow/util/bitmap_reader_benchmark.cc similarity index 100% rename from cpp/src/arrow/util/bit_util_benchmark_temp.cc rename to cpp/src/arrow/util/bitmap_reader_benchmark.cc From cfb88f8c7c2f5d91083be6d7d1fee3f4d6f1bc2f Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 21 Jun 2021 16:42:29 -0400 Subject: [PATCH 39/46] adding set/clearbitmap tests --- cpp/src/arrow/util/bit_util.cc | 2 +- cpp/src/arrow/util/bit_util_test.cc | 37 +++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index e33b65b841a..ee4bcde7713 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -99,7 +99,7 @@ void SetBitmapImpl(uint8_t* data, int64_t offset, int64_t length) { } // align to a byte boundary - data[offset / 8] = BitUtil::SpliceWord(prologue, data[offset / 8], set_byte); + data[offset / 8] = BitUtil::SpliceWord(8 - prologue, data[offset / 8], set_byte); offset += prologue; length -= prologue; diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 2b42e3b34e4..ded37398f95 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1532,6 +1532,43 @@ TEST(BitUtilTests, TestSetBitsTo) { } } +TEST(BitUtilTests, TestSetBitmap) { + using BitUtil::SetBitsTo; + for (const auto fill_byte_int : {0xff}) { + const uint8_t fill_byte = static_cast(fill_byte_int); + { + // test set within a byte + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + BitUtil::SetBitmap(bitmap, 2, 2); + BitUtil::ClearBitmap(bitmap, 4, 2); + ASSERT_BYTES_EQ(bitmap, {static_cast((fill_byte & ~0x3C) | 0xC)}); + } + { + // test straddling a single byte boundary + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + BitUtil::SetBitmap(bitmap, 4, 7); + BitUtil::ClearBitmap(bitmap, 11, 7); + ASSERT_BYTES_EQ(bitmap, {static_cast((fill_byte & 0xF) | 0xF0), 0x7, + static_cast(fill_byte & ~0x3)}); + } + { + // test byte aligned end + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + BitUtil::SetBitmap(bitmap, 4, 4); + BitUtil::ClearBitmap(bitmap, 8, 8); + ASSERT_BYTES_EQ(bitmap, + {static_cast((fill_byte & 0xF) | 0xF0), 0x00, fill_byte}); + } + { + // test byte aligned end, multiple bytes + uint8_t bitmap[] = {fill_byte, fill_byte, fill_byte, fill_byte}; + BitUtil::ClearBitmap(bitmap, 0, 24); + uint8_t false_byte = static_cast(0); + ASSERT_BYTES_EQ(bitmap, {false_byte, false_byte, false_byte, fill_byte}); + } + } +} + TEST(BitUtilTests, TestCopyBitmap) { const int kBufferSize = 1000; From 33444d151108acb8d4afe1f9e358aa33597c9979 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 21 Jun 2021 16:42:52 -0400 Subject: [PATCH 40/46] making if_else kernels write_to_slices --- .../arrow/compute/kernels/scalar_if_else.cc | 452 ++++++++++-------- 1 file changed, 249 insertions(+), 203 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index aa1962e7919..c82adcfc316 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -78,36 +78,30 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& // cond.valid & (cond.data & left.valid | ~cond.data & right.valid) // In the following cases, we dont need to allocate out_valid bitmap - // if cond & left & right all ones, then output is all valid --> out_valid = nullptr + // if cond & left & right all ones, then output is all valid. output validity buffer + // is already allocated, hence set all bits if (cond_const == kAllValid && left_const == kAllValid && right_const == kAllValid) { + BitUtil::SetBitmap(output->buffers[0]->mutable_data(), output->offset, + output->length); return Status::OK(); } if (left_const == kAllValid && right_const == kAllValid) { - // if both left and right are valid, no need to calculate out_valid bitmap. Pass + // if both left and right are valid, no need to calculate out_valid bitmap. Copy // cond validity buffer - // if there's an offset, copy bitmap (cannot slice a bitmap) - if (cond.offset) { - ARROW_ASSIGN_OR_RAISE( - output->buffers[0], - arrow::internal::CopyBitmap(ctx->memory_pool(), cond.buffers[0]->data(), - cond.offset, cond.length)); - } else { // just copy assign cond validity buffer - output->buffers[0] = cond.buffers[0]; - } + arrow::internal::CopyBitmap(cond.buffers[0]->data(), cond.offset, cond.length, + output->buffers[0]->mutable_data(), output->offset); return Status::OK(); } - // following cases requires a separate out_valid buffer - ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(cond.length)); - // lambda function that will be used inside the visitor auto apply = [&](uint64_t c_valid, uint64_t c_data, uint64_t l_valid, uint64_t r_valid) { return c_valid & ((c_data & l_valid) | (~c_data & r_valid)); }; - std::array out_bitmaps{Bitmap{output->buffers[0], 0, cond.length}}; + std::array out_bitmaps{ + Bitmap{output->buffers[0], output->offset, output->length}}; switch (flag) { case COND_CONST | LEFT_CONST | RIGHT_CONST: { @@ -201,107 +195,179 @@ Status PromoteNullsVisitor(KernelContext* ctx, const Datum& cond_d, const Datum& return Status::OK(); } -template -struct IfElseFunctor {}; - -// only number types needs to be handled for Fixed sized primitive data types because, -// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding unsigned -// int type -template -struct IfElseFunctor> { - using T = typename TypeTraits::CType; - // A - Array - // S - Scalar - using Word = uint64_t; - static constexpr int64_t word_len = sizeof(Word) * 8; - - /// Runs the main if_else loop. Here, it is expected that the right data has already - /// been copied to the output. - /// If `invert` is meant to invert the cond.data. If is set to `true`, then the - /// buffer will be inverted before calling the handle_bulk or handle_each functions. - /// This is useful, when left is an array and right is scalar. Then rather than - /// copying data from the right to output, we can copy left data to the output and - /// invert the cond data to fill right values. Filling out with a scalar is presumed to - /// be more efficient than filling with an array - template - static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk, - HandleEach handle_each) { - int64_t data_offset = 0; - int64_t bit_offset = cond.offset; - const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray - - BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); - - int64_t cnt = cond_reader.words(); - while (cnt--) { - Word word = cond_reader.NextWord(); - if (invert) { - if (word == 0) { - handle_bulk(data_offset, word_len); - } else if (word != UINT64_MAX) { - for (int64_t i = 0; i < word_len; ++i) { - if (!BitUtil::GetBit(cond_data, bit_offset + i)) { - handle_each(data_offset + i); - } +using Word = uint64_t; +static constexpr int64_t word_len = sizeof(Word) * 8; + +/// Runs the main if_else loop. Here, it is expected that the right data has already +/// been copied to the output. +/// If `invert` is meant to invert the cond.data. If is set to `true`, then the +/// buffer will be inverted before calling the handle_bulk or handle_each functions. +/// This is useful, when left is an array and right is scalar. Then rather than +/// copying data from the right to output, we can copy left data to the output and +/// invert the cond data to fill right values. Filling out with a scalar is presumed to +/// be more efficient than filling with an array +template +static void RunIfElseLoop(const ArrayData& cond, HandleBulk handle_bulk, + HandleEach handle_each) { + int64_t data_offset = 0; + int64_t bit_offset = cond.offset; + const auto* cond_data = cond.buffers[1]->data(); // this is a BoolArray + + BitmapWordReader cond_reader(cond_data, cond.offset, cond.length); + + int64_t cnt = cond_reader.words(); + while (cnt--) { + Word word = cond_reader.NextWord(); + if (invert) { + if (word == 0) { + handle_bulk(data_offset, word_len); + } else if (word != UINT64_MAX) { + for (int64_t i = 0; i < word_len; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); } } - } else { - if (word == UINT64_MAX) { - handle_bulk(data_offset, word_len); - } else if (word) { - for (int64_t i = 0; i < word_len; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - handle_each(data_offset + i); - } + } + } else { + if (word == UINT64_MAX) { + handle_bulk(data_offset, word_len); + } else if (word) { + for (int64_t i = 0; i < word_len; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); } } } - data_offset += word_len; - bit_offset += word_len; } + data_offset += word_len; + bit_offset += word_len; + } - cnt = cond_reader.trailing_bytes(); - while (cnt--) { - int valid_bits; - uint8_t byte = cond_reader.NextTrailingByte(valid_bits); - if (invert) { - if (byte == 0 && valid_bits == 8) { - handle_bulk(data_offset, 8); - } else if (byte != UINT8_MAX) { - for (int i = 0; i < valid_bits; ++i) { - if (!BitUtil::GetBit(cond_data, bit_offset + i)) { - handle_each(data_offset + i); - } + cnt = cond_reader.trailing_bytes(); + while (cnt--) { + int valid_bits; + uint8_t byte = cond_reader.NextTrailingByte(valid_bits); + if (invert) { + if (byte == 0 && valid_bits == 8) { + handle_bulk(data_offset, 8); + } else if (byte != UINT8_MAX) { + for (int i = 0; i < valid_bits; ++i) { + if (!BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); } } - } else { - if (byte == UINT8_MAX && valid_bits == 8) { - handle_bulk(data_offset, 8); - } else if (byte) { - for (int i = 0; i < valid_bits; ++i) { - if (BitUtil::GetBit(cond_data, bit_offset + i)) { - handle_each(data_offset + i); - } + } + } else { + if (byte == UINT8_MAX && valid_bits == 8) { + handle_bulk(data_offset, 8); + } else if (byte) { + for (int i = 0; i < valid_bits; ++i) { + if (BitUtil::GetBit(cond_data, bit_offset + i)) { + handle_each(data_offset + i); } } } - data_offset += 8; - bit_offset += 8; } + data_offset += 8; + bit_offset += 8; } +} - template - static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk, - HandleEach handle_each) { - return RunIfElseLoop(cond, handle_bulk, handle_each); +template +static void RunIfElseLoopInverted(const ArrayData& cond, HandleBulk handle_bulk, + HandleEach handle_each) { + return RunIfElseLoop(cond, handle_bulk, handle_each); +} + +/// Runs if-else when cond is a scalar. Two special functions are required, +/// 1.CopyArrayData, 2. BroadcastScalar +template +static Status RunIfElseScalar(const BooleanScalar& cond, const Datum& left, + const Datum& right, Datum* out, + CopyArrayData copy_array_data, + BroadcastScalar broadcast_scalar) { + if (left.is_scalar() && right.is_scalar()) { // output will be a scalar + if (cond.is_valid) { + *out = cond.value ? left.scalar() : right.scalar(); + } else { + *out = MakeNullScalar(left.type()); + } + return Status::OK(); + } + + // either left or right is an array. Output is always an array` + const std::shared_ptr& out_array = out->array(); + if (!cond.is_valid) { + // cond is null; output is all null --> clear validity buffer + BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset, + out_array->length); + return Status::OK(); + } + + // cond is a non-null scalar + const auto& valid_data = cond.value ? left : right; + if (valid_data.is_array()) { + // valid_data is an array. Hence copy data to the output buffers + const auto& valid_array = valid_data.array(); + if (valid_array->MayHaveNulls()) { + arrow::internal::CopyBitmap( + valid_array->buffers[0]->data(), valid_array->offset, valid_array->length, + out_array->buffers[0]->mutable_data(), out_array->offset); + } else { // validity buffer is nullptr --> set all bits + BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset, + out_array->length); + } + copy_array_data(*valid_array, out_array.get()); + return Status::OK(); + + } else { // valid data is scalar + // valid data is a scalar that needs to be broadcasted + const auto& valid_scalar = *valid_data.scalar(); + if (valid_scalar.is_valid) { // if the scalar is non-null, broadcast + BitUtil::SetBitmap(out_array->buffers[0]->mutable_data(), out_array->offset, + out_array->length); + broadcast_scalar(*valid_data.scalar(), out_array.get()); + } else { // scalar is null, clear the output validity buffer + BitUtil::ClearBitmap(out_array->buffers[0]->mutable_data(), out_array->offset, + out_array->length); + } + return Status::OK(); + } +} + +template +struct IfElseFunctor {}; + +// only number types needs to be handled for Fixed sized primitive data types because, +// internal::GenerateTypeAgnosticPrimitive forwards types to the corresponding unsigned +// int type +template +struct IfElseFunctor> { + using T = typename TypeTraits::CType; + // A - Array, S - Scalar, X = Array/Scalar + + // SXX + static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left, + const Datum& right, Datum* out) { + return RunIfElseScalar( + cond, left, right, out, + /*CopyArrayData*/ + [&](const ArrayData& valid_array, ArrayData* out_array) { + std::memcpy(out_array->GetMutableValues(1), valid_array.GetValues(1), + valid_array.length * sizeof(T)); + }, + /*BroadcastScalar*/ + [&](const Scalar& scalar, ArrayData* out_array) { + T scalar_data = internal::UnboxScalar::Unbox(scalar); + std::fill(out_array->GetMutableValues(1), + out_array->GetMutableValues(1) + out_array->length, scalar_data); + }); } // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const ArrayData& right, ArrayData* out) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - ctx->Allocate(cond.length * sizeof(T))); - T* out_values = reinterpret_cast(out_buf->mutable_data()); + T* out_values = out->template GetMutableValues(1); // copy right data to out_buff const T* right_data = right.GetValues(1); @@ -318,16 +384,13 @@ struct IfElseFunctor> { }, [&](int64_t data_offset) { out_values[data_offset] = left_data[data_offset]; }); - out->buffers[1] = std::move(out_buf); return Status::OK(); } // ASA static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const ArrayData& right, ArrayData* out) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - ctx->Allocate(cond.length * sizeof(T))); - T* out_values = reinterpret_cast(out_buf->mutable_data()); + T* out_values = out->template GetMutableValues(1); // copy right data to out_buff const T* right_data = right.GetValues(1); @@ -344,16 +407,13 @@ struct IfElseFunctor> { }, [&](int64_t data_offset) { out_values[data_offset] = left_data; }); - out->buffers[1] = std::move(out_buf); return Status::OK(); } // AAS static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const Scalar& right, ArrayData* out) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - ctx->Allocate(cond.length * sizeof(T))); - T* out_values = reinterpret_cast(out_buf->mutable_data()); + T* out_values = out->template GetMutableValues(1); // copy left data to out_buff const T* left_data = left.GetValues(1); @@ -369,16 +429,13 @@ struct IfElseFunctor> { }, [&](int64_t data_offset) { out_values[data_offset] = right_data; }); - out->buffers[1] = std::move(out_buf); return Status::OK(); } // ASS static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const Scalar& right, ArrayData* out) { - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - ctx->Allocate(cond.length * sizeof(T))); - T* out_values = reinterpret_cast(out_buf->mutable_data()); + T* out_values = out->template GetMutableValues(1); // copy right data to out_buff T right_data = internal::UnboxScalar::Unbox(right); @@ -394,21 +451,41 @@ struct IfElseFunctor> { }, [&](int64_t data_offset) { out_values[data_offset] = left_data; }); - out->buffers[1] = std::move(out_buf); return Status::OK(); } }; template struct IfElseFunctor> { + // A - Array, S - Scalar, X = Array/Scalar + + // SXX + static Status Call(KernelContext* ctx, const BooleanScalar& cond, const Datum& left, + const Datum& right, Datum* out) { + return RunIfElseScalar( + cond, left, right, out, + /*CopyArrayData*/ + [&](const ArrayData& valid_array, ArrayData* out_array) { + arrow::internal::CopyBitmap( + valid_array.buffers[1]->data(), valid_array.offset, valid_array.length, + out_array->buffers[1]->mutable_data(), out_array->offset); + }, + /*BroadcastScalar*/ + [&](const Scalar& scalar, ArrayData* out_array) { + bool scalar_data = internal::UnboxScalar::Unbox(scalar); + BitUtil::SetBitsTo(out_array->buffers[1]->mutable_data(), out_array->offset, + out_array->length, scalar_data); + }); + } + // AAA static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const ArrayData& right, ArrayData* out) { // out_buff = right & ~cond - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - arrow::internal::BitmapAndNot( - ctx->memory_pool(), right.buffers[1]->data(), right.offset, - cond.buffers[1]->data(), cond.offset, cond.length, 0)); + const auto& out_buf = out->buffers[1]; + arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset, + cond.buffers[1]->data(), cond.offset, cond.length, + out->offset, out_buf->mutable_data()); // out_buff = left & cond ARROW_ASSIGN_OR_RAISE(std::shared_ptr temp_buf, @@ -416,9 +493,9 @@ struct IfElseFunctor> { ctx->memory_pool(), left.buffers[1]->data(), left.offset, cond.buffers[1]->data(), cond.offset, cond.length, 0)); - arrow::internal::BitmapOr(out_buf->data(), 0, temp_buf->data(), 0, cond.length, 0, - out_buf->mutable_data()); - out->buffers[1] = std::move(out_buf); + arrow::internal::BitmapOr(out_buf->data(), out->offset, temp_buf->data(), 0, + cond.length, out->offset, out_buf->mutable_data()); + return Status::OK(); } @@ -426,19 +503,19 @@ struct IfElseFunctor> { static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, const ArrayData& right, ArrayData* out) { // out_buff = right & ~cond - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - arrow::internal::BitmapAndNot( - ctx->memory_pool(), right.buffers[1]->data(), right.offset, - cond.buffers[1]->data(), cond.offset, cond.length, 0)); + const auto& out_buf = out->buffers[1]; + arrow::internal::BitmapAndNot(right.buffers[1]->data(), right.offset, + cond.buffers[1]->data(), cond.offset, cond.length, + out->offset, out_buf->mutable_data()); // out_buff = left & cond bool left_data = internal::UnboxScalar::Unbox(left); if (left_data) { - arrow::internal::BitmapOr(out_buf->data(), 0, cond.buffers[1]->data(), cond.offset, - cond.length, 0, out_buf->mutable_data()); + arrow::internal::BitmapOr(out_buf->data(), out->offset, cond.buffers[1]->data(), + cond.offset, cond.length, out->offset, + out_buf->mutable_data()); } - out->buffers[1] = std::move(out_buf); return Status::OK(); } @@ -446,20 +523,20 @@ struct IfElseFunctor> { static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, const Scalar& right, ArrayData* out) { // out_buff = left & cond - ARROW_ASSIGN_OR_RAISE(std::shared_ptr out_buf, - arrow::internal::BitmapAnd( - ctx->memory_pool(), left.buffers[1]->data(), left.offset, - cond.buffers[1]->data(), cond.offset, cond.length, 0)); + const auto& out_buf = out->buffers[1]; + arrow::internal::BitmapAnd(left.buffers[1]->data(), left.offset, + cond.buffers[1]->data(), cond.offset, cond.length, + out->offset, out_buf->mutable_data()); bool right_data = internal::UnboxScalar::Unbox(right); // out_buff = left & cond | right & ~cond if (right_data) { - arrow::internal::BitmapOrNot(out_buf->data(), 0, cond.buffers[1]->data(), - cond.offset, cond.length, 0, out_buf->mutable_data()); + arrow::internal::BitmapOrNot(out_buf->data(), out->offset, cond.buffers[1]->data(), + cond.offset, cond.length, out->offset, + out_buf->mutable_data()); } - out->buffers[1] = std::move(out_buf); return Status::OK(); } @@ -469,66 +546,32 @@ struct IfElseFunctor> { bool left_data = internal::UnboxScalar::Unbox(left); bool right_data = internal::UnboxScalar::Unbox(right); + const auto& out_buf = out->buffers[1]; + // out_buf = left & cond | right & ~cond - std::shared_ptr out_buf = nullptr; + // std::shared_ptr out_buf = nullptr; if (left_data) { if (right_data) { // out_buf = ones - ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length)); - // filling with UINT8_MAX upto the buffer's size (in bytes) - std::memset(out_buf->mutable_data(), UINT8_MAX, out_buf->size()); + BitUtil::SetBitmap(out_buf->mutable_data(), out->offset, cond.length); } else { // out_buf = cond - out_buf = SliceBuffer(cond.buffers[1], cond.offset, cond.length); + arrow::internal::CopyBitmap(cond.buffers[1]->data(), cond.offset, cond.length, + out_buf->mutable_data(), out->offset); } } else { if (right_data) { // out_buf = ~cond - ARROW_ASSIGN_OR_RAISE(out_buf, arrow::internal::InvertBitmap( - ctx->memory_pool(), cond.buffers[1]->data(), - cond.offset, cond.length)) + arrow::internal::InvertBitmap(cond.buffers[1]->data(), cond.offset, cond.length, + out_buf->mutable_data(), out->offset); } else { // out_buf = zeros - ARROW_ASSIGN_OR_RAISE(out_buf, ctx->AllocateBitmap(cond.length)); + BitUtil::ClearBitmap(out_buf->mutable_data(), out->offset, cond.length); } } - out->buffers[1] = std::move(out_buf); - return Status::OK(); - } -}; -template -struct IfElseFunctor> { - template - static inline Status ReturnCopy(const T& in, T* out) { - // Nothing preallocated, so we assign in into the output - *out = in; return Status::OK(); } - - // AAA - static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, - const ArrayData& right, ArrayData* out) { - return ReturnCopy(left, out); - } - - // ASA - static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, - const ArrayData& right, ArrayData* out) { - return ReturnCopy(right, out); - } - - // AAS - static Status Call(KernelContext* ctx, const ArrayData& cond, const ArrayData& left, - const Scalar& right, ArrayData* out) { - return ReturnCopy(left, out); - } - - // ASS - static Status Call(KernelContext* ctx, const ArrayData& cond, const Scalar& left, - const Scalar& right, ArrayData* out) { - return ReturnCopy(cond, out); - } }; template @@ -537,32 +580,7 @@ struct ResolveIfElseExec { // cond is scalar if (batch[0].is_scalar()) { const auto& cond = batch[0].scalar_as(); - if (batch[1].is_scalar() && batch[2].is_scalar()) { - if (cond.is_valid) { - *out = cond.value ? batch[1].scalar() : batch[2].scalar(); - } else { - *out = MakeNullScalar(batch[1].type()); - } - return Status::OK(); - } - // either left or right is an array. Output is always an array - if (!cond.is_valid) { - // cond is null; just create a null array - ARROW_ASSIGN_OR_RAISE( - *out, MakeArrayOfNull(batch[1].type(), batch.length, ctx->memory_pool())) - return Status::OK(); - } - - const auto& valid_data = cond.value ? batch[1] : batch[2]; - if (valid_data.is_array()) { - *out = valid_data; - } else { - // valid data is a scalar that needs to be broadcasted - ARROW_ASSIGN_OR_RAISE( - *out, - MakeArrayFromScalar(*valid_data.scalar(), batch.length, ctx->memory_pool())); - } - return Status::OK(); + return IfElseFunctor::Call(ctx, cond, batch[1], batch[2], out); } // cond is array. Use functors to sort things out @@ -589,6 +607,22 @@ struct ResolveIfElseExec { } }; + +template <> +struct ResolveIfElseExec { + static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + if (batch[0].is_scalar()) { + *out = MakeNullScalar(null()); + } else { + const std::shared_ptr& cond_array = batch[0].array(); + ARROW_ASSIGN_OR_RAISE( + *out, MakeArrayOfNull(null(), cond_array->length, ctx->memory_pool())); + } + return Status::OK(); + } +}; + + struct IfElseFunction : ScalarFunction { using ScalarFunction::ScalarFunction; @@ -620,14 +654,25 @@ struct IfElseFunction : ScalarFunction { } }; -void AddPrimitiveIfElseKernels(const std::shared_ptr& scalar_function, +void AddNullIfElseKernel(const std::shared_ptr& scalar_function) { + ScalarKernel kernel({boolean(), null(), null()}, null(), + ResolveIfElseExec::Exec); + kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; + kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; + kernel.can_write_into_slices = false; + + DCHECK_OK(scalar_function->AddKernel(std::move(kernel))); +} + +void AddPrimitiveIfElseKernels(const std::shared_ptr& scalar_function, const std::vector>& types) { for (auto&& type : types) { auto exec = internal::GenerateTypeAgnosticPrimitive(*type); // cond array needs to be boolean always ScalarKernel kernel({boolean(), type, type}, type, exec); - kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; - kernel.mem_allocation = MemAllocation::NO_PREALLOCATE; + kernel.null_handling = NullHandling::COMPUTED_PREALLOCATE; + kernel.mem_allocation = MemAllocation::PREALLOCATE; + kernel.can_write_into_slices = true; DCHECK_OK(scalar_function->AddKernel(std::move(kernel))); } @@ -653,7 +698,8 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { AddPrimitiveIfElseKernels(func, NumericTypes()); AddPrimitiveIfElseKernels(func, TemporalTypes()); - AddPrimitiveIfElseKernels(func, {boolean(), null()}); + AddPrimitiveIfElseKernels(func, {boolean()}); + AddNullIfElseKernel(func); // todo add binary kernels DCHECK_OK(registry->AddFunction(std::move(func))); From 984b7dba94288bf271c1c6c5dbd85436eacdd85a Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 21 Jun 2021 17:03:59 -0400 Subject: [PATCH 41/46] fixing lint --- cpp/src/arrow/compute/kernels/scalar_if_else.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index c82adcfc316..54e0725fce7 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -607,7 +607,6 @@ struct ResolveIfElseExec { } }; - template <> struct ResolveIfElseExec { static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) { @@ -622,7 +621,6 @@ struct ResolveIfElseExec { } }; - struct IfElseFunction : ScalarFunction { using ScalarFunction::ScalarFunction; From adfb0fd0003b4b754f55e22667c6401d9d6172e1 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 28 Jun 2021 15:35:46 -0400 Subject: [PATCH 42/46] fixing performance isssue --- cpp/src/arrow/util/bitmap.h | 105 +++++++++++++++++++---------- cpp/src/arrow/util/bitmap_reader.h | 15 ++--- cpp/src/arrow/util/bitmap_writer.h | 17 +++-- 3 files changed, 83 insertions(+), 54 deletions(-) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 674ff96ca5d..4c19da17819 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -243,49 +243,17 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, return min_offset; } - /// \brief Visit words of bits from each input bitmap as array and collects - /// outputs to an array, to be written into the output bitmaps accordingly. - /// - /// All bitmaps must have identical length. The first bit in a visited bitmap - /// may be offset within the first visited word, but words will otherwise contain - /// densely packed bits loaded from the bitmap. That offset within the first word is - /// returned. - /// Visitor is expected to have the following signature - /// [](const std::array& in_words, std::array* out_words){...} - /// - // NOTE: this function is efficient on 3+ sufficiently large bitmaps. - // It also has a large prolog / epilog overhead and should be used - // carefully in other cases. - // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid - // and BitmapUInt64Reader. - template >::type::value_type> - static void VisitWordsAndWrite(const std::array& bitmaps_arg, - std::array* out_bitmaps_arg, - Visitor&& visitor) { + static void RunVisitWordsAndWriteLoop(int64_t bit_length, + std::array& readers, + std::array& writers, + Visitor&& visitor) { constexpr int64_t kBitWidth = sizeof(Word) * 8; - int64_t bit_length = BitLength(bitmaps_arg); - assert(bit_length == BitLength(*out_bitmaps_arg)); - - std::array, N> readers; - for (size_t i = 0; i < N; ++i) { - readers[i] = BitmapWordReader(bitmaps_arg[i].buffer_->data(), - bitmaps_arg[i].offset_, bitmaps_arg[i].length_); - } - - std::array, M> writers; - for (size_t i = 0; i < M; ++i) { - const Bitmap& out_bitmap = out_bitmaps_arg->at(i); - writers[i] = BitmapWordWriter(out_bitmap.buffer_->mutable_data(), - out_bitmap.offset_, out_bitmap.length_); - } - std::array visited_words; - visited_words.fill(0); std::array output_words; - output_words.fill(0); // every reader will have same number of words, since they are same length'ed // TODO($JIRA) this will be inefficient in some cases. When there are offsets beyond @@ -338,6 +306,69 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, } } + /// \brief Visit words of bits from each input bitmap as array and collects + /// outputs to an array, to be written into the output bitmaps accordingly. + /// + /// All bitmaps must have identical length. The first bit in a visited bitmap + /// may be offset within the first visited word, but words will otherwise contain + /// densely packed bits loaded from the bitmap. That offset within the first word is + /// returned. + /// Visitor is expected to have the following signature + /// [](const std::array& in_words, std::array* out_words){...} + /// + // NOTE: this function is efficient on 3+ sufficiently large bitmaps. + // It also has a large prolog / epilog overhead and should be used + // carefully in other cases. + // For 2 bitmaps or less, and/or smaller bitmaps, see also VisitTwoBitBlocksVoid + // and BitmapUInt64Reader. + template >::type::value_type> + static void VisitWordsAndWrite(const std::array& bitmaps_arg, + std::array* out_bitmaps_arg, + Visitor&& visitor) { + int64_t bit_length = BitLength(bitmaps_arg); + assert(bit_length == BitLength(*out_bitmaps_arg)); + + // if both input and output bitmaps have no byte offset, then use special template + if (std::all_of(bitmaps_arg.begin(), bitmaps_arg.end(), + [](const Bitmap& b) { return b.offset_ % 8 == 0; }) && + std::all_of(out_bitmaps_arg->begin(), out_bitmaps_arg->end(), + [](const Bitmap& b) { return b.offset_ % 8 == 0; })) { + std::array, N> readers; + for (size_t i = 0; i < N; ++i) { + const Bitmap& in_bitmap = bitmaps_arg[i]; + readers[i] = BitmapWordReader( + in_bitmap.buffer_->data(), in_bitmap.offset_, in_bitmap.length_); + } + + std::array, M> writers; + for (size_t i = 0; i < M; ++i) { + const Bitmap& out_bitmap = out_bitmaps_arg->at(i); + writers[i] = BitmapWordWriter( + out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, out_bitmap.length_); + } + + RunVisitWordsAndWriteLoop(bit_length, readers, writers, std::move(visitor)); + } else { + std::array, N> readers; + for (size_t i = 0; i < N; ++i) { + const Bitmap& in_bitmap = bitmaps_arg[i]; + readers[i] = BitmapWordReader(in_bitmap.buffer_->data(), in_bitmap.offset_, + in_bitmap.length_); + } + + std::array, M> writers; + for (size_t i = 0; i < M; ++i) { + const Bitmap& out_bitmap = out_bitmaps_arg->at(i); + writers[i] = BitmapWordWriter(out_bitmap.buffer_->mutable_data(), + out_bitmap.offset_, out_bitmap.length_); + } + + RunVisitWordsAndWriteLoop(bit_length, readers, writers, std::move(visitor)); + } + } + const std::shared_ptr& buffer() const { return buffer_; } /// offset of first bit relative to buffer().data() diff --git a/cpp/src/arrow/util/bitmap_reader.h b/cpp/src/arrow/util/bitmap_reader.h index ce1d5f376bd..7c43747fafb 100644 --- a/cpp/src/arrow/util/bitmap_reader.h +++ b/cpp/src/arrow/util/bitmap_reader.h @@ -146,15 +146,14 @@ class BitmapUInt64Reader { // on sufficiently large inputs. However, it has a larger prolog / epilog overhead // and should probably not be used for small bitmaps. -template +template class BitmapWordReader { public: BitmapWordReader() = default; - BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) { - bitmap_ = bitmap + offset / 8; - offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); - + BitmapWordReader(const uint8_t* bitmap, int64_t offset, int64_t length) + : offset_(static_cast(may_have_byte_offset) * (offset % 8)), + bitmap_(bitmap + offset / 8), + bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)) { // decrement word count by one as we may touch two adjacent words in one iteration nwords_ = length / (sizeof(Word) * 8) - 1; if (nwords_ < 0) { @@ -174,7 +173,7 @@ class BitmapWordReader { bitmap_ += sizeof(Word); const Word next_word = load(bitmap_); Word word = current_word_; - if (offset_) { + if (may_have_byte_offset && offset_) { // combine two adjacent words into one word // |<------ next ----->|<---- current ---->| // +-------------+-----+-------------+-----+ @@ -215,7 +214,7 @@ class BitmapWordReader { ++bitmap_; const uint8_t next_byte = load(bitmap_); byte = current_byte_; - if (offset_) { + if (may_have_byte_offset && offset_) { byte >>= offset_; byte |= next_byte << (8 - offset_); } diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h index b15b036c248..d5c6d909df0 100644 --- a/cpp/src/arrow/util/bitmap_writer.h +++ b/cpp/src/arrow/util/bitmap_writer.h @@ -180,16 +180,15 @@ class FirstTimeBitmapWriter { int64_t byte_offset_; }; -template +template class BitmapWordWriter { public: BitmapWordWriter() = default; - BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) { - bitmap_ = bitmap + offset / 8; - offset_ = offset % 8; - bitmap_end_ = bitmap_ + BitUtil::BytesForBits(offset_ + length); - mask_ = (1U << offset_) - 1; - + BitmapWordWriter(uint8_t* bitmap, int64_t offset, int64_t length) + : offset_(static_cast(may_have_byte_offset) * (offset % 8)), + bitmap_(bitmap + offset / 8), + bitmap_end_(bitmap_ + BitUtil::BytesForBits(offset_ + length)), + mask_((1U << offset_) - 1) { if (offset_) { if (length >= static_cast(sizeof(Word) * 8)) { current_word_ = load(bitmap_); @@ -200,7 +199,7 @@ class BitmapWordWriter { } void PutNextWord(Word word) { - if (offset_) { + if (may_have_byte_offset && offset_) { // split one word into two adjacent words, don't touch unused bits // |<------ word ----->| // +-----+-------------+ @@ -227,7 +226,7 @@ class BitmapWordWriter { void PutNextTrailingByte(uint8_t byte, int valid_bits) { if (valid_bits == 8) { - if (offset_) { + if (may_have_byte_offset && offset_) { byte = (byte << offset_) | (byte >> (8 - offset_)); uint8_t next_byte = load(bitmap_ + 1); current_byte_ = (current_byte_ & mask_) | (byte & ~mask_); From 6d48f7a9162142ab094be1d8d47680576383e2a6 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 28 Jun 2021 16:35:07 -0400 Subject: [PATCH 43/46] dummy --- cpp/src/arrow/util/bitmap.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 4c19da17819..c0bd30bdd5c 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -358,6 +358,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, in_bitmap.length_); } + std::array, M> writers; for (size_t i = 0; i < M; ++i) { const Bitmap& out_bitmap = out_bitmaps_arg->at(i); From d3688664783bb0f7696918981bfee7619b567737 Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 28 Jun 2021 16:35:28 -0400 Subject: [PATCH 44/46] Revert "dummy" This reverts commit 97091f85 --- cpp/src/arrow/util/bitmap.h | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index c0bd30bdd5c..4c19da17819 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -358,7 +358,6 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, in_bitmap.length_); } - std::array, M> writers; for (size_t i = 0; i < M; ++i) { const Bitmap& out_bitmap = out_bitmaps_arg->at(i); From 4324a73d7ecf3ac513c48fc66ea0a6f4a8240d0b Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 28 Jun 2021 19:18:00 -0400 Subject: [PATCH 45/46] Apply suggestions from code review Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/util/bit_util.h | 8 ++++---- cpp/src/arrow/util/bitmap.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 95969dbd2da..1e97e467610 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -327,15 +327,15 @@ void ClearBitmap(uint8_t* data, int64_t offset, int64_t length); /// Returns a mask with lower i bits set to 1. If i >= sizeof(Word)*8, all-ones will be /// returned /// ex: -/// PrecedingWordBitmask(0)= 0x00 -/// PrecedingWordBitmask(4)= 0x0f -/// PrecedingWordBitmask(8)= 0xff -/// PrecedingWordBitmask(8)= 0x00ff /// ref: https://stackoverflow.com/a/59523400 template constexpr Word PrecedingWordBitmask(unsigned int const i) { return (static_cast(i < sizeof(Word) * 8) << (i & (sizeof(Word) * 8 - 1))) - 1; } +static_assert(PrecedingWordBitmask(0) == 0x00, ""); +static_assert(PrecedingWordBitmask(4) == 0x0f, ""); +static_assert(PrecedingWordBitmask(8) == 0xff, ""); +static_assert(PrecedingWordBitmask(8) == 0x00ff, ""); /// \brief Create a word with low `n` bits from `low` and high `sizeof(Word)-n` bits /// from `high`. diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 4c19da17819..461647e6b6c 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -365,7 +365,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, out_bitmap.offset_, out_bitmap.length_); } - RunVisitWordsAndWriteLoop(bit_length, readers, writers, std::move(visitor)); + RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor); } } From 1b3144b0c7767c1359e3bbbdcbe000d728ef29fc Mon Sep 17 00:00:00 2001 From: niranda perera Date: Mon, 28 Jun 2021 19:19:39 -0400 Subject: [PATCH 46/46] applying PR comments --- cpp/src/arrow/compute/kernels/scalar_if_else_test.cc | 3 +++ cpp/src/arrow/util/bitmap.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index c9347bc6a4b..670a2d42a3a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -57,6 +57,9 @@ TYPED_TEST(TestIfElsePrimitive, IfElseFixedSizeRand) { random::RandomArrayGenerator rand(/*seed=*/0); int64_t len = 1000; + + // adding 64 consecutive 1's and 0's in the cond array to test all-true/ all-false + // word code paths ASSERT_OK_AND_ASSIGN(auto temp1, MakeArrayFromScalar(BooleanScalar(true), 64)); ASSERT_OK_AND_ASSIGN(auto temp2, MakeArrayFromScalar(BooleanScalar(false), 64)); auto temp3 = rand.ArrayOf(boolean(), len - 64 * 2, /*null_probability=*/0.01); diff --git a/cpp/src/arrow/util/bitmap.h b/cpp/src/arrow/util/bitmap.h index 461647e6b6c..141f863c0b8 100644 --- a/cpp/src/arrow/util/bitmap.h +++ b/cpp/src/arrow/util/bitmap.h @@ -349,7 +349,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, out_bitmap.buffer_->mutable_data(), out_bitmap.offset_, out_bitmap.length_); } - RunVisitWordsAndWriteLoop(bit_length, readers, writers, std::move(visitor)); + RunVisitWordsAndWriteLoop(bit_length, readers, writers, visitor); } else { std::array, N> readers; for (size_t i = 0; i < N; ++i) {