From 9aaa8ad128fd9195a90c16c3c4ff86814d888a53 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 30 Oct 2019 13:48:29 -0400 Subject: [PATCH 01/14] ARROW-6396: [C++] Kleene logic in logical kernels --- cpp/src/arrow/util/bit_util.cc | 1 + cpp/src/arrow/util/bit_util.h | 22 ++++++++++ cpp/src/arrow/util/bit_util_benchmark.cc | 51 ++++++++++++++++++++++-- 3 files changed, 70 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index dab20a69df3..4d66f0eaec4 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "arrow/buffer.h" diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 3718a16fa5b..a8a404083b8 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -52,6 +52,7 @@ #define ARROW_BYTE_SWAP32 __builtin_bswap32 #endif +#include #include #include #include @@ -827,6 +828,27 @@ Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, ARROW_EXPORT int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); +struct Bitmap { + uint8_t* data; + int64_t offset, length; + + bool GetBit(int64_t i) const { return BitUtil::GetBit(data, i + offset); } + + void SetBitTo(int64_t i, bool v) const { BitUtil::SetBitTo(data, i + offset, v); } + + template + static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { + auto length = bitmaps[0].length; + std::array bits; + for (int64_t i = 0; i < length; ++i) { + for (size_t bitmap_i = 0; bitmap_i < N; ++bitmap_i) { + bits[bitmap_i] = bitmaps[bitmap_i].GetBit(i); + } + visitor(bits); + } + } +}; + ARROW_EXPORT bool BitmapEquals(const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t bit_length); diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 837499252ab..ca6235b8cd1 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -17,6 +17,8 @@ #include "benchmark/benchmark.h" +#include +#include #include #include "arrow/buffer.h" @@ -30,8 +32,6 @@ namespace arrow { namespace BitUtil { -#ifdef ARROW_WITH_BENCHMARKS_REFERENCE - // A naive bitmap reader implementation, meant as a baseline against // internal::BitmapReader @@ -84,8 +84,6 @@ class NaiveBitmapWriter { int64_t position_; }; -#endif - static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { std::shared_ptr buffer; ABORT_NOT_OK(AllocateBuffer(nbytes, &buffer)); @@ -94,6 +92,48 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { return buffer; } +static void BenchmarkBitmapAnd(benchmark::State& state) { + int64_t nbytes = state.range(0); + std::shared_ptr buffer_1 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_2 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_3 = CreateRandomBuffer(nbytes); + + const int64_t num_bits = nbytes * 8; + + for (auto _ : state) { + internal::BitmapAnd(buffer_1->data(), 0, buffer_2->data(), 0, num_bits, 0, + buffer_3->mutable_data()); + auto total = internal::CountSetBits(buffer_3->data(), 0, num_bits); + benchmark::DoNotOptimize(total); + } + state.SetBytesProcessed(state.iterations() * nbytes); +} + +static void BenchmarkBitmapVisitAnd(benchmark::State& state) { + int64_t nbytes = state.range(0); + std::shared_ptr buffer_1 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_2 = CreateRandomBuffer(nbytes); + std::shared_ptr buffer_3 = CreateRandomBuffer(nbytes); + + const int64_t num_bits = nbytes * 8; + + internal::Bitmap bitmap_1{buffer_1->mutable_data(), num_bits, 0}; + internal::Bitmap bitmap_2{buffer_2->mutable_data(), num_bits, 0}; + internal::Bitmap bitmap_3{buffer_3->mutable_data(), num_bits, 0}; + + for (auto _ : state) { + int64_t i = 0; + internal::Bitmap::Visit({bitmap_1, bitmap_2}, [&](std::array bits) { + bool out_bit = + std::accumulate(bits.begin(), bits.end(), true, std::logical_and{}); + bitmap_3.SetBitTo(i++, out_bit); + }); + auto total = internal::CountSetBits(bitmap_3.data, bitmap_3.offset, bitmap_3.length); + benchmark::DoNotOptimize(total); + } + state.SetBytesProcessed(state.iterations() * nbytes); +} + template static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) { std::shared_ptr buffer = CreateRandomBuffer(nbytes); @@ -321,5 +361,8 @@ BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); +BENCHMARK(BenchmarkBitmapAnd)->Arg(kBufferSize)->MinTime(1.0); +BENCHMARK(BenchmarkBitmapVisitAnd)->Arg(kBufferSize)->MinTime(1.0); + } // namespace BitUtil } // namespace arrow From e6d61abb0982f43c3bd2e32e595592ca0408c3c3 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 1 Nov 2019 13:08:01 -0400 Subject: [PATCH 02/14] add unit tests for word wise Bitmap visit --- cpp/src/arrow/compute/kernels/boolean.cc | 56 +++++- cpp/src/arrow/compute/kernels/boolean.h | 18 +- cpp/src/arrow/util/bit_util.cc | 18 ++ cpp/src/arrow/util/bit_util.h | 233 +++++++++++++++++++++-- cpp/src/arrow/util/bit_util_benchmark.cc | 77 +++++--- cpp/src/arrow/util/bit_util_test.cc | 137 +++++++++++++ cpp/src/arrow/util/string_view.h | 1 + 7 files changed, 498 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 19f3bb7b94a..702ca4b44a1 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -17,6 +17,7 @@ #include "arrow/compute/kernels/boolean.h" +#include #include #include @@ -31,6 +32,7 @@ namespace arrow { +using internal::Bitmap; using internal::BitmapAnd; using internal::BitmapOr; using internal::BitmapXor; @@ -91,26 +93,68 @@ class BinaryBooleanKernel : public BinaryKernel { ArrayData* result; result = out->array().get(); - RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left_data, right_data, result)); return Compute(ctx, left_data, right_data, result); } std::shared_ptr out_type() const override { return boolean(); } }; +enum class ResolveNull { KLEENE_LOGIC, PROPAGATE }; + class AndKernel : public BinaryBooleanKernel { + public: + explicit AndKernel(ResolveNull resolve_null) : resolve_null_(resolve_null) {} + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { - if (right.length > 0) { - BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), - right.offset, right.length, 0, out->buffers[1]->mutable_data()); + if (resolve_null_ == ResolveNull::PROPAGATE) { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); + if (right.length > 0) { + BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), + right.offset, right.length, 0, out->buffers[1]->mutable_data()); + } + } else { + Bitmap bitmaps[4]; + for (int i = 0, buffer_index = 0; buffer_index != 2; ++buffer_index) { + for (const ArrayData* operand : {&left, &right}) { + bitmaps[i++] = + Bitmap(operand->buffers[buffer_index], operand->offset, operand->length); + } + } + Bitmap out_validity(out->buffers[0], out->offset, out->length); + Bitmap out_values(out->buffers[1], out->offset, out->length); + int64_t i = 0; + Bitmap::Visit(bitmaps, [&](std::bitset<4> bits) { + bool left_valid = bits[0], right_valid = bits[1]; + bool left_value = !left_valid || bits[2]; + bool right_value = !right_valid || bits[3]; + if (left_valid && right_valid) { + out_validity.SetBitTo(i, true); + } else if (!left_valid && !right_valid) { + out_validity.SetBitTo(i, false); + } else { + // one is null, so out will be null or false + out_validity.SetBitTo(i, !(left_value && right_value)); + } + out_values.SetBitTo(i, left_value && right_value); + }); } return Status::OK(); } + + ResolveNull resolve_null_; }; Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { - AndKernel and_kernel; + AndKernel and_kernel(ResolveNull::PROPAGATE); + detail::PrimitiveAllocatingBinaryKernel kernel(&and_kernel); + return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); +} + +Status KleeneAnd(FunctionContext* ctx, const Datum& left, const Datum& right, + Datum* out) { + AndKernel and_kernel(ResolveNull::KLEENE_LOGIC); detail::PrimitiveAllocatingBinaryKernel kernel(&and_kernel); return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); } @@ -118,6 +162,7 @@ Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* o class OrKernel : public BinaryBooleanKernel { Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); if (right.length > 0) { BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), right.offset, right.length, 0, out->buffers[1]->mutable_data()); @@ -135,6 +180,7 @@ Status Or(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* ou class XorKernel : public BinaryBooleanKernel { Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); if (right.length > 0) { BitmapXor(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), right.offset, right.length, 0, out->buffers[1]->mutable_data()); diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h index fb88659dbc4..7f485a721f7 100644 --- a/cpp/src/arrow/compute/kernels/boolean.h +++ b/cpp/src/arrow/compute/kernels/boolean.h @@ -37,7 +37,9 @@ class FunctionContext; ARROW_EXPORT Status Invert(FunctionContext* context, const Datum& value, Datum* out); -/// \brief Element-wise AND of two boolean datums +/// \brief Element-wise AND of two boolean datums which always propagates nulls +/// (null and false is null). +/// /// \param[in] context the FunctionContext /// \param[in] left left operand (array) /// \param[in] right right operand (array) @@ -48,6 +50,20 @@ Status Invert(FunctionContext* context, const Datum& value, Datum* out); ARROW_EXPORT Status And(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); +/// \brief Element-wise AND of two boolean datums with a Kleene truth table +/// (null and false is false). +/// +/// \param[in] context the FunctionContext +/// \param[in] left left operand (array) +/// \param[in] right right operand (array) +/// \param[out] out resulting datum +/// +/// \since 0.11.0 +/// \note API not yet finalized +ARROW_EXPORT +Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right, + Datum* out); + /// \brief Element-wise OR of two boolean datums /// \param[in] context the FunctionContext /// \param[in] left left operand (array) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 4d66f0eaec4..25d8de74458 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -24,6 +24,7 @@ #endif #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include +#include "arrow/array.h" #include "arrow/buffer.h" #include "arrow/status.h" #include "arrow/util/align_util.h" @@ -324,6 +326,22 @@ Status BitmapOp(MemoryPool* pool, const uint8_t* left, int64_t left_offset, } // namespace +std::string Bitmap::ToString() const { + std::string out(length_, '0'); + for (int64_t i = 0; i < length_; ++i) { + out[i] = GetBit(i) ? '1' : '0'; + } + return out; +} + +std::shared_ptr Bitmap::ToArray() const { + return std::make_shared(length_, buffer_, nullptr, 0, offset_); +} + +std::string Bitmap::Diff(const Bitmap& other) const { + return ToArray()->Diff(*other.ToArray()); +} + Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t length, int64_t out_offset, std::shared_ptr* out_buffer) { diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index a8a404083b8..9cf7c9bf7a7 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -52,24 +52,29 @@ #define ARROW_BYTE_SWAP32 __builtin_bswap32 #endif +#include #include +#include #include #include #include #include #include #include +#include #include +#include "arrow/buffer.h" +#include "arrow/util/functional.h" #include "arrow/util/macros.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" namespace arrow { -class Buffer; class MemoryPool; class Status; +class BooleanArray; namespace detail { @@ -828,25 +833,231 @@ Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, ARROW_EXPORT int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); -struct Bitmap { - uint8_t* data; - int64_t offset, length; +class ARROW_EXPORT Bitmap { + public: + Bitmap() = default; + + Bitmap(std::shared_ptr buffer, int64_t offset, int64_t length) + : buffer_(std::move(buffer)), offset_(offset), length_(length) {} - bool GetBit(int64_t i) const { return BitUtil::GetBit(data, i + offset); } + Bitmap Slice(int64_t offset) const { + return Bitmap(buffer_, offset_ + offset, length_ - offset); + } + + Bitmap Slice(int64_t offset, int64_t length) const { + return Bitmap(buffer_, offset_ + offset, length); + } - void SetBitTo(int64_t i, bool v) const { BitUtil::SetBitTo(data, i + offset, v); } + std::shared_ptr ToArray() const; + + std::string ToString() const; + + std::string Diff(const Bitmap& other) const; + + bool GetBit(int64_t i) const { return BitUtil::GetBit(buffer_->data(), i + offset_); } + + void SetBitTo(int64_t i, bool v) const { + BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); + } + + /* + template + BitmapWordAlignParams WordAlignParams() const { + return BitmapWordAlign(buffer_->data(), offset_, length_); + } + */ template static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { - auto length = bitmaps[0].length; - std::array bits; - for (int64_t i = 0; i < length; ++i) { - for (size_t bitmap_i = 0; bitmap_i < N; ++bitmap_i) { - bits[bitmap_i] = bitmaps[bitmap_i].GetBit(i); + VisitImpl(bitmaps, VisitedIs>{}, + std::forward(visitor)); + } + + /* + template < + size_t N, typename Visitor, + typename Word = + typename internal::call_traits::argument_type<0, Visitor&&>::value_type, + typename Enable = typename std::enable_if::value>::type> + static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { + auto bit_length = bitmaps[0].length(); + + constexpr size_t kBitWidth = sizeof(Word) * 8; + + BitmapWordAlignParams align_params[N]; + bool leading_bits = false; + for (int i = 0; i < N; ++i) { + align_params[i] = bitmaps[i].WordAlignParams(); + + auto leading_bytes = CeilDiv(align_params[i].leading_bits, 8); + if (align_params[i].aligned_start - leading_bytes < bitmaps[i].buffer()->data()) { + // this bitmap's leading_bits do not lie in an accessible word + leading_bits = true; + } + } + + std::array visited_words; + + if (leading_bits) { + // consume bits which do not lie in an accessible word first + auto length = std::min(kBitWidth, bit_length); + for (int i = 0; i < N; ++i) { + auto bitmap = bitmaps[i].Slice(0, length); + std::memcpy(&visited_words[i], bitmap.byte_offset(), bitmap.byte_length()); + align_params[i] = bitmap.WordAlignParams(); + } + visitor(visited_words); + } + + const Word* words[N]; + int64_t offsets[N]; + + size_t trailing_bits = 0; + for (int i = 0; i < N; ++i) { + auto trailing_bytes = CeilDiv(align_params[i].trailing_bits, 8); + auto aligned_end = + align_params[i].aligned_start + align_params[i].aligned_words * sizeof(Word); + if (aligned_end + trailing_bytes > + bitmaps[i].buffer()->data() + bitmaps[i].buffer()->size()) { + // this bitmap's trailing_bits do not lie in an accessible word + trailing_bits = std::max(trailing_bits, align_params[i].trailing_bits); + } + + words[i] = reinterpret_cast(align_params[i].aligned_start) - 1; + offsets[i] = bitmaps[i].offset(); + } + + for (int i = 0; i < N; ++i) { + auto bytes = bitmaps[i].buffer()->data(); + auto misalignment = reinterpret_cast(bytes) % alignof(IntType); + values[i] = reinterpret_cast(bytes - misalignment); + offsets[i] = bitmaps[i].offset() + misalignment * 8; + auto capacity = bitmaps[i].buffer()->capacity() + misalignment; + + // clamp offsets to [0, kBitWidth) + values[i] -= offsets[i] / kBitWidth; + offsets[i] %= kBitWidth; + } + + for (int64_t int_i = 0; int_i < int_length; ++int_i) { + for (int i = 0; i < N; ++i) { + if (offsets[i] == 0) { + ints[i] = values[i][int_i]; + } else { + ints[i] = values[i][int_i] << offsets[i]; + ints[i] |= values[i][int_i] >> (kBitWidth - offsets[i]); + } + } + visitor(ints); + } + } + */ + + const std::shared_ptr& buffer() const { return buffer_; } + + /// offset of first bit relative to buffer().data() + int64_t offset() const { return offset_; } + + int64_t length() const { return length_; } + + /// string_view of all bytes which contain any bit in this bitmap + util::basic_string_view bytes() const { + auto byte_offset = offset_ / 8; + auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; + return util::basic_string_view(buffer_->data() + byte_offset, byte_count); + } + + template + util::basic_string_view words() const { + auto bytes_addr = AsInt(bytes().data()); + auto words_addr = bytes_addr - bytes_addr % sizeof(Word); + auto word_byte_count = + BitUtil::RoundUpToPowerOf2(bytes_addr + bytes().size(), sizeof(Word)) - + words_addr; + return util::basic_string_view(reinterpret_cast(words_addr), + word_byte_count / sizeof(Word)); + } + + /// offset of first bit relative to words().data() + template + int64_t word_offset() const { + return offset_ + (AsInt(buffer_->data()) - AsInt(words().data())) * 8; + } + + private: + template + struct VisitedIs {}; + + static size_t AsInt(const void* ptr) { + return reinterpret_cast(reinterpret_cast(ptr)); + } + + template + static void VisitImpl(const Bitmap (&bitmaps)[N], VisitedIs>, + Visitor&& visitor) { + auto bit_length = bitmaps[0].length(); + std::bitset bits; + for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { + for (int i = 0; i < N; ++i) { + bits[i] = bitmaps[i].GetBit(bit_i); } visitor(bits); } } + + template + static void VisitImpl(const Bitmap (&bitmaps)[N], VisitedIs>, + Visitor&& visitor) { + util::basic_string_view words[N]; + int64_t offsets[N]; + + constexpr size_t kBitWidth = sizeof(Word) * 8; + + for (int i = 0; i < N; ++i) { + words[i] = bitmaps[i].template words(); + offsets[i] = bitmaps[i].offset(); + + // clamp offsets to [0, kBitWidth) + words[i] = words[i].substr(offsets[i] / kBitWidth); + offsets[i] %= kBitWidth; + } + + std::array visited_words; + + size_t word_count = 0; + for (auto word : words) { + // words[i].size() == words[j].size() if offsets[i] == 0 and offsets[j] == 0 + // words[i].size() == words[j].size()+1 if offsets[i] != 0 and offsets[j] == 0 + word_count = std::max(word_count, word.size()); + } + + for (size_t word_i = 0; word_i < word_count - 1; ++word_i) { + for (int i = 0; i < N; ++i) { + if (offsets[i] == 0) { + visited_words[i] = words[i][word_i]; + } else { + visited_words[i] = words[i][word_i] >> offsets[i]; + visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); + } + } + visitor(visited_words); + } + + // FIXME(bkietz) words.back() or words.front() might not be accessible (for example if + // buffer points to unaligned stack bytes). How to test that...? + + // handle last word + size_t word_i = word_count - 1; + for (int i = 0; i < N; ++i) { + if (word_i < words[i].size()) { + visited_words[i] = words[i][word_i] >> offsets[i]; + } + } + visitor(visited_words); + } + + std::shared_ptr buffer_; + int64_t offset_ = 0, length_ = 0; }; ARROW_EXPORT diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index ca6235b8cd1..85fd2625587 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/buffer.h" @@ -92,46 +93,64 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { return buffer; } -static void BenchmarkBitmapAnd(benchmark::State& state) { +template +static void BenchmarkAndImpl(benchmark::State& state, DoAnd&& do_and) { int64_t nbytes = state.range(0); + int64_t offset = state.range(1); + std::shared_ptr buffer_1 = CreateRandomBuffer(nbytes); std::shared_ptr buffer_2 = CreateRandomBuffer(nbytes); std::shared_ptr buffer_3 = CreateRandomBuffer(nbytes); - const int64_t num_bits = nbytes * 8; + const int64_t num_bits = nbytes * 8 - offset; + + internal::Bitmap bitmap_1{buffer_1, 0, num_bits}; + internal::Bitmap bitmap_2{buffer_2, offset, num_bits}; + internal::Bitmap bitmap_3{buffer_3, 0, num_bits}; for (auto _ : state) { - internal::BitmapAnd(buffer_1->data(), 0, buffer_2->data(), 0, num_bits, 0, - buffer_3->mutable_data()); - auto total = internal::CountSetBits(buffer_3->data(), 0, num_bits); + do_and({bitmap_1, bitmap_2}, &bitmap_3); + auto total = internal::CountSetBits(bitmap_3.buffer()->data(), bitmap_3.offset(), + bitmap_3.length()); benchmark::DoNotOptimize(total); } state.SetBytesProcessed(state.iterations() * nbytes); } -static void BenchmarkBitmapVisitAnd(benchmark::State& state) { - int64_t nbytes = state.range(0); - std::shared_ptr buffer_1 = CreateRandomBuffer(nbytes); - std::shared_ptr buffer_2 = CreateRandomBuffer(nbytes); - std::shared_ptr buffer_3 = CreateRandomBuffer(nbytes); +static void BenchmarkBitmapAnd(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), + bitmaps[0].length(), 0, out->buffer()->mutable_data()); + }); +} - const int64_t num_bits = nbytes * 8; +static void BenchmarkBitmapVisitBitsetAnd(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + int64_t i = 0; + internal::Bitmap::Visit( + bitmaps, [&](std::bitset<2> bits) { out->SetBitTo(i++, bits[0] && bits[1]); }); + }); +} - internal::Bitmap bitmap_1{buffer_1->mutable_data(), num_bits, 0}; - internal::Bitmap bitmap_2{buffer_2->mutable_data(), num_bits, 0}; - internal::Bitmap bitmap_3{buffer_3->mutable_data(), num_bits, 0}; +static void BenchmarkBitmapVisitUInt8And(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { + int64_t i = 0; + internal::Bitmap::Visit(bitmaps, [&](std::array uint8s) { + reinterpret_cast(out->buffer()->mutable_data())[i++] = + uint8s[0] & uint8s[1]; + }); + }); +} - for (auto _ : state) { +static void BenchmarkBitmapVisitUInt64And(benchmark::State& state) { + BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { int64_t i = 0; - internal::Bitmap::Visit({bitmap_1, bitmap_2}, [&](std::array bits) { - bool out_bit = - std::accumulate(bits.begin(), bits.end(), true, std::logical_and{}); - bitmap_3.SetBitTo(i++, out_bit); + internal::Bitmap::Visit(bitmaps, [&](std::array uint64s) { + reinterpret_cast(out->buffer()->mutable_data())[i++] = + uint64s[0] & uint64s[1]; }); - auto total = internal::CountSetBits(bitmap_3.data, bitmap_3.offset, bitmap_3.length); - benchmark::DoNotOptimize(total); - } - state.SetBytesProcessed(state.iterations() * nbytes); + }); } template @@ -361,8 +380,16 @@ BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); -BENCHMARK(BenchmarkBitmapAnd)->Arg(kBufferSize)->MinTime(1.0); -BENCHMARK(BenchmarkBitmapVisitAnd)->Arg(kBufferSize)->MinTime(1.0); +static std::vector> and_benchmark_ranges{ + // buffer sizes + {kBufferSize, kBufferSize * 2}, + // offsets of second buffer + {0, 2}}; + +BENCHMARK(BenchmarkBitmapAnd)->Ranges(and_benchmark_ranges)->MinTime(1.0); +BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(and_benchmark_ranges)->MinTime(1.0); +BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(and_benchmark_ranges)->MinTime(1.0); +BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(and_benchmark_ranges)->MinTime(1.0); } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 4ff7e4e1a9d..9b7fed75447 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1092,4 +1092,141 @@ TEST(BitUtil, BitsetStack) { ASSERT_EQ(stack.TopSize(), 0); } +// test the basic assumption of word level Bitmap::Visit +TEST(Bitmap, ShiftingWordsOptimization) { + // single word + { + uint64_t word; + auto bytes = reinterpret_cast(&word); + constexpr size_t kBitWidth = sizeof(word) * 8; + + for (int seed = 0; seed < 64; ++seed) { + random_bytes(sizeof(word), seed, bytes); + + // bits are accessible through simple bit shifting of the word + for (int i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((word >> i) & 1)); + } + + // bit offset can therefore be accomodated by shifting the word + for (int offset = 0; offset < (kBitWidth * 3) / 4; ++offset) { + uint64_t shifted_word = word >> offset; + auto shifted_bytes = reinterpret_cast(&shifted_word); + ASSERT_TRUE( + internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset)); + } + } + } + + // two words + { + uint64_t words[2]; + auto bytes = reinterpret_cast(words); + constexpr size_t kBitWidth = sizeof(words[0]) * 8; + + for (int seed = 0; seed < 64; ++seed) { + random_bytes(sizeof(words), seed, bytes); + + // bits are accessible through simple bit shifting of a word + for (int i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((words[0] >> i) & 1)); + } + for (int i = 0; i < kBitWidth; ++i) { + ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((words[1] >> i) & 1)); + } + + // bit offset can therefore be accomodated by shifting the word + for (int offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) { + uint64_t shifted_words[2]; + shifted_words[0] = words[0] >> offset | (words[1] << (kBitWidth - offset)); + shifted_words[1] = words[1] >> offset; + auto shifted_bytes = reinterpret_cast(shifted_words); + + // from offset to unshifted word boundary + ASSERT_TRUE( + internal::BitmapEquals(bytes, offset, shifted_bytes, 0, kBitWidth - offset)); + + // from unshifted word boundary to shifted word boundary + ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth, shifted_bytes, + kBitWidth - offset, offset)); + + // from shifted word boundary to end + ASSERT_TRUE(internal::BitmapEquals(bytes, kBitWidth + offset, shifted_bytes, + kBitWidth, kBitWidth - offset)); + } + } + } +} + +// reconstruct a bitmap from a word-wise visit +TEST(Bitmap, Visit) { + using internal::Bitmap; + + constexpr int64_t nbytes = 1 << 10; + std::shared_ptr buffer, actual_buffer; + for (std::shared_ptr* b : {&buffer, &actual_buffer}) { + ASSERT_OK(AllocateBuffer(nbytes, b)); + memset((*b)->mutable_data(), 0, nbytes); + } + random_bytes(nbytes, 0, buffer->mutable_data()); + + constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); + + for (int64_t offset : {0, 1, 2, 5, 17}) { + for (int64_t num_bits : + {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, + nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { + Bitmap bitmaps[] = {{buffer, offset, num_bits}}; + + int64_t i = 0; + Bitmap::Visit(bitmaps, [&](std::array uint64s) { + reinterpret_cast(actual_buffer->mutable_data())[i++] = uint64s[0]; + }); + + ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, buffer->data(), offset, + num_bits)) + << "offset:" << offset << " bits:" << num_bits << std::endl + << Bitmap(actual_buffer, 0, num_bits).Diff({buffer, offset, num_bits}); + } + } +} + +// compute bitwise and of bitmaps using word-wise visit +TEST(Bitmap, VisitAnd) { + using internal::Bitmap; + + constexpr int64_t nbytes = 1 << 10; + std::shared_ptr buffer, actual_buffer, expected_buffer; + for (std::shared_ptr* b : {&buffer, &actual_buffer, &expected_buffer}) { + ASSERT_OK(AllocateBuffer(nbytes, b)); + memset((*b)->mutable_data(), 0, nbytes); + } + random_bytes(nbytes, 0, buffer->mutable_data()); + + constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); + + for (int64_t offset : {0, 1, 2, 5, 17}) { + for (int64_t num_bits : + {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, + nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { + Bitmap bitmaps[] = {{buffer, 0, num_bits}, {buffer, offset, num_bits}}; + + int64_t i = 0; + Bitmap::Visit(bitmaps, [&](std::array uint64s) { + reinterpret_cast(actual_buffer->mutable_data())[i++] = + uint64s[0] & uint64s[1]; + }); + + internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), + bitmaps[0].length(), 0, expected_buffer->mutable_data()); + + ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, + expected_buffer->data(), 0, num_bits)) + << "offset:" << offset << " bits:" << num_bits << std::endl + << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); + } + } +} + } // namespace arrow diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h index 88748429b7e..c1ed8ca1e24 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/string_view.h @@ -25,6 +25,7 @@ namespace arrow { namespace util { +using nonstd::basic_string_view; using nonstd::string_view; } // namespace util From 8e3057ee6ebdb5c0a8bc89231728a8dc05488abb Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 1 Nov 2019 16:05:30 -0400 Subject: [PATCH 03/14] delete cruft --- cpp/src/arrow/util/bit_util.h | 116 ++++------------------------ cpp/src/arrow/util/bit_util_test.cc | 2 +- 2 files changed, 15 insertions(+), 103 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 9cf7c9bf7a7..ff6e5181a47 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -14,8 +14,8 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. -#ifndef ARROW_UTIL_BIT_UTIL_H -#define ARROW_UTIL_BIT_UTIL_H + +#pragma once #ifdef _WIN32 #define ARROW_LITTLE_ENDIAN 1 @@ -860,99 +860,21 @@ class ARROW_EXPORT Bitmap { BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); } - /* - template - BitmapWordAlignParams WordAlignParams() const { - return BitmapWordAlign(buffer_->data(), offset_, length_); - } - */ - + /// \brief Visit bits of this bitmap either as bitset or as array. + /// + /// If visiting by words, note that the every Word containing a bit + /// must be accessible (and initialized, or Valgrind will barf). + /// This is satisfied by most Buffers in Arrow space (which are allocated with + /// maximum alignment and padded to contain whole cache lines). + /// + /// XXX should I add a dcheck/error return/... for incorrect alignment? I can't + /// detect uninit template static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { VisitImpl(bitmaps, VisitedIs>{}, std::forward(visitor)); } - /* - template < - size_t N, typename Visitor, - typename Word = - typename internal::call_traits::argument_type<0, Visitor&&>::value_type, - typename Enable = typename std::enable_if::value>::type> - static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { - auto bit_length = bitmaps[0].length(); - - constexpr size_t kBitWidth = sizeof(Word) * 8; - - BitmapWordAlignParams align_params[N]; - bool leading_bits = false; - for (int i = 0; i < N; ++i) { - align_params[i] = bitmaps[i].WordAlignParams(); - - auto leading_bytes = CeilDiv(align_params[i].leading_bits, 8); - if (align_params[i].aligned_start - leading_bytes < bitmaps[i].buffer()->data()) { - // this bitmap's leading_bits do not lie in an accessible word - leading_bits = true; - } - } - - std::array visited_words; - - if (leading_bits) { - // consume bits which do not lie in an accessible word first - auto length = std::min(kBitWidth, bit_length); - for (int i = 0; i < N; ++i) { - auto bitmap = bitmaps[i].Slice(0, length); - std::memcpy(&visited_words[i], bitmap.byte_offset(), bitmap.byte_length()); - align_params[i] = bitmap.WordAlignParams(); - } - visitor(visited_words); - } - - const Word* words[N]; - int64_t offsets[N]; - - size_t trailing_bits = 0; - for (int i = 0; i < N; ++i) { - auto trailing_bytes = CeilDiv(align_params[i].trailing_bits, 8); - auto aligned_end = - align_params[i].aligned_start + align_params[i].aligned_words * sizeof(Word); - if (aligned_end + trailing_bytes > - bitmaps[i].buffer()->data() + bitmaps[i].buffer()->size()) { - // this bitmap's trailing_bits do not lie in an accessible word - trailing_bits = std::max(trailing_bits, align_params[i].trailing_bits); - } - - words[i] = reinterpret_cast(align_params[i].aligned_start) - 1; - offsets[i] = bitmaps[i].offset(); - } - - for (int i = 0; i < N; ++i) { - auto bytes = bitmaps[i].buffer()->data(); - auto misalignment = reinterpret_cast(bytes) % alignof(IntType); - values[i] = reinterpret_cast(bytes - misalignment); - offsets[i] = bitmaps[i].offset() + misalignment * 8; - auto capacity = bitmaps[i].buffer()->capacity() + misalignment; - - // clamp offsets to [0, kBitWidth) - values[i] -= offsets[i] / kBitWidth; - offsets[i] %= kBitWidth; - } - - for (int64_t int_i = 0; int_i < int_length; ++int_i) { - for (int i = 0; i < N; ++i) { - if (offsets[i] == 0) { - ints[i] = values[i][int_i]; - } else { - ints[i] = values[i][int_i] << offsets[i]; - ints[i] |= values[i][int_i] >> (kBitWidth - offsets[i]); - } - } - visitor(ints); - } - } - */ - const std::shared_ptr& buffer() const { return buffer_; } /// offset of first bit relative to buffer().data() @@ -1008,18 +930,13 @@ class ARROW_EXPORT Bitmap { template static void VisitImpl(const Bitmap (&bitmaps)[N], VisitedIs>, Visitor&& visitor) { - util::basic_string_view words[N]; - int64_t offsets[N]; - constexpr size_t kBitWidth = sizeof(Word) * 8; + util::basic_string_view words[N]; + int64_t offsets[N]; for (int i = 0; i < N; ++i) { words[i] = bitmaps[i].template words(); - offsets[i] = bitmaps[i].offset(); - - // clamp offsets to [0, kBitWidth) - words[i] = words[i].substr(offsets[i] / kBitWidth); - offsets[i] %= kBitWidth; + offsets[i] = bitmaps[i].template word_offset(); } std::array visited_words; @@ -1043,9 +960,6 @@ class ARROW_EXPORT Bitmap { visitor(visited_words); } - // FIXME(bkietz) words.back() or words.front() might not be accessible (for example if - // buffer points to unaligned stack bytes). How to test that...? - // handle last word size_t word_i = word_count - 1; for (int i = 0; i < N; ++i) { @@ -1166,5 +1080,3 @@ class BitsetStack { } // namespace internal } // namespace arrow - -#endif // ARROW_UTIL_BIT_UTIL_H diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 9b7fed75447..a1d05008366 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1205,7 +1205,7 @@ TEST(Bitmap, VisitAnd) { constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); - for (int64_t offset : {0, 1, 2, 5, 17}) { + for (int64_t offset : {0, 1, 2, 5, 17, int(kBitWidth + 1), int(kBitWidth + 17)}) { for (int64_t num_bits : {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { From 9e67d55f02e1d677f06bb5f2353349a15f5a7171 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 4 Nov 2019 11:38:47 -0500 Subject: [PATCH 04/14] Add word-wise implementation of Kleene logic re enable tests from filter.cc --- cpp/src/arrow/compute/kernels/boolean.cc | 144 +++++++++++++----- cpp/src/arrow/compute/kernels/boolean.h | 18 ++- cpp/src/arrow/compute/kernels/boolean_test.cc | 86 +++++++---- cpp/src/arrow/dataset/filter.cc | 4 +- cpp/src/arrow/dataset/filter_test.cc | 7 +- cpp/src/arrow/util/bit_util.h | 23 ++- cpp/src/arrow/util/bit_util_test.cc | 16 +- 7 files changed, 219 insertions(+), 79 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 702ca4b44a1..047076f7889 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -78,7 +78,13 @@ Status Invert(FunctionContext* ctx, const Datum& value, Datum* out) { return Status::OK(); } +enum class ResolveNull { KLEENE_LOGIC, PROPAGATE }; + class BinaryBooleanKernel : public BinaryKernel { + public: + explicit BinaryBooleanKernel(ResolveNull resolve_null) : resolve_null_(resolve_null) {} + + protected: virtual Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) = 0; @@ -97,53 +103,89 @@ class BinaryBooleanKernel : public BinaryKernel { } std::shared_ptr out_type() const override { return boolean(); } -}; -enum class ResolveNull { KLEENE_LOGIC, PROPAGATE }; + enum BitmapIndex { LEFT_VALID, LEFT_DATA, RIGHT_VALID, RIGHT_DATA }; + + template + Status ComputeKleene(ComputeWord&& compute_word, FunctionContext* ctx, + const ArrayData& left, const ArrayData& right, ArrayData* out) { + DCHECK(left.null_count != 0 || right.null_count != 0); + + Bitmap bitmaps[4]; + bitmaps[LEFT_VALID] = {left.buffers[0], left.offset, left.length}; + bitmaps[LEFT_DATA] = {left.buffers[1], left.offset, left.length}; + + bitmaps[RIGHT_VALID] = {right.buffers[0], right.offset, right.length}; + bitmaps[RIGHT_DATA] = {right.buffers[1], right.offset, right.length}; + + RETURN_NOT_OK(AllocateEmptyBitmap(ctx->memory_pool(), out->length, &out->buffers[0])); + + auto out_validity = out->GetMutableValues(0); + auto out_data = out->GetMutableValues(1); + + int64_t i = 0; + auto apply = [&](uint64_t left_valid, uint64_t left_data, uint64_t right_valid, + uint64_t right_data) { + auto left_true = left_valid & left_data; + auto left_false = left_valid & ~left_data; + + auto right_true = right_valid & right_data; + auto right_false = right_valid & ~right_data; + + compute_word(left_true, left_false, right_true, right_false, &out_validity[i], + &out_data[i]); + ++i; + }; + + if (right.null_count == 0 || left.null_count == 0) { + if (left.null_count == 0) { + // ensure only bitmaps[RIGHT_VALID].buffer might be null + std::swap(bitmaps[LEFT_VALID], bitmaps[RIGHT_VALID]); + std::swap(bitmaps[LEFT_DATA], bitmaps[RIGHT_DATA]); + } + // override bitmaps[RIGHT_VALID] to make it safe for Visit() + bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA]; + + Bitmap::Visit(bitmaps, [&](std::array words) { + apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]); + }); + } else { + Bitmap::Visit(bitmaps, [&](std::array words) { + apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]); + }); + } + return Status::OK(); + } + + ResolveNull resolve_null_; +}; class AndKernel : public BinaryBooleanKernel { public: - explicit AndKernel(ResolveNull resolve_null) : resolve_null_(resolve_null) {} + using BinaryBooleanKernel::BinaryBooleanKernel; private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { - if (resolve_null_ == ResolveNull::PROPAGATE) { + if (resolve_null_ == ResolveNull::PROPAGATE || + (left.GetNullCount() == 0 && right.GetNullCount() == 0)) { RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); if (right.length > 0) { BitmapAnd(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), right.offset, right.length, 0, out->buffers[1]->mutable_data()); } - } else { - Bitmap bitmaps[4]; - for (int i = 0, buffer_index = 0; buffer_index != 2; ++buffer_index) { - for (const ArrayData* operand : {&left, &right}) { - bitmaps[i++] = - Bitmap(operand->buffers[buffer_index], operand->offset, operand->length); - } - } - Bitmap out_validity(out->buffers[0], out->offset, out->length); - Bitmap out_values(out->buffers[1], out->offset, out->length); - int64_t i = 0; - Bitmap::Visit(bitmaps, [&](std::bitset<4> bits) { - bool left_valid = bits[0], right_valid = bits[1]; - bool left_value = !left_valid || bits[2]; - bool right_value = !right_valid || bits[3]; - if (left_valid && right_valid) { - out_validity.SetBitTo(i, true); - } else if (!left_valid && !right_valid) { - out_validity.SetBitTo(i, false); - } else { - // one is null, so out will be null or false - out_validity.SetBitTo(i, !(left_value && right_value)); - } - out_values.SetBitTo(i, left_value && right_value); - }); + return Status::OK(); } - return Status::OK(); - } - ResolveNull resolve_null_; + static auto compute_word = [](uint64_t left_true, uint64_t left_false, + uint64_t right_true, uint64_t right_false, + uint64_t* out_valid, uint64_t* out_data) { + *out_data = left_true & right_true; + *out_valid = left_false | right_false | (left_true & right_true); + }; + + return ComputeKleene(compute_word, ctx, left, right, out); + } }; Status And(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { @@ -160,24 +202,50 @@ Status KleeneAnd(FunctionContext* ctx, const Datum& left, const Datum& right, } class OrKernel : public BinaryBooleanKernel { + public: + using BinaryBooleanKernel::BinaryBooleanKernel; + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { - RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); - if (right.length > 0) { - BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), - right.offset, right.length, 0, out->buffers[1]->mutable_data()); + if (resolve_null_ == ResolveNull::PROPAGATE || + (left.GetNullCount() == 0 && right.GetNullCount() == 0)) { + RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); + if (right.length > 0) { + BitmapOr(left.buffers[1]->data(), left.offset, right.buffers[1]->data(), + right.offset, right.length, 0, out->buffers[1]->mutable_data()); + } + return Status::OK(); } - return Status::OK(); + + static auto compute_word = [](uint64_t left_true, uint64_t left_false, + uint64_t right_true, uint64_t right_false, + uint64_t* out_valid, uint64_t* out_data) { + *out_data = left_true | right_true; + *out_valid = left_true | right_true | (left_false & right_false); + }; + + return ComputeKleene(compute_word, ctx, left, right, out); } }; Status Or(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { - OrKernel or_kernel; + OrKernel or_kernel(ResolveNull::PROPAGATE); + detail::PrimitiveAllocatingBinaryKernel kernel(&or_kernel); + return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); +} + +Status KleeneOr(FunctionContext* ctx, const Datum& left, const Datum& right, Datum* out) { + OrKernel or_kernel(ResolveNull::KLEENE_LOGIC); detail::PrimitiveAllocatingBinaryKernel kernel(&or_kernel); return detail::InvokeBinaryArrayKernel(ctx, &kernel, left, right, out); } class XorKernel : public BinaryBooleanKernel { + public: + XorKernel() : BinaryBooleanKernel(ResolveNull::PROPAGATE) {} + + private: Status Compute(FunctionContext* ctx, const ArrayData& left, const ArrayData& right, ArrayData* out) override { RETURN_NOT_OK(detail::AssignNullIntersection(ctx, left, right, out)); diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h index 7f485a721f7..9ab2647a70b 100644 --- a/cpp/src/arrow/compute/kernels/boolean.h +++ b/cpp/src/arrow/compute/kernels/boolean.h @@ -64,7 +64,9 @@ ARROW_EXPORT Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); -/// \brief Element-wise OR of two boolean datums +/// \brief Element-wise OR of two boolean datums which always propagates nulls +/// (null and true is null). +/// /// \param[in] context the FunctionContext /// \param[in] left left operand (array) /// \param[in] right right operand (array) @@ -75,6 +77,20 @@ Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right ARROW_EXPORT Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); +/// \brief Element-wise OR of two boolean datums with a Kleene truth table +/// (null and true is true). +/// +/// \param[in] context the FunctionContext +/// \param[in] left left operand (array) +/// \param[in] right right operand (array) +/// \param[out] out resulting datum +/// +/// \since 0.11.0 +/// \note API not yet finalized +ARROW_EXPORT +Status KleeneOr(FunctionContext* context, const Datum& left, const Datum& right, + Datum* out); + /// \brief Element-wise XOR of two boolean datums /// \param[in] context the FunctionContext /// \param[in] left left operand (array) diff --git a/cpp/src/arrow/compute/kernels/boolean_test.cc b/cpp/src/arrow/compute/kernels/boolean_test.cc index b7f57840f87..f6488a186b4 100644 --- a/cpp/src/arrow/compute/kernels/boolean_test.cc +++ b/cpp/src/arrow/compute/kernels/boolean_test.cc @@ -62,37 +62,39 @@ class TestBooleanKernel : public ComputeFixture, public TestBase { ASSERT_TRUE(result_ca->Equals(expected)); } - void TestBinaryKernel(const BinaryKernelFunc& kernel, const std::vector& values1, - const std::vector& values2, - const std::vector& values3, - const std::vector& values3_nulls) { - auto type = boolean(); - auto a1 = _MakeArray(type, values1, {}); - auto a2 = _MakeArray(type, values2, {}); - auto a3 = _MakeArray(type, values3, {}); - auto a1_nulls = _MakeArray(type, values1, values1); - auto a2_nulls = _MakeArray(type, values2, values2); - auto a3_nulls = _MakeArray(type, values3, values3_nulls); - - TestArrayBinary(kernel, a1, a2, a3); - TestArrayBinary(kernel, a1_nulls, a2_nulls, a3_nulls); - TestArrayBinary(kernel, a1->Slice(1), a2->Slice(1), a3->Slice(1)); - TestArrayBinary(kernel, a1_nulls->Slice(1), a2_nulls->Slice(1), a3_nulls->Slice(1)); + void TestBinaryKernel(const BinaryKernelFunc& kernel, + const std::shared_ptr& left, + const std::shared_ptr& right, + const std::shared_ptr& expected) { + TestArrayBinary(kernel, left, right, expected); + TestArrayBinary(kernel, left->Slice(1), right->Slice(1), expected->Slice(1)); // ChunkedArray - std::vector> ca1_arrs = {a1, a1->Slice(1)}; - auto ca1 = std::make_shared(ca1_arrs); - std::vector> ca2_arrs = {a2, a2->Slice(1)}; - auto ca2 = std::make_shared(ca2_arrs); - std::vector> ca3_arrs = {a3, a3->Slice(1)}; - auto ca3 = std::make_shared(ca3_arrs); - TestChunkedArrayBinary(kernel, ca1, ca2, ca3); + auto cleft = std::make_shared(ArrayVector{left, left->Slice(1)}); + auto cright = std::make_shared(ArrayVector{right, right->Slice(1)}); + auto cexpected = + std::make_shared(ArrayVector{expected, expected->Slice(1)}); + TestChunkedArrayBinary(kernel, cleft, cright, cexpected); // ChunkedArray with different chunks - std::vector> ca4_arrs = {a1->Slice(0, 1), a1->Slice(1), - a1->Slice(1, 1), a1->Slice(2)}; - auto ca4 = std::make_shared(ca4_arrs); - TestChunkedArrayBinary(kernel, ca4, ca2, ca3); + cleft = std::make_shared(ArrayVector{ + left->Slice(0, 1), left->Slice(1), left->Slice(1, 1), left->Slice(2)}); + TestChunkedArrayBinary(kernel, cleft, cright, cexpected); + } + + void TestBinaryKernelPropagate(const BinaryKernelFunc& kernel, + const std::vector& left, + const std::vector& right, + const std::vector& expected, + const std::vector& expected_nulls) { + auto type = boolean(); + TestBinaryKernel(kernel, _MakeArray(type, left, {}), + _MakeArray(type, right, {}), + _MakeArray(type, expected, {})); + + TestBinaryKernel(kernel, _MakeArray(type, left, left), + _MakeArray(type, right, right), + _MakeArray(type, expected, expected_nulls)); } }; @@ -154,7 +156,7 @@ TEST_F(TestBooleanKernel, And) { std::vector values1 = {true, false, true, false, true, true}; std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {true, false, false, false, true, false}; - TestBinaryKernel(And, values1, values2, values3, values3); + TestBinaryKernelPropagate(And, values1, values2, values3, values3); } TEST_F(TestBooleanKernel, Or) { @@ -162,7 +164,7 @@ TEST_F(TestBooleanKernel, Or) { std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {true, true, true, false, true, true}; std::vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Or, values1, values2, values3, values3_nulls); + TestBinaryKernelPropagate(Or, values1, values2, values3, values3_nulls); } TEST_F(TestBooleanKernel, Xor) { @@ -170,7 +172,31 @@ TEST_F(TestBooleanKernel, Xor) { std::vector values2 = {true, true, false, false, true, false}; std::vector values3 = {false, true, true, false, false, true}; std::vector values3_nulls = {true, false, false, false, true, false}; - TestBinaryKernel(Xor, values1, values2, values3, values3_nulls); + TestBinaryKernelPropagate(Xor, values1, values2, values3, values3_nulls); +} + +TEST_F(TestBooleanKernel, KleeneAnd) { + auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false, null]"); + auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null, null]"); + auto expected = ArrayFromJSON(boolean(), "[true, false, null, false, false, null]"); + TestBinaryKernel(KleeneAnd, left, right, expected); + + left = ArrayFromJSON(boolean(), " [true, true, false, null, null]"); + right = ArrayFromJSON(boolean(), " [true, false, false, true, false]"); + expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]"); + TestBinaryKernel(KleeneAnd, left, right, expected); +} + +TEST_F(TestBooleanKernel, KleeneOr) { + auto left = ArrayFromJSON(boolean(), " [true, true, true, false, false, null]"); + auto right = ArrayFromJSON(boolean(), " [true, false, null, false, null, null]"); + auto expected = ArrayFromJSON(boolean(), "[true, true, true, false, null, null]"); + TestBinaryKernel(KleeneOr, left, right, expected); + + left = ArrayFromJSON(boolean(), " [true, true, false, null, null]"); + right = ArrayFromJSON(boolean(), " [true, false, false, true, false]"); + expected = ArrayFromJSON(boolean(), "[true, true, false, true, null]"); + TestBinaryKernel(KleeneOr, left, right, expected); } } // namespace compute diff --git a/cpp/src/arrow/dataset/filter.cc b/cpp/src/arrow/dataset/filter.cc index 06e7ed2f88a..34cd3a4b7a8 100644 --- a/cpp/src/arrow/dataset/filter.cc +++ b/cpp/src/arrow/dataset/filter.cc @@ -894,7 +894,7 @@ Result TreeEvaluator::Evaluate(const AndExpression& expr, if (lhs.is_array() && rhs.is_array()) { Datum out; compute::FunctionContext ctx{pool_}; - RETURN_NOT_OK(arrow::compute::And(&ctx, lhs, rhs, &out)); + RETURN_NOT_OK(arrow::compute::KleeneAnd(&ctx, lhs, rhs, &out)); return std::move(out); } @@ -925,7 +925,7 @@ Result TreeEvaluator::Evaluate(const OrExpression& expr, if (lhs.is_array() && rhs.is_array()) { Datum out; compute::FunctionContext ctx{pool_}; - RETURN_NOT_OK(arrow::compute::Or(&ctx, lhs, rhs, &out)); + RETURN_NOT_OK(arrow::compute::KleeneOr(&ctx, lhs, rhs, &out)); return std::move(out); } diff --git a/cpp/src/arrow/dataset/filter_test.cc b/cpp/src/arrow/dataset/filter_test.cc index 81677d697f9..085e8402f15 100644 --- a/cpp/src/arrow/dataset/filter_test.cc +++ b/cpp/src/arrow/dataset/filter_test.cc @@ -241,7 +241,7 @@ TEST_F(FilterTest, Basics) { {"a": 1, "b": 0.2, "in": 1}, {"a": 2, "b": -0.1, "in": 0}, {"a": 0, "b": 0.1, "in": 0}, - {"a": 0, "b": null, "in": null}, + {"a": 0, "b": null, "in": 0}, {"a": 0, "b": 1.0, "in": 0} ])"); } @@ -259,8 +259,7 @@ TEST_F(FilterTest, ConditionOnAbsentColumn) { ])"); } -TEST_F(FilterTest, DISABLED_KleeneTruthTables) { - // FIXME(bkietz) enable this test after ARROW-6396 +TEST_F(FilterTest, KleeneTruthTables) { // TODO(bkietz) also test various ranks against each other AssertFilter("a"_ and "b"_, {field("a", boolean()), field("b", boolean())}, R"([ {"a":null, "b":null, "in":null}, @@ -273,7 +272,7 @@ TEST_F(FilterTest, DISABLED_KleeneTruthTables) { {"a":false, "b":false, "in":false} ])"); - AssertFilter("a"_ and "b"_, {field("a", boolean()), field("b", boolean())}, R"([ + AssertFilter("a"_ or "b"_, {field("a", boolean()), field("b", boolean())}, R"([ {"a":null, "b":null, "in":null}, {"a":null, "b":true, "in":true}, {"a":null, "b":false, "in":null}, diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index ff6e5181a47..8d1c72ea43b 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -840,6 +840,11 @@ class ARROW_EXPORT Bitmap { Bitmap(std::shared_ptr buffer, int64_t offset, int64_t length) : buffer_(std::move(buffer)), offset_(offset), length_(length) {} + Bitmap(const uint8_t* data, int64_t offset, int64_t length) + : buffer_(std::make_shared(data, BitUtil::BytesForBits(offset + length))), + offset_(offset), + length_(length) {} + Bitmap Slice(int64_t offset) const { return Bitmap(buffer_, offset_ + offset, length_ - offset); } @@ -856,6 +861,8 @@ class ARROW_EXPORT Bitmap { bool GetBit(int64_t i) const { return BitUtil::GetBit(buffer_->data(), i + offset_); } + bool operator[](int64_t i) const { return GetBit(i); } + void SetBitTo(int64_t i, bool v) const { BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); } @@ -867,8 +874,7 @@ class ARROW_EXPORT Bitmap { /// This is satisfied by most Buffers in Arrow space (which are allocated with /// maximum alignment and padded to contain whole cache lines). /// - /// XXX should I add a dcheck/error return/... for incorrect alignment? I can't - /// detect uninit + /// TODO(bkietz) allow for early termination template static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { VisitImpl(bitmaps, VisitedIs>{}, @@ -880,15 +886,26 @@ class ARROW_EXPORT Bitmap { /// offset of first bit relative to buffer().data() int64_t offset() const { return offset_; } + /// number of bits in this Bitmap int64_t length() const { return length_; } - /// string_view of all bytes which contain any bit in this bitmap + /// string_view of all bytes which contain any bit in this Bitmap util::basic_string_view bytes() const { auto byte_offset = offset_ / 8; auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; return util::basic_string_view(buffer_->data() + byte_offset, byte_count); } + template + bool word_aligned() const { + auto words = this->words(); + auto valid_addr = AsInt(buffer_->data()); + auto words_addr = AsInt(words.data()); + return words_addr >= valid_addr && + words_addr + words.size() * sizeof(Word) <= valid_addr + buffer_->capacity(); + } + + /// string_view of all Words which contain any bit in this Bitmap template util::basic_string_view words() const { auto bytes_addr = AsInt(bytes().data()); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index a1d05008366..58c4a8ebf13 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1158,6 +1158,20 @@ TEST(Bitmap, ShiftingWordsOptimization) { } } +TEST(Bitmap, WordVisitable) { + using internal::Bitmap; + + constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); + uint64_t word; + auto bytes = reinterpret_cast(&word); + + // skip first byte in bitmap + ASSERT_FALSE(Bitmap(bytes + 1, 0, kBitWidth - 8).template word_aligned()); + + // exclude last byte from bitmap + ASSERT_FALSE(Bitmap(bytes, 0, kBitWidth - 8).template word_aligned()); +} + // reconstruct a bitmap from a word-wise visit TEST(Bitmap, Visit) { using internal::Bitmap; @@ -1191,7 +1205,7 @@ TEST(Bitmap, Visit) { } } -// compute bitwise and of bitmaps using word-wise visit +// compute bitwise AND of bitmaps using word-wise visit TEST(Bitmap, VisitAnd) { using internal::Bitmap; From fb592e51a6f62946eb4abd6b61044f0aecd12c82 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 4 Nov 2019 11:54:35 -0500 Subject: [PATCH 05/14] lint fixes --- cpp/src/arrow/compute/kernels/boolean.cc | 1 + cpp/src/arrow/util/bit_util.cc | 1 + cpp/src/arrow/util/bit_util.h | 9 +++++---- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 047076f7889..11b689a1ead 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "arrow/array.h" diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 25d8de74458..325aba06419 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -30,6 +30,7 @@ #include #include #include +#include #include #include "arrow/array.h" diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 8d1c72ea43b..d46d85f948c 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include @@ -937,7 +938,7 @@ class ARROW_EXPORT Bitmap { auto bit_length = bitmaps[0].length(); std::bitset bits; for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { bits[i] = bitmaps[i].GetBit(bit_i); } visitor(bits); @@ -951,7 +952,7 @@ class ARROW_EXPORT Bitmap { util::basic_string_view words[N]; int64_t offsets[N]; - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { words[i] = bitmaps[i].template words(); offsets[i] = bitmaps[i].template word_offset(); } @@ -966,7 +967,7 @@ class ARROW_EXPORT Bitmap { } for (size_t word_i = 0; word_i < word_count - 1; ++word_i) { - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { if (offsets[i] == 0) { visited_words[i] = words[i][word_i]; } else { @@ -979,7 +980,7 @@ class ARROW_EXPORT Bitmap { // handle last word size_t word_i = word_count - 1; - for (int i = 0; i < N; ++i) { + for (size_t i = 0; i < N; ++i) { if (word_i < words[i].size()) { visited_words[i] = words[i][word_i] >> offsets[i]; } From c515223eb1bf05a7e9123349729c99eec9b66634 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Tue, 5 Nov 2019 17:58:47 -0500 Subject: [PATCH 06/14] review comments --- cpp/src/arrow/compute/kernels/boolean.cc | 4 +- cpp/src/arrow/compute/kernels/boolean.h | 2 +- cpp/src/arrow/compute/kernels/boolean_test.cc | 13 +- cpp/src/arrow/util/bit_util.h | 139 +++++++++--------- cpp/src/arrow/util/bit_util_benchmark.cc | 6 +- cpp/src/arrow/util/bit_util_test.cc | 54 +++++-- 6 files changed, 124 insertions(+), 94 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 11b689a1ead..5fca5c58573 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -147,11 +147,11 @@ class BinaryBooleanKernel : public BinaryKernel { // override bitmaps[RIGHT_VALID] to make it safe for Visit() bitmaps[RIGHT_VALID] = bitmaps[RIGHT_DATA]; - Bitmap::Visit(bitmaps, [&](std::array words) { + Bitmap::VisitWords(bitmaps, [&](std::array words) { apply(words[LEFT_VALID], words[LEFT_DATA], ~uint64_t(0), words[RIGHT_DATA]); }); } else { - Bitmap::Visit(bitmaps, [&](std::array words) { + Bitmap::VisitWords(bitmaps, [&](std::array words) { apply(words[LEFT_VALID], words[LEFT_DATA], words[RIGHT_VALID], words[RIGHT_DATA]); }); } diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h index 9ab2647a70b..c0b0d7490e8 100644 --- a/cpp/src/arrow/compute/kernels/boolean.h +++ b/cpp/src/arrow/compute/kernels/boolean.h @@ -78,7 +78,7 @@ ARROW_EXPORT Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum* out); /// \brief Element-wise OR of two boolean datums with a Kleene truth table -/// (null and true is true). +/// (null or true is true). /// /// \param[in] context the FunctionContext /// \param[in] left left operand (array) diff --git a/cpp/src/arrow/compute/kernels/boolean_test.cc b/cpp/src/arrow/compute/kernels/boolean_test.cc index f6488a186b4..04d7e500843 100644 --- a/cpp/src/arrow/compute/kernels/boolean_test.cc +++ b/cpp/src/arrow/compute/kernels/boolean_test.cc @@ -44,10 +44,16 @@ class TestBooleanKernel : public ComputeFixture, public TestBase { const std::shared_ptr& right, const std::shared_ptr& expected) { Datum result; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); ASSERT_EQ(Datum::ARRAY, result.kind()); std::shared_ptr result_array = result.make_array(); ASSERT_ARRAYS_EQUAL(*expected, *result_array); + + ASSERT_OK(kernel(&this->ctx_, right, left, &result)); + ASSERT_EQ(Datum::ARRAY, result.kind()); + result_array = result.make_array(); + ASSERT_ARRAYS_EQUAL(*expected, *result_array); } void TestChunkedArrayBinary(const BinaryKernelFunc& kernel, @@ -55,11 +61,16 @@ class TestBooleanKernel : public ComputeFixture, public TestBase { const std::shared_ptr& right, const std::shared_ptr& expected) { Datum result; - std::shared_ptr result_array; + ASSERT_OK(kernel(&this->ctx_, left, right, &result)); ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); std::shared_ptr result_ca = result.chunked_array(); ASSERT_TRUE(result_ca->Equals(expected)); + + ASSERT_OK(kernel(&this->ctx_, right, left, &result)); + ASSERT_EQ(Datum::CHUNKED_ARRAY, result.kind()); + result_ca = result.chunked_array(); + ASSERT_TRUE(result_ca->Equals(expected)); } void TestBinaryKernel(const BinaryKernelFunc& kernel, diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index d46d85f948c..1a79cfe21b3 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -68,6 +68,7 @@ #include "arrow/buffer.h" #include "arrow/util/functional.h" #include "arrow/util/macros.h" +#include "arrow/util/string_view.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" @@ -836,6 +837,9 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); class ARROW_EXPORT Bitmap { public: + template + using WordsView = util::basic_string_view; + Bitmap() = default; Bitmap(std::shared_ptr buffer, int64_t offset, int64_t length) @@ -854,8 +858,6 @@ class ARROW_EXPORT Bitmap { return Bitmap(buffer_, offset_ + offset, length); } - std::shared_ptr ToArray() const; - std::string ToString() const; std::string Diff(const Bitmap& other) const; @@ -868,73 +870,9 @@ class ARROW_EXPORT Bitmap { BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); } - /// \brief Visit bits of this bitmap either as bitset or as array. - /// - /// If visiting by words, note that the every Word containing a bit - /// must be accessible (and initialized, or Valgrind will barf). - /// This is satisfied by most Buffers in Arrow space (which are allocated with - /// maximum alignment and padded to contain whole cache lines). - /// - /// TODO(bkietz) allow for early termination - template - static void Visit(const Bitmap (&bitmaps)[N], Visitor&& visitor) { - VisitImpl(bitmaps, VisitedIs>{}, - std::forward(visitor)); - } - - const std::shared_ptr& buffer() const { return buffer_; } - - /// offset of first bit relative to buffer().data() - int64_t offset() const { return offset_; } - - /// number of bits in this Bitmap - int64_t length() const { return length_; } - - /// string_view of all bytes which contain any bit in this Bitmap - util::basic_string_view bytes() const { - auto byte_offset = offset_ / 8; - auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; - return util::basic_string_view(buffer_->data() + byte_offset, byte_count); - } - - template - bool word_aligned() const { - auto words = this->words(); - auto valid_addr = AsInt(buffer_->data()); - auto words_addr = AsInt(words.data()); - return words_addr >= valid_addr && - words_addr + words.size() * sizeof(Word) <= valid_addr + buffer_->capacity(); - } - - /// string_view of all Words which contain any bit in this Bitmap - template - util::basic_string_view words() const { - auto bytes_addr = AsInt(bytes().data()); - auto words_addr = bytes_addr - bytes_addr % sizeof(Word); - auto word_byte_count = - BitUtil::RoundUpToPowerOf2(bytes_addr + bytes().size(), sizeof(Word)) - - words_addr; - return util::basic_string_view(reinterpret_cast(words_addr), - word_byte_count / sizeof(Word)); - } - - /// offset of first bit relative to words().data() - template - int64_t word_offset() const { - return offset_ + (AsInt(buffer_->data()) - AsInt(words().data())) * 8; - } - - private: - template - struct VisitedIs {}; - - static size_t AsInt(const void* ptr) { - return reinterpret_cast(reinterpret_cast(ptr)); - } - + /// \brief Visit bits of this bitmap as bitset template - static void VisitImpl(const Bitmap (&bitmaps)[N], VisitedIs>, - Visitor&& visitor) { + static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) { auto bit_length = bitmaps[0].length(); std::bitset bits; for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { @@ -945,12 +883,21 @@ class ARROW_EXPORT Bitmap { } } - template - static void VisitImpl(const Bitmap (&bitmaps)[N], VisitedIs>, - Visitor&& visitor) { + /// \brief Visit bits of this bitmap as array + /// + /// If visiting by words, note that every Word containing a bit + /// must be accessible (and initialized, or Valgrind will barf). + /// This is satisfied by most Buffers in Arrow space (which are allocated with + /// maximum alignment and padded to contain whole cache lines). + /// + /// TODO(bkietz) allow for early termination + template ::value_type> + static void VisitWords(const Bitmap (&bitmaps)[N], Visitor&& visitor) { constexpr size_t kBitWidth = sizeof(Word) * 8; - util::basic_string_view words[N]; + WordsView words[N]; int64_t offsets[N]; for (size_t i = 0; i < N; ++i) { words[i] = bitmaps[i].template words(); @@ -988,6 +935,54 @@ class ARROW_EXPORT Bitmap { visitor(visited_words); } + const std::shared_ptr& buffer() const { return buffer_; } + + /// offset of first bit relative to buffer().data() + int64_t offset() const { return offset_; } + + /// number of bits in this Bitmap + int64_t length() const { return length_; } + + /// string_view of all bytes which contain any bit in this Bitmap + WordsView bytes() const { + auto byte_offset = offset_ / 8; + auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; + return WordsView(buffer_->data() + byte_offset, byte_count); + } + + /// returns true if words in words() lie entirely in buffer() and may be safely + /// accessed + template + bool word_accessible() const { + auto words = this->words(); + auto valid_addr = reinterpret_cast(buffer_->data()); + auto words_addr = reinterpret_cast(words.data()); + return words_addr >= valid_addr && + words_addr + words.size() * sizeof(Word) <= valid_addr + buffer_->capacity(); + } + + /// string_view of all Words which contain any bit in this Bitmap + template + WordsView words() const { + auto bytes_addr = reinterpret_cast(bytes().data()); + auto words_addr = bytes_addr - bytes_addr % sizeof(Word); + auto word_byte_count = + BitUtil::RoundUpToPowerOf2(bytes_addr + bytes().size(), sizeof(Word)) - + words_addr; + return WordsView(reinterpret_cast(words_addr), + word_byte_count / sizeof(Word)); + } + + /// offset of first bit relative to words().data() + template + int64_t word_offset() const { + return offset_ + 8 * (reinterpret_cast(buffer_->data()) - + reinterpret_cast(words().data())); + } + + private: + std::shared_ptr ToArray() const; + std::shared_ptr buffer_; int64_t offset_ = 0, length_ = 0; }; diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 85fd2625587..193588aa96b 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -128,7 +128,7 @@ static void BenchmarkBitmapAnd(benchmark::State& state) { static void BenchmarkBitmapVisitBitsetAnd(benchmark::State& state) { BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { int64_t i = 0; - internal::Bitmap::Visit( + internal::Bitmap::VisitBits( bitmaps, [&](std::bitset<2> bits) { out->SetBitTo(i++, bits[0] && bits[1]); }); }); } @@ -136,7 +136,7 @@ static void BenchmarkBitmapVisitBitsetAnd(benchmark::State& state) { static void BenchmarkBitmapVisitUInt8And(benchmark::State& state) { BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { int64_t i = 0; - internal::Bitmap::Visit(bitmaps, [&](std::array uint8s) { + internal::Bitmap::VisitWords(bitmaps, [&](std::array uint8s) { reinterpret_cast(out->buffer()->mutable_data())[i++] = uint8s[0] & uint8s[1]; }); @@ -146,7 +146,7 @@ static void BenchmarkBitmapVisitUInt8And(benchmark::State& state) { static void BenchmarkBitmapVisitUInt64And(benchmark::State& state) { BenchmarkAndImpl(state, [](const internal::Bitmap(&bitmaps)[2], internal::Bitmap* out) { int64_t i = 0; - internal::Bitmap::Visit(bitmaps, [&](std::array uint64s) { + internal::Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { reinterpret_cast(out->buffer()->mutable_data())[i++] = uint64s[0] & uint64s[1]; }); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 58c4a8ebf13..fcc7d46b5e0 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1104,12 +1104,12 @@ TEST(Bitmap, ShiftingWordsOptimization) { random_bytes(sizeof(word), seed, bytes); // bits are accessible through simple bit shifting of the word - for (int i = 0; i < kBitWidth; ++i) { + for (size_t i = 0; i < kBitWidth; ++i) { ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((word >> i) & 1)); } // bit offset can therefore be accomodated by shifting the word - for (int offset = 0; offset < (kBitWidth * 3) / 4; ++offset) { + for (size_t offset = 0; offset < (kBitWidth * 3) / 4; ++offset) { uint64_t shifted_word = word >> offset; auto shifted_bytes = reinterpret_cast(&shifted_word); ASSERT_TRUE( @@ -1128,15 +1128,15 @@ TEST(Bitmap, ShiftingWordsOptimization) { random_bytes(sizeof(words), seed, bytes); // bits are accessible through simple bit shifting of a word - for (int i = 0; i < kBitWidth; ++i) { + for (size_t i = 0; i < kBitWidth; ++i) { ASSERT_EQ(BitUtil::GetBit(bytes, i), bool((words[0] >> i) & 1)); } - for (int i = 0; i < kBitWidth; ++i) { + for (size_t i = 0; i < kBitWidth; ++i) { ASSERT_EQ(BitUtil::GetBit(bytes, i + kBitWidth), bool((words[1] >> i) & 1)); } // bit offset can therefore be accomodated by shifting the word - for (int offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) { + for (size_t offset = 1; offset < (kBitWidth * 3) / 4; offset += 3) { uint64_t shifted_words[2]; shifted_words[0] = words[0] >> offset | (words[1] << (kBitWidth - offset)); shifted_words[1] = words[1] >> offset; @@ -1161,15 +1161,39 @@ TEST(Bitmap, ShiftingWordsOptimization) { TEST(Bitmap, WordVisitable) { using internal::Bitmap; - constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); - uint64_t word; - auto bytes = reinterpret_cast(&word); - - // skip first byte in bitmap - ASSERT_FALSE(Bitmap(bytes + 1, 0, kBitWidth - 8).template word_aligned()); + uint64_t words[2]; + constexpr int64_t kBitWidth = 8 * sizeof(words); + auto bytes = reinterpret_cast(words); + auto buffer = std::make_shared(bytes, sizeof(words)); + + // whole words are accessible + ASSERT_TRUE(Bitmap(buffer, 0, kBitWidth).template word_accessible()) + << "both words"; + ASSERT_TRUE(Bitmap(buffer, 0, kBitWidth / 2).template word_accessible()) + << "first word"; + ASSERT_TRUE( + Bitmap(buffer, kBitWidth / 2, kBitWidth / 2).template word_accessible()) + << "second word"; + + // words outside the buffer are not accessible + ASSERT_FALSE(Bitmap(buffer, kBitWidth, kBitWidth).template word_accessible()) + << "bitmap references bits after the buffer"; + ASSERT_FALSE(Bitmap(buffer, -kBitWidth, kBitWidth).template word_accessible()) + << "bitmap references bits before the buffer"; + + for (int offset = 1; offset < kBitWidth; ++offset) { + // nonzero offset but the words containing referenced bits still lie within the buffer + ASSERT_TRUE( + Bitmap(bytes, offset, kBitWidth - offset).template word_accessible()); + } - // exclude last byte from bitmap - ASSERT_FALSE(Bitmap(bytes, 0, kBitWidth - 8).template word_aligned()); + // words partially outside the buffer are not accessible + ASSERT_FALSE(Bitmap(SliceBuffer(buffer, 1), 0, kBitWidth - 8) + .template word_accessible()) + << "first byte has been sliced off"; + ASSERT_FALSE(Bitmap(SliceBuffer(buffer, 1, sizeof(words) - 1), 0, kBitWidth) + .template word_accessible()) + << "its last byte has been sliced off"; } // reconstruct a bitmap from a word-wise visit @@ -1193,7 +1217,7 @@ TEST(Bitmap, Visit) { Bitmap bitmaps[] = {{buffer, offset, num_bits}}; int64_t i = 0; - Bitmap::Visit(bitmaps, [&](std::array uint64s) { + Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { reinterpret_cast(actual_buffer->mutable_data())[i++] = uint64s[0]; }); @@ -1226,7 +1250,7 @@ TEST(Bitmap, VisitAnd) { Bitmap bitmaps[] = {{buffer, 0, num_bits}, {buffer, offset, num_bits}}; int64_t i = 0; - Bitmap::Visit(bitmaps, [&](std::array uint64s) { + Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { reinterpret_cast(actual_buffer->mutable_data())[i++] = uint64s[0] & uint64s[1]; }); From c6cf18777360dc07b53ffd459d140f5028453b68 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 6 Nov 2019 08:24:36 -0500 Subject: [PATCH 07/14] remove explicit MinTime(), add figure to words.__doc__ --- cpp/src/arrow/buffer.h | 4 ++++ cpp/src/arrow/compute/kernels/boolean.cc | 6 +++--- cpp/src/arrow/util/bit_util.h | 22 +++++++++++++++------- cpp/src/arrow/util/bit_util_benchmark.cc | 8 ++++---- cpp/src/arrow/util/string_view.h | 8 ++++---- 5 files changed, 30 insertions(+), 18 deletions(-) diff --git a/cpp/src/arrow/buffer.h b/cpp/src/arrow/buffer.h index 3eb9b033b92..799e2a31f1e 100644 --- a/cpp/src/arrow/buffer.h +++ b/cpp/src/arrow/buffer.h @@ -173,6 +173,10 @@ class ARROW_EXPORT Buffer { return util::string_view(reinterpret_cast(data_), size_); } + /// \brief View buffer contents as a util::bytes_view + /// \return util::bytes_view + explicit operator util::bytes_view() const { return util::bytes_view(data_, size_); } + /// \brief Return a pointer to the buffer's data const uint8_t* data() const { return data_; } /// \brief Return a writable pointer to the buffer's data diff --git a/cpp/src/arrow/compute/kernels/boolean.cc b/cpp/src/arrow/compute/kernels/boolean.cc index 5fca5c58573..020af6cb98d 100644 --- a/cpp/src/arrow/compute/kernels/boolean.cc +++ b/cpp/src/arrow/compute/kernels/boolean.cc @@ -178,9 +178,9 @@ class AndKernel : public BinaryBooleanKernel { return Status::OK(); } - static auto compute_word = [](uint64_t left_true, uint64_t left_false, - uint64_t right_true, uint64_t right_false, - uint64_t* out_valid, uint64_t* out_data) { + auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true, + uint64_t right_false, uint64_t* out_valid, + uint64_t* out_data) { *out_data = left_true & right_true; *out_valid = left_false | right_false | (left_true & right_true); }; diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 1a79cfe21b3..c32fe0c6bb9 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -838,7 +838,7 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); class ARROW_EXPORT Bitmap { public: template - using WordsView = util::basic_string_view; + using View = util::basic_string_view; Bitmap() = default; @@ -897,7 +897,7 @@ class ARROW_EXPORT Bitmap { static void VisitWords(const Bitmap (&bitmaps)[N], Visitor&& visitor) { constexpr size_t kBitWidth = sizeof(Word) * 8; - WordsView words[N]; + View words[N]; int64_t offsets[N]; for (size_t i = 0; i < N; ++i) { words[i] = bitmaps[i].template words(); @@ -944,10 +944,10 @@ class ARROW_EXPORT Bitmap { int64_t length() const { return length_; } /// string_view of all bytes which contain any bit in this Bitmap - WordsView bytes() const { + util::bytes_view bytes() const { auto byte_offset = offset_ / 8; auto byte_count = BitUtil::CeilDiv(offset_ + length_, 8) - byte_offset; - return WordsView(buffer_->data() + byte_offset, byte_count); + return util::bytes_view(buffer_->data() + byte_offset, byte_count); } /// returns true if words in words() lie entirely in buffer() and may be safely @@ -962,15 +962,23 @@ class ARROW_EXPORT Bitmap { } /// string_view of all Words which contain any bit in this Bitmap + /// + /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36) + /// words() would span bits [16, 48). + /// + /// 0 16 32 48 64 + /// |-------|-------|------|------| (buffer) + /// [ ] (bitmap) + /// |-------|------| (returned words) template - WordsView words() const { + View words() const { auto bytes_addr = reinterpret_cast(bytes().data()); auto words_addr = bytes_addr - bytes_addr % sizeof(Word); auto word_byte_count = BitUtil::RoundUpToPowerOf2(bytes_addr + bytes().size(), sizeof(Word)) - words_addr; - return WordsView(reinterpret_cast(words_addr), - word_byte_count / sizeof(Word)); + return View(reinterpret_cast(words_addr), + word_byte_count / sizeof(Word)); } /// offset of first bit relative to words().data() diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 193588aa96b..4af1d785156 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -386,10 +386,10 @@ static std::vector> and_benchmark_ranges{ // offsets of second buffer {0, 2}}; -BENCHMARK(BenchmarkBitmapAnd)->Ranges(and_benchmark_ranges)->MinTime(1.0); -BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(and_benchmark_ranges)->MinTime(1.0); -BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(and_benchmark_ranges)->MinTime(1.0); -BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(and_benchmark_ranges)->MinTime(1.0); +BENCHMARK(BenchmarkBitmapAnd)->Ranges(and_benchmark_ranges); +BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(and_benchmark_ranges); +BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(and_benchmark_ranges); +BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(and_benchmark_ranges); } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h index c1ed8ca1e24..26e2903ff9a 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/string_view.h @@ -15,11 +15,12 @@ // specific language governing permissions and limitations // under the License. -#ifndef ARROW_UTIL_STRING_VIEW_H -#define ARROW_UTIL_STRING_VIEW_H +#pragma once #define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD +#include + #include "arrow/vendored/string_view.hpp" // IWYU pragma: export namespace arrow { @@ -27,8 +28,7 @@ namespace util { using nonstd::basic_string_view; using nonstd::string_view; +using bytes_view = basic_string_view; } // namespace util } // namespace arrow - -#endif // ARROW_UTIL_STRING_VIEW_H From bc32ee21db12cd0d1a8808eb5049a3356e86d994 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 6 Nov 2019 10:35:07 -0500 Subject: [PATCH 08/14] define benchmark ranges as a macro for type deduction --- cpp/src/arrow/util/bit_util_benchmark.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 4af1d785156..9d2c90d9ed3 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -380,16 +380,14 @@ BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); -static std::vector> and_benchmark_ranges{ - // buffer sizes - {kBufferSize, kBufferSize * 2}, - // offsets of second buffer - {0, 2}}; - -BENCHMARK(BenchmarkBitmapAnd)->Ranges(and_benchmark_ranges); -BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(and_benchmark_ranges); -BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(and_benchmark_ranges); -BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(and_benchmark_ranges); +#define AND_BENCHMARK_RANGES \ + { \ + {kBufferSize, kBufferSize * 2}, { 0, 2 } \ + } +BENCHMARK(BenchmarkBitmapAnd)->Ranges(AND_BENCHMARK_RANGES); +BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(AND_BENCHMARK_RANGES); +BENCHMARK(BenchmarkBitmapVisitUInt8And)->Ranges(AND_BENCHMARK_RANGES); +BENCHMARK(BenchmarkBitmapVisitUInt64And)->Ranges(AND_BENCHMARK_RANGES); } // namespace BitUtil } // namespace arrow From 81a5991ff34f4e3ddf975f70ca7d924bcb2cb21d Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 6 Nov 2019 10:39:28 -0500 Subject: [PATCH 09/14] add explicit cast for RoundUpToPowerOf2 --- cpp/src/arrow/util/bit_util.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index c32fe0c6bb9..de60f715c58 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -975,7 +975,8 @@ class ARROW_EXPORT Bitmap { auto bytes_addr = reinterpret_cast(bytes().data()); auto words_addr = bytes_addr - bytes_addr % sizeof(Word); auto word_byte_count = - BitUtil::RoundUpToPowerOf2(bytes_addr + bytes().size(), sizeof(Word)) - + BitUtil::RoundUpToPowerOf2(static_cast(bytes_addr + bytes().size()), + static_cast(sizeof(Word))) - words_addr; return View(reinterpret_cast(words_addr), word_byte_count / sizeof(Word)); From cf89713bfb24430117ce68a5554cfe0f566f7cec Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Thu, 7 Nov 2019 16:39:09 -0500 Subject: [PATCH 10/14] address review comments, fix bug in VisitWords --- cpp/src/arrow/util/bit_util.h | 48 ++++++++++++++++-------- cpp/src/arrow/util/bit_util_benchmark.cc | 4 ++ cpp/src/arrow/util/bit_util_test.cc | 44 ++++++++++++---------- 3 files changed, 61 insertions(+), 35 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index de60f715c58..8e7834e4e33 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -870,10 +871,15 @@ class ARROW_EXPORT Bitmap { BitUtil::SetBitTo(buffer_->mutable_data(), i + offset_, v); } - /// \brief Visit bits of this bitmap as bitset + /// \brief Visit bits from each bitmap as bitset + /// + /// All bitmaps must have identical length. template static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) { auto bit_length = bitmaps[0].length(); + for (const auto& bitmap : bitmaps) { + assert(bitmap.length() == bit_length); + } std::bitset bits; for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { for (size_t i = 0; i < N; ++i) { @@ -883,37 +889,44 @@ class ARROW_EXPORT Bitmap { } } - /// \brief Visit bits of this bitmap as array + /// \brief Visit words of bits from each bitmap as array /// - /// If visiting by words, note that every Word containing a bit - /// must be accessible (and initialized, or Valgrind will barf). - /// This is satisfied by most Buffers in Arrow space (which are allocated with - /// maximum alignment and padded to contain whole cache lines). + /// All bitmaps must have identical length. + /// + /// Note that every Word containing a bit must be accessible (and initialized, or + /// Valgrind will barf). This is satisfied by most Buffers in Arrow space (which are + /// allocated with maximum alignment and padded to contain whole cache lines). /// /// TODO(bkietz) allow for early termination template ::value_type> static void VisitWords(const Bitmap (&bitmaps)[N], Visitor&& visitor) { - constexpr size_t kBitWidth = sizeof(Word) * 8; + constexpr int64_t kBitWidth = sizeof(Word) * 8; + + auto bit_length = bitmaps[0].length(); + for (const auto& bitmap : bitmaps) { + assert(bitmap.length() == bit_length); + } View words[N]; int64_t offsets[N]; for (size_t i = 0; i < N; ++i) { words[i] = bitmaps[i].template words(); offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); } std::array visited_words; - size_t word_count = 0; + // some of the bitmaps might span one fewer word than the others + size_t common_word_count = words[0].size(); for (auto word : words) { - // words[i].size() == words[j].size() if offsets[i] == 0 and offsets[j] == 0 - // words[i].size() == words[j].size()+1 if offsets[i] != 0 and offsets[j] == 0 - word_count = std::max(word_count, word.size()); + common_word_count = std::min(common_word_count, word.size()); } - for (size_t word_i = 0; word_i < word_count - 1; ++word_i) { + // word_i such that words[i][word_i] and words[i][word_i + 1] are accessible for all i + for (size_t word_i = 0; word_i < common_word_count - 1; ++word_i) { for (size_t i = 0; i < N; ++i) { if (offsets[i] == 0) { visited_words[i] = words[i][word_i]; @@ -925,11 +938,16 @@ class ARROW_EXPORT Bitmap { visitor(visited_words); } - // handle last word - size_t word_i = word_count - 1; + // handle trailing words + size_t word_i = common_word_count - 1; for (size_t i = 0; i < N; ++i) { - if (word_i < words[i].size()) { + if (offsets[i] == 0) { + visited_words[i] = words[i][word_i]; + } else { visited_words[i] = words[i][word_i] >> offsets[i]; + if (word_i + 1 < words[i].size()) { + visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); + } } } visitor(visited_words); diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 9d2c90d9ed3..e9eca2a78a0 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -33,6 +33,8 @@ namespace arrow { namespace BitUtil { +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + // A naive bitmap reader implementation, meant as a baseline against // internal::BitmapReader @@ -85,6 +87,8 @@ class NaiveBitmapWriter { int64_t position_; }; +#endif + static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { std::shared_ptr buffer; ABORT_NOT_OK(AllocateBuffer(nbytes, &buffer)); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index fcc7d46b5e0..0de055b354b 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1243,26 +1243,30 @@ TEST(Bitmap, VisitAnd) { constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); - for (int64_t offset : {0, 1, 2, 5, 17, int(kBitWidth + 1), int(kBitWidth + 17)}) { - for (int64_t num_bits : - {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, - nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { - Bitmap bitmaps[] = {{buffer, 0, num_bits}, {buffer, offset, num_bits}}; - - int64_t i = 0; - Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { - reinterpret_cast(actual_buffer->mutable_data())[i++] = - uint64s[0] & uint64s[1]; - }); - - internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), - bitmaps[1].buffer()->data(), bitmaps[1].offset(), - bitmaps[0].length(), 0, expected_buffer->mutable_data()); - - ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, - expected_buffer->data(), 0, num_bits)) - << "offset:" << offset << " bits:" << num_bits << std::endl - << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); + for (int64_t offset : + {0, 1, 2, 5, 17, int(kBitWidth - 1), int(kBitWidth + 1), int(kBitWidth + 17)}) { + for (int64_t right_offset = 0; right_offset < offset; ++right_offset) { + for (int64_t num_bits : + {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, + nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { + Bitmap bitmaps[] = {{buffer, offset, num_bits}, {buffer, right_offset, num_bits}}; + + int64_t i = 0; + Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { + reinterpret_cast(actual_buffer->mutable_data())[i++] = + uint64s[0] & uint64s[1]; + }); + + internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), + bitmaps[0].length(), 0, expected_buffer->mutable_data()); + + ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, + expected_buffer->data(), 0, num_bits)) + << "left_offset:" << offset << " bits:" << num_bits + << "right_offset:" << right_offset << std::endl + << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); + } } } } From 8b21db3f8a6d754b27b117589b99e3bc18bed090 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 8 Nov 2019 12:36:27 -0500 Subject: [PATCH 11/14] refactor to avoid accessing incomplete words --- cpp/src/arrow/util/bit_util.cc | 7 ++ cpp/src/arrow/util/bit_util.h | 165 +++++++++++++++++++--------- cpp/src/arrow/util/bit_util_test.cc | 35 +++--- cpp/src/arrow/util/string_view.h | 5 +- 4 files changed, 143 insertions(+), 69 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 325aba06419..3d869b79c13 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -343,6 +343,13 @@ std::string Bitmap::Diff(const Bitmap& other) const { return ToArray()->Diff(*other.ToArray()); } +int64_t Bitmap::BitLength(const Bitmap* bitmaps, size_t N) { + for (size_t i = 1; i < N; ++i) { + DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length()); + } + return bitmaps[0].length(); +} + Status BitmapAnd(MemoryPool* pool, const uint8_t* left, int64_t left_offset, const uint8_t* right, int64_t right_offset, int64_t length, int64_t out_offset, std::shared_ptr* out_buffer) { diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 8e7834e4e33..e4989db0bdd 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -846,8 +846,15 @@ class ARROW_EXPORT Bitmap { Bitmap(std::shared_ptr buffer, int64_t offset, int64_t length) : buffer_(std::move(buffer)), offset_(offset), length_(length) {} - Bitmap(const uint8_t* data, int64_t offset, int64_t length) - : buffer_(std::make_shared(data, BitUtil::BytesForBits(offset + length))), + Bitmap(const void* data, int64_t offset, int64_t length) + : buffer_(std::make_shared(static_cast(data), + BitUtil::BytesForBits(offset + length))), + offset_(offset), + length_(length) {} + + Bitmap(void* data, int64_t offset, int64_t length) + : buffer_(std::make_shared(static_cast(data), + BitUtil::BytesForBits(offset + length))), offset_(offset), length_(length) {} @@ -876,10 +883,7 @@ class ARROW_EXPORT Bitmap { /// All bitmaps must have identical length. template static void VisitBits(const Bitmap (&bitmaps)[N], Visitor&& visitor) { - auto bit_length = bitmaps[0].length(); - for (const auto& bitmap : bitmaps) { - assert(bitmap.length() == bit_length); - } + int64_t bit_length = BitLength(bitmaps, N); std::bitset bits; for (int64_t bit_i = 0; bit_i < bit_length; ++bit_i) { for (size_t i = 0; i < N; ++i) { @@ -892,65 +896,103 @@ class ARROW_EXPORT Bitmap { /// \brief Visit words of bits from each bitmap as array /// /// All bitmaps must have identical length. - /// - /// Note that every Word containing a bit must be accessible (and initialized, or - /// Valgrind will barf). This is satisfied by most Buffers in Arrow space (which are - /// allocated with maximum alignment and padded to contain whole cache lines). - /// /// TODO(bkietz) allow for early termination template ::value_type> - static void VisitWords(const Bitmap (&bitmaps)[N], Visitor&& visitor) { + static int64_t VisitWords(const Bitmap (&bitmaps_arg)[N], Visitor&& visitor) { constexpr int64_t kBitWidth = sizeof(Word) * 8; - auto bit_length = bitmaps[0].length(); - for (const auto& bitmap : bitmaps) { - assert(bitmap.length() == bit_length); - } - - View words[N]; + // local, mutable variables which will be sliced/decremented to represent consumption: + Bitmap bitmaps[N]; int64_t offsets[N]; + int64_t bit_length = BitLength(bitmaps_arg, N); + View words[N]; for (size_t i = 0; i < N; ++i) { - words[i] = bitmaps[i].template words(); + bitmaps[i] = bitmaps_arg[i]; offsets[i] = bitmaps[i].template word_offset(); assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); } + auto consume = [&](int64_t consumed_bits) { + for (size_t i = 0; i < N; ++i) { + bitmaps[i] = bitmaps[i].Slice(consumed_bits, bit_length - consumed_bits); + offsets[i] = bitmaps[i].template word_offset(); + assert(offsets[i] >= 0 && offsets[i] < kBitWidth); + words[i] = bitmaps[i].template words(); + } + bit_length -= consumed_bits; + }; + std::array visited_words; - // some of the bitmaps might span one fewer word than the others - size_t common_word_count = words[0].size(); - for (auto word : words) { - common_word_count = std::min(common_word_count, word.size()); + if (bit_length <= kBitWidth * 2) { + // bitmaps fit into one or two words so don't bother with optimization + while (bit_length > 0) { + auto leading_bits = std::min(bit_length, kBitWidth); + SafeLoadWords(bitmaps, 0, leading_bits, false, &visited_words); + visitor(visited_words); + consume(leading_bits); + } + return 0; } - // word_i such that words[i][word_i] and words[i][word_i + 1] are accessible for all i - for (size_t word_i = 0; word_i < common_word_count - 1; ++word_i) { - for (size_t i = 0; i < N; ++i) { - if (offsets[i] == 0) { - visited_words[i] = words[i][word_i]; - } else { - visited_words[i] = words[i][word_i] >> offsets[i]; - visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); - } - } + int64_t max_offset = *std::max_element(offsets, offsets + N); + int64_t min_offset = *std::min_element(offsets, offsets + N); + if (max_offset > 0) { + // consume leading bits + auto leading_bits = kBitWidth - min_offset; + SafeLoadWords(bitmaps, 0, leading_bits, true, &visited_words); visitor(visited_words); + consume(leading_bits); } + assert(*std::min_element(offsets, offsets + N) == 0); - // handle trailing words - size_t word_i = common_word_count - 1; - for (size_t i = 0; i < N; ++i) { - if (offsets[i] == 0) { - visited_words[i] = words[i][word_i]; - } else { - visited_words[i] = words[i][word_i] >> offsets[i]; - if (word_i + 1 < words[i].size()) { - visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); + int64_t whole_word_count = bit_length / kBitWidth; + assert(whole_word_count >= 1); + + if (min_offset == max_offset) { + // all offsets were identical, all leading bits have been consumed + assert( + std::all_of(offsets, offsets + N, [](int64_t offset) { return offset == 0; })); + + for (int64_t word_i = 0; word_i < whole_word_count; ++word_i) { + for (size_t i = 0; i < N; ++i) { + visited_words[i] = words[i][word_i]; + } + visitor(visited_words); + } + consume(whole_word_count * kBitWidth); + } else { + // leading bits from potentially incomplete words have been consumed + + // word_i such that words[i][word_i] and words[i][word_i + 1] are lie entirely + // within the bitmap for all i + for (int64_t word_i = 0; word_i < whole_word_count - 1; ++word_i) { + for (size_t i = 0; i < N; ++i) { + if (offsets[i] == 0) { + visited_words[i] = words[i][word_i]; + } else { + visited_words[i] = words[i][word_i] >> offsets[i]; + visited_words[i] |= words[i][word_i + 1] << (kBitWidth - offsets[i]); + } } + visitor(visited_words); } + consume((whole_word_count - 1) * kBitWidth); + + SafeLoadWords(bitmaps, 0, kBitWidth, false, &visited_words); + + visitor(visited_words); + consume(kBitWidth); } + + // load remaining bits + SafeLoadWords(bitmaps, 0, bit_length, false, &visited_words); visitor(visited_words); + + return min_offset; } const std::shared_ptr& buffer() const { return buffer_; } @@ -968,17 +1010,6 @@ class ARROW_EXPORT Bitmap { return util::bytes_view(buffer_->data() + byte_offset, byte_count); } - /// returns true if words in words() lie entirely in buffer() and may be safely - /// accessed - template - bool word_accessible() const { - auto words = this->words(); - auto valid_addr = reinterpret_cast(buffer_->data()); - auto words_addr = reinterpret_cast(words.data()); - return words_addr >= valid_addr && - words_addr + words.size() * sizeof(Word) <= valid_addr + buffer_->capacity(); - } - /// string_view of all Words which contain any bit in this Bitmap /// /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36) @@ -988,6 +1019,9 @@ class ARROW_EXPORT Bitmap { /// |-------|-------|------|------| (buffer) /// [ ] (bitmap) /// |-------|------| (returned words) + /// + /// \warning The words may contain bytes which lie outside the buffer or are + /// uninitialized. template View words() const { auto bytes_addr = reinterpret_cast(bytes().data()); @@ -1008,8 +1042,33 @@ class ARROW_EXPORT Bitmap { } private: + /// load words from bitmaps bitwise + template + static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset, + int64_t out_length, bool set_trailing_bits, + std::array* out) { + int64_t out_offset = set_trailing_bits ? sizeof(Word) * 8 - out_length : 0; + + Bitmap slices[N], out_bitmaps[N]; + for (size_t i = 0; i < N; ++i) { + slices[i] = bitmaps[i].Slice(offset, out_length); + out_bitmaps[i] = Bitmap(&out->at(i), out_offset, out_length); + } + + int64_t bit_i = 0; + Bitmap::VisitBits(slices, [&](std::bitset bits) { + for (size_t i = 0; i < N; ++i) { + out_bitmaps[i].SetBitTo(bit_i, bits[i]); + } + ++bit_i; + }); + } + std::shared_ptr ToArray() const; + /// assert bitmaps have identical length and return that length + static int64_t BitLength(const Bitmap* bitmaps, size_t N); + std::shared_ptr buffer_; int64_t offset_ = 0, length_ = 0; }; diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index 0de055b354b..e418ee1978a 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1158,6 +1158,7 @@ TEST(Bitmap, ShiftingWordsOptimization) { } } +/* TEST(Bitmap, WordVisitable) { using internal::Bitmap; @@ -1195,6 +1196,7 @@ TEST(Bitmap, WordVisitable) { .template word_accessible()) << "its last byte has been sliced off"; } +*/ // reconstruct a bitmap from a word-wise visit TEST(Bitmap, Visit) { @@ -1217,12 +1219,12 @@ TEST(Bitmap, Visit) { Bitmap bitmaps[] = {{buffer, offset, num_bits}}; int64_t i = 0; - Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { + auto min_offset = Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { reinterpret_cast(actual_buffer->mutable_data())[i++] = uint64s[0]; }); - ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, buffer->data(), offset, - num_bits)) + ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), min_offset, + buffer->data(), offset, num_bits)) << "offset:" << offset << " bits:" << num_bits << std::endl << Bitmap(actual_buffer, 0, num_bits).Diff({buffer, offset, num_bits}); } @@ -1243,32 +1245,35 @@ TEST(Bitmap, VisitAnd) { constexpr int64_t kBitWidth = 8 * sizeof(uint64_t); - for (int64_t offset : + for (int64_t left_offset : {0, 1, 2, 5, 17, int(kBitWidth - 1), int(kBitWidth + 1), int(kBitWidth + 17)}) { - for (int64_t right_offset = 0; right_offset < offset; ++right_offset) { + for (int64_t right_offset = 0; right_offset < left_offset; ++right_offset) { for (int64_t num_bits : {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, - nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { - Bitmap bitmaps[] = {{buffer, offset, num_bits}, {buffer, right_offset, num_bits}}; + 2 * kBitWidth - 1, 2 * kBitWidth, 2 * kBitWidth + 1, nbytes * 8 - left_offset, + 3 * kBitWidth - 1, 3 * kBitWidth, 3 * kBitWidth + 1, nbytes * 6, + nbytes * 4}) { + Bitmap bitmaps[] = {{buffer, left_offset, num_bits}, + {buffer, right_offset, num_bits}}; int64_t i = 0; - Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { - reinterpret_cast(actual_buffer->mutable_data())[i++] = - uint64s[0] & uint64s[1]; - }); + auto min_offset = + Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { + reinterpret_cast(actual_buffer->mutable_data())[i++] = + uint64s[0] & uint64s[1]; + }); internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), bitmaps[1].buffer()->data(), bitmaps[1].offset(), bitmaps[0].length(), 0, expected_buffer->mutable_data()); - ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), 0, + ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), min_offset, expected_buffer->data(), 0, num_bits)) - << "left_offset:" << offset << " bits:" << num_bits - << "right_offset:" << right_offset << std::endl + << "left_offset:" << left_offset << " bits:" << num_bits + << " right_offset:" << right_offset << std::endl << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); } } } } - } // namespace arrow diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h index 26e2903ff9a..64a09b121f7 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/string_view.h @@ -26,8 +26,11 @@ namespace arrow { namespace util { -using nonstd::basic_string_view; using nonstd::string_view; + +template > +using basic_string_view = nonstd::basic_string_view; + using bytes_view = basic_string_view; } // namespace util From fea8893575d0b91fc0969dea6bc045528e4e531a Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 8 Nov 2019 13:25:18 -0500 Subject: [PATCH 12/14] add test for visiting bitmaps with inaccessible words --- cpp/src/arrow/util/bit_util.cc | 8 ++ cpp/src/arrow/util/bit_util.h | 18 ++++- cpp/src/arrow/util/bit_util_benchmark.cc | 6 +- cpp/src/arrow/util/bit_util_test.cc | 94 +++++++++--------------- 4 files changed, 62 insertions(+), 64 deletions(-) diff --git a/cpp/src/arrow/util/bit_util.cc b/cpp/src/arrow/util/bit_util.cc index 3d869b79c13..99c988b1355 100644 --- a/cpp/src/arrow/util/bit_util.cc +++ b/cpp/src/arrow/util/bit_util.cc @@ -343,6 +343,14 @@ std::string Bitmap::Diff(const Bitmap& other) const { return ToArray()->Diff(*other.ToArray()); } +bool Bitmap::Equals(const Bitmap& other) const { + if (length_ != other.length_) { + return false; + } + return BitmapEquals(buffer_->data(), offset_, other.buffer_->data(), other.offset(), + length_); +} + int64_t Bitmap::BitLength(const Bitmap* bitmaps, size_t N) { for (size_t i = 1; i < N; ++i) { DCHECK_EQ(bitmaps[i].length(), bitmaps[0].length()); diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index e4989db0bdd..5c98cd780b2 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -67,8 +67,10 @@ #include #include "arrow/buffer.h" +#include "arrow/util/compare.h" #include "arrow/util/functional.h" #include "arrow/util/macros.h" +#include "arrow/util/string_builder.h" #include "arrow/util/string_view.h" #include "arrow/util/type_traits.h" #include "arrow/util/visibility.h" @@ -836,7 +838,8 @@ Status InvertBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, ARROW_EXPORT int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length); -class ARROW_EXPORT Bitmap { +class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, + public util::EqualityComparable { public: template using View = util::basic_string_view; @@ -868,6 +871,8 @@ class ARROW_EXPORT Bitmap { std::string ToString() const; + bool Equals(const Bitmap& other) const; + std::string Diff(const Bitmap& other) const; bool GetBit(int64_t i) const { return BitUtil::GetBit(buffer_->data(), i + offset_); } @@ -895,7 +900,11 @@ class ARROW_EXPORT Bitmap { /// \brief Visit words of bits from each bitmap as array /// - /// All bitmaps must have identical length. + /// All bitmaps must have identical length. The first bit in a visited bitmap + /// may be offset within the first visited word, but words will otherwise contain + /// densely packed bits loaded from the bitmap. That offset within the first word is + /// returned. + /// /// TODO(bkietz) allow for early termination template visited_words; + visited_words.fill(0); if (bit_length <= kBitWidth * 2) { // bitmaps fit into one or two words so don't bother with optimization @@ -1010,6 +1020,7 @@ class ARROW_EXPORT Bitmap { return util::bytes_view(buffer_->data() + byte_offset, byte_count); } + private: /// string_view of all Words which contain any bit in this Bitmap /// /// For example, given Word=uint16_t and a bitmap spanning bits [20, 36) @@ -1041,12 +1052,13 @@ class ARROW_EXPORT Bitmap { reinterpret_cast(words().data())); } - private: /// load words from bitmaps bitwise template static void SafeLoadWords(const Bitmap (&bitmaps)[N], int64_t offset, int64_t out_length, bool set_trailing_bits, std::array* out) { + out->fill(0); + int64_t out_offset = set_trailing_bits ? sizeof(Word) * 8 - out_length : 0; Bitmap slices[N], out_bitmaps[N]; diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index e9eca2a78a0..5483401cdef 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -384,9 +384,9 @@ BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); -#define AND_BENCHMARK_RANGES \ - { \ - {kBufferSize, kBufferSize * 2}, { 0, 2 } \ +#define AND_BENCHMARK_RANGES \ + { \ + {kBufferSize * 4, kBufferSize * 16}, { 0, 2 } \ } BENCHMARK(BenchmarkBitmapAnd)->Ranges(AND_BENCHMARK_RANGES); BENCHMARK(BenchmarkBitmapVisitBitsetAnd)->Ranges(AND_BENCHMARK_RANGES); diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc index e418ee1978a..90f0fc06134 100644 --- a/cpp/src/arrow/util/bit_util_test.cc +++ b/cpp/src/arrow/util/bit_util_test.cc @@ -1158,50 +1158,18 @@ TEST(Bitmap, ShiftingWordsOptimization) { } } -/* -TEST(Bitmap, WordVisitable) { - using internal::Bitmap; - - uint64_t words[2]; - constexpr int64_t kBitWidth = 8 * sizeof(words); - auto bytes = reinterpret_cast(words); - auto buffer = std::make_shared(bytes, sizeof(words)); - - // whole words are accessible - ASSERT_TRUE(Bitmap(buffer, 0, kBitWidth).template word_accessible()) - << "both words"; - ASSERT_TRUE(Bitmap(buffer, 0, kBitWidth / 2).template word_accessible()) - << "first word"; - ASSERT_TRUE( - Bitmap(buffer, kBitWidth / 2, kBitWidth / 2).template word_accessible()) - << "second word"; - - // words outside the buffer are not accessible - ASSERT_FALSE(Bitmap(buffer, kBitWidth, kBitWidth).template word_accessible()) - << "bitmap references bits after the buffer"; - ASSERT_FALSE(Bitmap(buffer, -kBitWidth, kBitWidth).template word_accessible()) - << "bitmap references bits before the buffer"; - - for (int offset = 1; offset < kBitWidth; ++offset) { - // nonzero offset but the words containing referenced bits still lie within the buffer - ASSERT_TRUE( - Bitmap(bytes, offset, kBitWidth - offset).template word_accessible()); - } - - // words partially outside the buffer are not accessible - ASSERT_FALSE(Bitmap(SliceBuffer(buffer, 1), 0, kBitWidth - 8) - .template word_accessible()) - << "first byte has been sliced off"; - ASSERT_FALSE(Bitmap(SliceBuffer(buffer, 1, sizeof(words) - 1), 0, kBitWidth) - .template word_accessible()) - << "its last byte has been sliced off"; +namespace internal { + +static Bitmap Copy(const Bitmap& bitmap, std::shared_ptr storage) { + int64_t i = 0; + auto min_offset = Bitmap::VisitWords({bitmap}, [&](std::array uint64s) { + reinterpret_cast(storage->mutable_data())[i++] = uint64s[0]; + }); + return Bitmap(std::move(storage), min_offset, bitmap.length()); } -*/ // reconstruct a bitmap from a word-wise visit -TEST(Bitmap, Visit) { - using internal::Bitmap; - +TEST(Bitmap, VisitWords) { constexpr int64_t nbytes = 1 << 10; std::shared_ptr buffer, actual_buffer; for (std::shared_ptr* b : {&buffer, &actual_buffer}) { @@ -1216,25 +1184,34 @@ TEST(Bitmap, Visit) { for (int64_t num_bits : {int64_t(13), int64_t(9), kBitWidth - 1, kBitWidth, kBitWidth + 1, nbytes * 8 - offset, nbytes * 6, nbytes * 4}) { - Bitmap bitmaps[] = {{buffer, offset, num_bits}}; - - int64_t i = 0; - auto min_offset = Bitmap::VisitWords(bitmaps, [&](std::array uint64s) { - reinterpret_cast(actual_buffer->mutable_data())[i++] = uint64s[0]; - }); - - ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), min_offset, - buffer->data(), offset, num_bits)) + Bitmap actual = Copy({buffer, offset, num_bits}, actual_buffer); + ASSERT_EQ(actual, Bitmap(buffer->data(), offset, num_bits)) << "offset:" << offset << " bits:" << num_bits << std::endl << Bitmap(actual_buffer, 0, num_bits).Diff({buffer, offset, num_bits}); } } } -// compute bitwise AND of bitmaps using word-wise visit -TEST(Bitmap, VisitAnd) { - using internal::Bitmap; +TEST(Bitmap, VisitPartialWords) { + uint64_t words[2]; + constexpr auto nbytes = sizeof(words); + constexpr auto nbits = nbytes * 8; + + auto buffer = Buffer::Wrap(words, 2); + Bitmap bitmap(buffer, 0, nbits); + std::shared_ptr storage; + ASSERT_OK(AllocateBuffer(nbytes, &storage)); + + // words partially outside the buffer are not accessible, but they are loaded bitwise + auto first_byte_was_missing = Bitmap(SliceBuffer(buffer, 1), 0, nbits - 8); + ASSERT_EQ(Copy(first_byte_was_missing, storage), bitmap.Slice(8)); + auto last_byte_was_missing = Bitmap(SliceBuffer(buffer, 0, nbytes - 1), 0, nbits - 8); + ASSERT_EQ(Copy(last_byte_was_missing, storage), bitmap.Slice(0, nbits - 8)); +} + +// compute bitwise AND of bitmaps using word-wise visit +TEST(Bitmap, VisitWordsAnd) { constexpr int64_t nbytes = 1 << 10; std::shared_ptr buffer, actual_buffer, expected_buffer; for (std::shared_ptr* b : {&buffer, &actual_buffer, &expected_buffer}) { @@ -1263,12 +1240,12 @@ TEST(Bitmap, VisitAnd) { uint64s[0] & uint64s[1]; }); - internal::BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), - bitmaps[1].buffer()->data(), bitmaps[1].offset(), - bitmaps[0].length(), 0, expected_buffer->mutable_data()); + BitmapAnd(bitmaps[0].buffer()->data(), bitmaps[0].offset(), + bitmaps[1].buffer()->data(), bitmaps[1].offset(), bitmaps[0].length(), + 0, expected_buffer->mutable_data()); - ASSERT_TRUE(internal::BitmapEquals(actual_buffer->data(), min_offset, - expected_buffer->data(), 0, num_bits)) + ASSERT_TRUE(BitmapEquals(actual_buffer->data(), min_offset, + expected_buffer->data(), 0, num_bits)) << "left_offset:" << left_offset << " bits:" << num_bits << " right_offset:" << right_offset << std::endl << Bitmap(actual_buffer, 0, num_bits).Diff({expected_buffer, 0, num_bits}); @@ -1276,4 +1253,5 @@ TEST(Bitmap, VisitAnd) { } } } +} // namespace internal } // namespace arrow From 4ea0b427591fefb0f489352437a5e3236cd9ac9d Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Fri, 8 Nov 2019 16:56:35 -0500 Subject: [PATCH 13/14] iwyu: string --- cpp/src/arrow/util/string_view.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/util/string_view.h b/cpp/src/arrow/util/string_view.h index 64a09b121f7..4a51c2ebd9e 100644 --- a/cpp/src/arrow/util/string_view.h +++ b/cpp/src/arrow/util/string_view.h @@ -20,6 +20,7 @@ #define nssv_CONFIG_SELECT_STRING_VIEW nssv_STRING_VIEW_NONSTD #include +#include #include "arrow/vendored/string_view.hpp" // IWYU pragma: export From 4fe55a09113eca146e366f02eca5e136fb110780 Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Sun, 10 Nov 2019 08:47:49 -0500 Subject: [PATCH 14/14] cast pointers to intptr_t instead of size_t --- cpp/src/arrow/compute/kernels/boolean.h | 4 ++-- cpp/src/arrow/util/bit_util.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/boolean.h b/cpp/src/arrow/compute/kernels/boolean.h index c0b0d7490e8..06bb5caa605 100644 --- a/cpp/src/arrow/compute/kernels/boolean.h +++ b/cpp/src/arrow/compute/kernels/boolean.h @@ -58,7 +58,7 @@ Status And(FunctionContext* context, const Datum& left, const Datum& right, Datu /// \param[in] right right operand (array) /// \param[out] out resulting datum /// -/// \since 0.11.0 +/// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT Status KleeneAnd(FunctionContext* context, const Datum& left, const Datum& right, @@ -85,7 +85,7 @@ Status Or(FunctionContext* context, const Datum& left, const Datum& right, Datum /// \param[in] right right operand (array) /// \param[out] out resulting datum /// -/// \since 0.11.0 +/// \since 1.0.0 /// \note API not yet finalized ARROW_EXPORT Status KleeneOr(FunctionContext* context, const Datum& left, const Datum& right, diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h index 5c98cd780b2..c3dbf4b1d28 100644 --- a/cpp/src/arrow/util/bit_util.h +++ b/cpp/src/arrow/util/bit_util.h @@ -1035,7 +1035,7 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, /// uninitialized. template View words() const { - auto bytes_addr = reinterpret_cast(bytes().data()); + auto bytes_addr = reinterpret_cast(bytes().data()); auto words_addr = bytes_addr - bytes_addr % sizeof(Word); auto word_byte_count = BitUtil::RoundUpToPowerOf2(static_cast(bytes_addr + bytes().size()), @@ -1048,8 +1048,8 @@ class ARROW_EXPORT Bitmap : public util::ToStringOstreamable, /// offset of first bit relative to words().data() template int64_t word_offset() const { - return offset_ + 8 * (reinterpret_cast(buffer_->data()) - - reinterpret_cast(words().data())); + return offset_ + 8 * (reinterpret_cast(buffer_->data()) - + reinterpret_cast(words().data())); } /// load words from bitmaps bitwise