From 83527a9f4f4712ec02b9f8ceef0a912abdbdaefa Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Thu, 24 Mar 2016 23:17:39 -0700 Subject: [PATCH 1/8] Draft BooleanBuilder and arrange to share common code between normal numeric builders and boolean builder --- cpp/src/arrow/api.h | 1 - cpp/src/arrow/array-test.cc | 5 +- cpp/src/arrow/test-util.h | 31 ++- cpp/src/arrow/type.h | 15 +- cpp/src/arrow/types/CMakeLists.txt | 1 - cpp/src/arrow/types/boolean.h | 32 --- cpp/src/arrow/types/construct.cc | 3 +- cpp/src/arrow/types/list-test.cc | 5 +- cpp/src/arrow/types/list.h | 11 +- cpp/src/arrow/types/primitive-test.cc | 123 +++++++++-- cpp/src/arrow/types/primitive.cc | 168 ++++++++++++++- cpp/src/arrow/types/primitive.h | 296 +++++++++++++++++--------- cpp/src/arrow/types/string-test.cc | 2 +- cpp/src/arrow/util/bit-util.cc | 14 +- cpp/src/arrow/util/bit-util.h | 5 +- 15 files changed, 515 insertions(+), 197 deletions(-) delete mode 100644 cpp/src/arrow/types/boolean.h diff --git a/cpp/src/arrow/api.h b/cpp/src/arrow/api.h index 7be7f88c22e..2ae80f642f2 100644 --- a/cpp/src/arrow/api.h +++ b/cpp/src/arrow/api.h @@ -27,7 +27,6 @@ #include "arrow/table.h" #include "arrow/type.h" -#include "arrow/types/boolean.h" #include "arrow/types/construct.h" #include "arrow/types/list.h" #include "arrow/types/primitive.h" diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 7c6eaf55c0d..121b802d994 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -71,8 +71,7 @@ TEST_F(TestArray, TestIsNull) { if (x == 0) ++null_count; } - std::shared_ptr null_buf = test::bytes_to_null_buffer(null_bitmap.data(), - null_bitmap.size()); + std::shared_ptr null_buf = test::bytes_to_null_buffer(null_bitmap); std::unique_ptr arr; arr.reset(new Int32Array(null_bitmap.size(), nullptr, null_count, null_buf)); @@ -82,7 +81,7 @@ TEST_F(TestArray, TestIsNull) { ASSERT_TRUE(arr->null_bitmap()->Equals(*null_buf.get())); for (size_t i = 0; i < null_bitmap.size(); ++i) { - EXPECT_EQ(static_cast(null_bitmap[i]), !arr->IsNull(i)) << i; + EXPECT_EQ(null_bitmap[i], !arr->IsNull(i)) << i; } } diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h index ea3ce5f7f53..b2bce269992 100644 --- a/cpp/src/arrow/test-util.h +++ b/cpp/src/arrow/test-util.h @@ -98,28 +98,27 @@ void randint(int64_t N, T lower, T upper, std::vector* out) { } +template +void random_real(int n, uint32_t seed, T min_value, T max_value, + std::vector* out) { + std::mt19937 gen(seed); + std::uniform_real_distribution d(min_value, max_value); + for (int i = 0; i < n; ++i) { + out->push_back(d(gen)); + } +} + + template std::shared_ptr to_buffer(const std::vector& values) { return std::make_shared(reinterpret_cast(values.data()), values.size() * sizeof(T)); } -void random_null_bitmap(int64_t n, double pct_null, std::vector* null_bitmap) { - Random rng(random_seed()); - for (int i = 0; i < n; ++i) { - if (rng.NextDoubleFraction() > pct_null) { - null_bitmap->push_back(1); - } else { - // null - null_bitmap->push_back(0); - } - } -} - -void random_null_bitmap(int64_t n, double pct_null, std::vector* null_bitmap) { +void random_null_bitmap(int64_t n, double pct_null, uint8_t* null_bitmap) { Random rng(random_seed()); for (int i = 0; i < n; ++i) { - null_bitmap->push_back(rng.NextDoubleFraction() > pct_null); + null_bitmap[i] = rng.NextDoubleFraction() > pct_null; } } @@ -160,11 +159,11 @@ static inline int null_count(const std::vector& valid_bytes) { return result; } -std::shared_ptr bytes_to_null_buffer(uint8_t* bytes, int length) { +std::shared_ptr bytes_to_null_buffer(const std::vector& bytes) { std::shared_ptr out; // TODO(wesm): error checking - util::bytes_to_bits(bytes, length, &out); + util::bytes_to_bits(bytes, &out); return out; } diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 5984b6718dd..86e47791b7c 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -132,6 +132,10 @@ struct DataType { return children_.size(); } + virtual int value_size() const { + return -1; + } + virtual std::string ToString() const = 0; }; @@ -191,11 +195,14 @@ inline std::string PrimitiveType::ToString() const { #define PRIMITIVE_DECL(TYPENAME, C_TYPE, ENUM, SIZE, NAME) \ typedef C_TYPE c_type; \ static constexpr Type::type type_enum = Type::ENUM; \ - static constexpr int size = SIZE; \ \ TYPENAME() \ : PrimitiveType() {} \ \ + virtual int value_size() const { \ + return SIZE; \ + } \ + \ static const char* name() { \ return NAME; \ } @@ -295,6 +302,12 @@ struct StructType : public DataType { std::string ToString() const override; }; +// These will be defined elsewhere +template +struct type_traits { +}; + + } // namespace arrow #endif // ARROW_TYPE_H diff --git a/cpp/src/arrow/types/CMakeLists.txt b/cpp/src/arrow/types/CMakeLists.txt index 595b3be6e16..f3e41289bfe 100644 --- a/cpp/src/arrow/types/CMakeLists.txt +++ b/cpp/src/arrow/types/CMakeLists.txt @@ -21,7 +21,6 @@ # Headers: top level install(FILES - boolean.h collection.h construct.h datetime.h diff --git a/cpp/src/arrow/types/boolean.h b/cpp/src/arrow/types/boolean.h deleted file mode 100644 index 1cb91f9ba49..00000000000 --- a/cpp/src/arrow/types/boolean.h +++ /dev/null @@ -1,32 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#ifndef ARROW_TYPES_BOOLEAN_H -#define ARROW_TYPES_BOOLEAN_H - -#include "arrow/types/primitive.h" - -namespace arrow { - -// typedef PrimitiveArrayImpl BooleanArray; - -class BooleanBuilder : public ArrayBuilder { -}; - -} // namespace arrow - -#endif // ARROW_TYPES_BOOLEAN_H diff --git a/cpp/src/arrow/types/construct.cc b/cpp/src/arrow/types/construct.cc index df2317c340b..34647a5005b 100644 --- a/cpp/src/arrow/types/construct.cc +++ b/cpp/src/arrow/types/construct.cc @@ -51,7 +51,7 @@ Status MakeBuilder(MemoryPool* pool, const std::shared_ptr& type, BUILDER_CASE(UINT64, UInt64Builder); BUILDER_CASE(INT64, Int64Builder); - // BUILDER_CASE(BOOL, BooleanBuilder); + BUILDER_CASE(BOOL, BooleanBuilder); BUILDER_CASE(FLOAT, FloatBuilder); BUILDER_CASE(DOUBLE, DoubleBuilder); @@ -83,6 +83,7 @@ Status MakePrimitiveArray(const std::shared_ptr& type, int32_t null_count, const std::shared_ptr& null_bitmap, std::shared_ptr* out) { switch (type->type) { + MAKE_PRIMITIVE_ARRAY_CASE(BOOL, BooleanArray); MAKE_PRIMITIVE_ARRAY_CASE(UINT8, UInt8Array); MAKE_PRIMITIVE_ARRAY_CASE(INT8, Int8Array); MAKE_PRIMITIVE_ARRAY_CASE(UINT16, UInt16Array); diff --git a/cpp/src/arrow/types/list-test.cc b/cpp/src/arrow/types/list-test.cc index eb55ca868ee..4eb560ea522 100644 --- a/cpp/src/arrow/types/list-test.cc +++ b/cpp/src/arrow/types/list-test.cc @@ -116,11 +116,14 @@ TEST_F(TestListBuilder, TestBasics) { Int32Builder* vb = static_cast(builder_->value_builder().get()); + EXPECT_OK(builder_->Reserve(lengths.size())); + EXPECT_OK(vb->Reserve(values.size())); + int pos = 0; for (size_t i = 0; i < lengths.size(); ++i) { ASSERT_OK(builder_->Append(is_null[i] > 0)); for (int j = 0; j < lengths[i]; ++j) { - ASSERT_OK(vb->Append(values[pos++])); + vb->Append(values[pos++]); } } diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index 72e20e943c3..8073b512176 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -116,7 +116,8 @@ class ListBuilder : public Int32Builder { int32_t new_capacity = util::next_power2(length_ + length); RETURN_NOT_OK(Resize(new_capacity)); } - memcpy(raw_buffer() + length_, values, length * elsize_); + memcpy(raw_data_ + length_, values, + type_traits::bytes_required(length)); if (valid_bytes != nullptr) { AppendNulls(valid_bytes, length); @@ -132,13 +133,13 @@ class ListBuilder : public Int32Builder { // Add final offset if the length is non-zero if (length_) { - raw_buffer()[length_] = items->length(); + raw_data_[length_] = items->length(); } - auto result = std::make_shared(type_, length_, values_, items, + auto result = std::make_shared(type_, length_, data_, items, null_count_, null_bitmap_); - values_ = null_bitmap_ = nullptr; + data_ = null_bitmap_ = nullptr; capacity_ = length_ = null_count_ = 0; return result; @@ -162,7 +163,7 @@ class ListBuilder : public Int32Builder { } else { util::set_bit(null_bitmap_data_, length_); } - raw_buffer()[length_++] = value_builder_->length(); + raw_data_[length_++] = value_builder_->length(); return Status::OK(); } diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 10ba113c591..41a0de0ac6a 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -84,7 +84,7 @@ class TestPrimitiveBuilder : public TestBuilder { typedef typename Attrs::BuilderType BuilderType; typedef typename Attrs::T T; - void SetUp() { + virtual void SetUp() { TestBuilder::SetUp(); type_ = Attrs::type(); @@ -99,7 +99,9 @@ class TestPrimitiveBuilder : public TestBuilder { void RandomData(int N, double pct_null = 0.1) { Attrs::draw(N, &draws_); - test::random_null_bitmap(N, pct_null, &valid_bytes_); + + valid_bytes_.resize(N); + test::random_null_bitmap(N, pct_null, valid_bytes_.data()); } void CheckNullable() { @@ -109,7 +111,7 @@ class TestPrimitiveBuilder : public TestBuilder { reinterpret_cast(draws_.data()), size * sizeof(T)); - auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_.data(), size); + auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); int32_t ex_null_count = test::null_count(valid_bytes_); auto expected = std::make_shared(size, ex_data, ex_null_count, @@ -122,7 +124,7 @@ class TestPrimitiveBuilder : public TestBuilder { ASSERT_EQ(0, builder_->length()); ASSERT_EQ(0, builder_->capacity()); ASSERT_EQ(0, builder_->null_count()); - ASSERT_EQ(nullptr, builder_->buffer()); + ASSERT_EQ(nullptr, builder_->data()); ASSERT_EQ(ex_null_count, result->null_count()); ASSERT_TRUE(result->EqualsExact(*expected.get())); @@ -142,7 +144,7 @@ class TestPrimitiveBuilder : public TestBuilder { // Builder is now reset ASSERT_EQ(0, builder_nn_->length()); ASSERT_EQ(0, builder_nn_->capacity()); - ASSERT_EQ(nullptr, builder_nn_->buffer()); + ASSERT_EQ(nullptr, builder_nn_->data()); ASSERT_TRUE(result->EqualsExact(*expected.get())); ASSERT_EQ(0, result->null_count()); @@ -150,7 +152,6 @@ class TestPrimitiveBuilder : public TestBuilder { protected: TypePtr type_; - TypePtr type_nn_; shared_ptr builder_; shared_ptr builder_nn_; @@ -176,6 +177,14 @@ class TestPrimitiveBuilder : public TestBuilder { } \ } +#define PFLOAT_DECL(CapType, c_type, LOWER, UPPER) \ + struct P##CapType { \ + PTYPE_DECL(CapType, c_type); \ + static void draw(int N, vector* draws) { \ + test::random_real(N, 0, LOWER, UPPER, draws); \ + } \ + } + PINT_DECL(UInt8, uint8_t, 0, UINT8_MAX); PINT_DECL(UInt16, uint16_t, 0, UINT16_MAX); PINT_DECL(UInt32, uint32_t, 0, UINT32_MAX); @@ -186,8 +195,12 @@ PINT_DECL(Int16, int16_t, INT16_MIN, INT16_MAX); PINT_DECL(Int32, int32_t, INT32_MIN, INT32_MAX); PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX); +PFLOAT_DECL(Float, float, -1000, 1000); +PFLOAT_DECL(Double, double, -1000, 1000); + typedef ::testing::Types Primitives; + PInt8, PInt16, PInt32, PInt64, + PFloat, PDouble> Primitives; TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives); @@ -204,7 +217,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) { int n = 1000; ASSERT_OK(this->builder_->Init(n)); ASSERT_EQ(n, this->builder_->capacity()); - ASSERT_EQ(n * sizeof(T), this->builder_->buffer()->size()); + ASSERT_EQ(n * sizeof(T), this->builder_->data()->size()); // unsure if this should go in all builder classes ASSERT_EQ(0, this->builder_->num_children()); @@ -235,12 +248,14 @@ TYPED_TEST(TestPrimitiveBuilder, TestArrayDtorDealloc) { this->RandomData(size); + this->builder_->Reserve(size); + int i; for (i = 0; i < size; ++i) { if (valid_bytes[i] > 0) { - ASSERT_OK(this->builder_->Append(draws[i])); + this->builder_->Append(draws[i]); } else { - ASSERT_OK(this->builder_->AppendNull()); + this->builder_->AppendNull(); } } @@ -261,15 +276,18 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { this->RandomData(size); + this->builder_->Reserve(1000); + this->builder_nn_->Reserve(1000); + int i; // Append the first 1000 for (i = 0; i < 1000; ++i) { if (valid_bytes[i] > 0) { - ASSERT_OK(this->builder_->Append(draws[i])); + this->builder_->Append(draws[i]); } else { - ASSERT_OK(this->builder_->AppendNull()); + this->builder_->AppendNull(); } - ASSERT_OK(this->builder_nn_->Append(draws[i])); + this->builder_nn_->Append(draws[i]); } ASSERT_EQ(1000, this->builder_->length()); @@ -278,14 +296,17 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { ASSERT_EQ(1000, this->builder_nn_->length()); ASSERT_EQ(1024, this->builder_nn_->capacity()); + this->builder_->Reserve(size - 1000); + this->builder_nn_->Reserve(size - 1000); + // Append the next 9000 for (i = 1000; i < size; ++i) { if (valid_bytes[i] > 0) { - ASSERT_OK(this->builder_->Append(draws[i])); + this->builder_->Append(draws[i]); } else { - ASSERT_OK(this->builder_->AppendNull()); + this->builder_->AppendNull(); } - ASSERT_OK(this->builder_nn_->Append(draws[i])); + this->builder_nn_->Append(draws[i]); } ASSERT_EQ(size, this->builder_->length()); @@ -350,7 +371,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) { ASSERT_OK(this->builder_->Resize(cap)); ASSERT_EQ(cap, this->builder_->capacity()); - ASSERT_EQ(cap * sizeof(T), this->builder_->buffer()->size()); + ASSERT_EQ(cap * sizeof(T), this->builder_->data()->size()); ASSERT_EQ(util::ceil_byte(cap) / 8, this->builder_->null_bitmap()->size()); } @@ -367,4 +388,72 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) { this->builder_->capacity()); } +// ---------------------------------------------------------------------- +// Boolean tests + +struct PBoolean { + PTYPE_DECL(Boolean, bool); +}; + +class TestBooleanBuilder : public TestPrimitiveBuilder { + public: + + void RandomData(int N, double pct_null = 0.1) { + draws_.resize(N); + valid_bytes_.resize(N); + + test::random_null_bitmap(N, 0.5, draws_.data()); + test::random_null_bitmap(N, pct_null, valid_bytes_.data()); + } + + void CheckNullable() { + int size = builder_->length(); + + auto ex_data = test::bytes_to_null_buffer(draws_); + auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); + int32_t ex_null_count = test::null_count(valid_bytes_); + + auto expected = std::make_shared(size, ex_data, ex_null_count, + ex_null_bitmap); + + std::shared_ptr result = std::dynamic_pointer_cast( + builder_->Finish()); + + // Builder is now reset + ASSERT_EQ(0, builder_->length()); + ASSERT_EQ(0, builder_->capacity()); + ASSERT_EQ(0, builder_->null_count()); + ASSERT_EQ(nullptr, builder_->data()); + + ASSERT_EQ(ex_null_count, result->null_count()); + ASSERT_TRUE(result->EqualsExact(*expected.get())); + } + + void CheckNonNullable() { + int size = builder_nn_->length(); + + auto ex_data = test::bytes_to_null_buffer(draws_); + auto expected = std::make_shared(size, ex_data); + + std::shared_ptr result = std::dynamic_pointer_cast( + builder_nn_->Finish()); + + // Builder is now reset + ASSERT_EQ(0, builder_nn_->length()); + ASSERT_EQ(0, builder_nn_->capacity()); + ASSERT_EQ(nullptr, builder_nn_->data()); + + ASSERT_TRUE(result->EqualsExact(*expected.get())); + ASSERT_EQ(0, result->null_count()); + } + + protected: + TypePtr type_; + shared_ptr builder_; + shared_ptr builder_nn_; + + vector draws_; + vector valid_bytes_; +}; + } // namespace arrow diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index ecd5d68ff45..f3d03e660b0 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -20,20 +20,20 @@ #include #include "arrow/util/buffer.h" +#include "arrow/util/logging.h" namespace arrow { // ---------------------------------------------------------------------- // Primitive array base -PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, int value_size, +PrimitiveArray::PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, int32_t null_count, const std::shared_ptr& null_bitmap) : Array(type, length, null_count, null_bitmap) { data_ = data; raw_data_ = data == nullptr? nullptr : data_->data(); - value_size_ = value_size; } bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { @@ -52,12 +52,15 @@ bool PrimitiveArray::EqualsExact(const PrimitiveArray& other) const { const uint8_t* this_data = raw_data_; const uint8_t* other_data = other.raw_data_; + int value_size = type_->value_size(); + DCHECK_GT(value_size, 0); + for (int i = 0; i < length_; ++i) { - if (!IsNull(i) && memcmp(this_data, other_data, value_size_)) { + if (!IsNull(i) && memcmp(this_data, other_data, value_size)) { return false; } - this_data += value_size_; - other_data += value_size_; + this_data += value_size; + other_data += value_size; } return true; } else { @@ -73,4 +76,159 @@ bool PrimitiveArray::Equals(const std::shared_ptr& arr) const { return EqualsExact(*static_cast(arr.get())); } +template +Status PrimitiveBuilder::Init(int32_t capacity) { + RETURN_NOT_OK(ArrayBuilder::Init(capacity)); + data_ = std::make_shared(pool_); + RETURN_NOT_OK(data_->Resize(type_traits::bytes_required(capacity))); + raw_data_ = reinterpret_cast(data_->mutable_data()); + return Status::OK(); +} + +template +Status PrimitiveBuilder::Resize(int32_t capacity) { + // XXX: Set floor size for now + if (capacity < MIN_BUILDER_CAPACITY) { + capacity = MIN_BUILDER_CAPACITY; + } + + if (capacity_ == 0) { + RETURN_NOT_OK(Init(capacity)); + } else { + RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); + RETURN_NOT_OK(data_->Resize(type_traits::bytes_required(capacity))); + raw_data_ = reinterpret_cast(data_->mutable_data()); + } + capacity_ = capacity; + return Status::OK(); +} + +template +Status PrimitiveBuilder::Reserve(int32_t elements) { + if (length_ + elements > capacity_) { + int32_t new_capacity = util::next_power2(length_ + elements); + return Resize(new_capacity); + } + return Status::OK(); +} + +template +Status NumericBuilder::Append(const value_type* values, int32_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(PrimitiveBuilder::Reserve(length)); + + if (length > 0) { + memcpy(raw_data_ + length_, values, type_traits::bytes_required(length)); + } + + if (valid_bytes != nullptr) { + PrimitiveBuilder::AppendNulls(valid_bytes, length); + } else { + for (int i = 0; i < length; ++i) { + util::set_bit(null_bitmap_data_, length_ + i); + } + } + + length_ += length; + return Status::OK(); +} + +template +void PrimitiveBuilder::AppendNulls(const uint8_t* valid_bytes, int32_t length) { + // If valid_bytes is all not null, then none of the values are null + for (int i = 0; i < length; ++i) { + if (valid_bytes[i] == 0) { + ++null_count_; + } else { + util::set_bit(null_bitmap_data_, length_ + i); + } + } +} + +template +std::shared_ptr PrimitiveBuilder::Finish() { + std::shared_ptr result = std::make_shared< + typename type_traits::ArrayType>( + type_, length_, data_, null_count_, null_bitmap_); + + data_ = null_bitmap_ = nullptr; + capacity_ = length_ = null_count_ = 0; + return result; +} + +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; +template class NumericBuilder; + + +bool BooleanArray::EqualsExact(const BooleanArray& other) const { + if (this == &other) return true; + if (null_count_ != other.null_count_) { + return false; + } + + if (null_count_ > 0) { + bool equal_bitmap = null_bitmap_->Equals(*other.null_bitmap_, + util::bytes_for_bits(length_)); + if (!equal_bitmap) { + return false; + } + + const uint8_t* this_data = raw_data_; + const uint8_t* other_data = other.raw_data_; + + for (int i = 0; i < length_; ++i) { + if (!IsNull(i) && util::get_bit(this_data, i) != util::get_bit(other_data, i)) { + return false; + } + } + return true; + } else { + return data_->Equals(*other.data_, util::bytes_for_bits(length_)); + } +} + +bool BooleanArray::Equals(const std::shared_ptr& arr) const { + if (this == arr.get()) return true; + if (Type::BOOL != arr->type_enum()) { + return false; + } + return EqualsExact(*static_cast(arr.get())); +} + +Status BooleanBuilder::Append(const uint8_t* values, int32_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + if (length > 0) { + for (int i = 0; i < length; ++i) { + if (values[i] > 0) { + util::set_bit(raw_data_, length_ + i); + } + } + } + + if (valid_bytes != nullptr) { + PrimitiveBuilder::AppendNulls(valid_bytes, length); + } else { + for (int i = 0; i < length; ++i) { + util::set_bit(null_bitmap_data_, length_ + i); + } + } + + length_ += length; + return Status::OK(); +} + +std::shared_ptr BooleanBuilder::Finish() { + return PrimitiveBuilder::Finish(); +} + } // namespace arrow diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 4eaff433229..51143f314a4 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -37,7 +37,7 @@ class MemoryPool; // Base class for fixed-size logical types class PrimitiveArray : public Array { public: - PrimitiveArray(const TypePtr& type, int32_t length, int value_size, + PrimitiveArray(const TypePtr& type, int32_t length, const std::shared_ptr& data, int32_t null_count = 0, const std::shared_ptr& null_bitmap = nullptr); @@ -51,25 +51,19 @@ class PrimitiveArray : public Array { protected: std::shared_ptr data_; const uint8_t* raw_data_; - int value_size_; }; #define NUMERIC_ARRAY_DECL(NAME, TypeClass, T) \ class NAME : public PrimitiveArray { \ public: \ using value_type = T; \ - NAME(const TypePtr& type, int32_t length, \ - const std::shared_ptr& data, \ - int32_t null_count = 0, \ - const std::shared_ptr& null_bitmap = nullptr) : \ - PrimitiveArray(std::make_shared(), length, \ - sizeof(T), data, null_count, null_bitmap) {} \ + using PrimitiveArray::PrimitiveArray; \ \ NAME(int32_t length, const std::shared_ptr& data, \ int32_t null_count = 0, \ const std::shared_ptr& null_bitmap = nullptr) : \ PrimitiveArray(std::make_shared(), length, \ - sizeof(T), data, null_count, null_bitmap) {} \ + data, null_count, null_bitmap) {} \ \ bool EqualsExact(const NAME& other) const { \ return PrimitiveArray::EqualsExact( \ @@ -96,148 +90,242 @@ NUMERIC_ARRAY_DECL(Int64Array, Int64Type, int64_t); NUMERIC_ARRAY_DECL(FloatArray, FloatType, float); NUMERIC_ARRAY_DECL(DoubleArray, DoubleType, double); -template +template class PrimitiveBuilder : public ArrayBuilder { public: typedef typename Type::c_type value_type; explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) : ArrayBuilder(pool, type), - values_(nullptr) { - elsize_ = sizeof(value_type); - } + data_(nullptr) {} virtual ~PrimitiveBuilder() {} - Status Resize(int32_t capacity) { - // XXX: Set floor size for now - if (capacity < MIN_BUILDER_CAPACITY) { - capacity = MIN_BUILDER_CAPACITY; - } + Status Init(int32_t capacity); - if (capacity_ == 0) { - RETURN_NOT_OK(Init(capacity)); - } else { - RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - RETURN_NOT_OK(values_->Resize(capacity * elsize_)); - } - capacity_ = capacity; - return Status::OK(); - } + // Increase the capacity of the builder to accommodate at least the indicated + // number of elements + Status Resize(int32_t capacity); - Status Init(int32_t capacity) { - RETURN_NOT_OK(ArrayBuilder::Init(capacity)); - values_ = std::make_shared(pool_); - return values_->Resize(capacity * elsize_); - } + // Ensure that builder can accommodate an additional number of + // elements. Resizes if the current capacity is not sufficient + Status Reserve(int32_t elements); - Status Reserve(int32_t elements) { - if (length_ + elements > capacity_) { - int32_t new_capacity = util::next_power2(length_ + elements); - return Resize(new_capacity); - } - return Status::OK(); - } + std::shared_ptr Finish() override; - Status Advance(int32_t elements) { - return ArrayBuilder::Advance(elements); - } + using ArrayBuilder::Advance; - // Scalar append - Status Append(value_type val) { + // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory + void AppendNulls(const uint8_t* valid_bytes, int32_t length); + + Status AppendNull() { if (length_ == capacity_) { // If the capacity was not already a multiple of 2, do so here RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1))); } - util::set_bit(null_bitmap_data_, length_); - raw_buffer()[length_++] = val; + ++null_count_; + ++length_; return Status::OK(); } + std::shared_ptr data() const { + return data_; + } + + protected: + std::shared_ptr data_; + value_type* raw_data_; +}; + +template +class NumericBuilder : public PrimitiveBuilder { + public: + using typename PrimitiveBuilder::value_type; + using PrimitiveBuilder::PrimitiveBuilder; + + // Scalar append. Does not capacity-check; make sure to call Reserve beforehand + void Append(value_type val) { + util::set_bit(null_bitmap_data_, length_); + raw_data_[length_++] = val; + } + // Vector append // // If passed, valid_bytes is of equal length to values, and any zero byte // will be considered as a null for that slot Status Append(const value_type* values, int32_t length, - const uint8_t* valid_bytes = nullptr) { - if (length_ + length > capacity_) { - int32_t new_capacity = util::next_power2(length_ + length); - RETURN_NOT_OK(Resize(new_capacity)); - } - if (length > 0) { - memcpy(raw_buffer() + length_, values, length * elsize_); - } + const uint8_t* valid_bytes = nullptr); - if (valid_bytes != nullptr) { - AppendNulls(valid_bytes, length); - } else { - for (int i = 0; i < length; ++i) { - util::set_bit(null_bitmap_data_, length_ + i); - } - } + protected: + using PrimitiveBuilder::length_; + using PrimitiveBuilder::null_bitmap_data_; + using PrimitiveBuilder::raw_data_; +}; - length_ += length; - return Status::OK(); +template <> +struct type_traits { + typedef UInt8Array ArrayType; + + static inline int bytes_required(int elements) { + return elements; } +}; - // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory - void AppendNulls(const uint8_t* valid_bytes, int32_t length) { - // If valid_bytes is all not null, then none of the values are null - for (int i = 0; i < length; ++i) { - if (valid_bytes[i] == 0) { - ++null_count_; - } else { - util::set_bit(null_bitmap_data_, length_ + i); - } - } +template <> +struct type_traits { + typedef Int8Array ArrayType; + + static inline int bytes_required(int elements) { + return elements; } +}; - Status AppendNull() { - if (length_ == capacity_) { - // If the capacity was not already a multiple of 2, do so here - RETURN_NOT_OK(Resize(util::next_power2(capacity_ + 1))); - } - ++null_count_; - ++length_; - return Status::OK(); +template <> +struct type_traits { + typedef UInt16Array ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(uint16_t); + } +}; + +template <> +struct type_traits { + typedef Int16Array ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(int16_t); } +}; - std::shared_ptr Finish() override { - std::shared_ptr result = std::make_shared( - type_, length_, values_, null_count_, null_bitmap_); +template <> +struct type_traits { + typedef UInt32Array ArrayType; - values_ = null_bitmap_ = nullptr; - capacity_ = length_ = null_count_ = 0; - return result; + static inline int bytes_required(int elements) { + return elements * sizeof(uint32_t); } +}; + +template <> +struct type_traits { + typedef Int32Array ArrayType; - value_type* raw_buffer() { - return reinterpret_cast(values_->mutable_data()); + static inline int bytes_required(int elements) { + return elements * sizeof(int32_t); } +}; - std::shared_ptr buffer() const { - return values_; +template <> +struct type_traits { + typedef UInt64Array ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(uint64_t); } +}; - protected: - std::shared_ptr values_; - int elsize_; +template <> +struct type_traits { + typedef Int64Array ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(int64_t); + } +}; +template <> +struct type_traits { + typedef FloatArray ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(float); + } +}; + +template <> +struct type_traits { + typedef DoubleArray ArrayType; + + static inline int bytes_required(int elements) { + return elements * sizeof(double); + } }; // Builders -typedef PrimitiveBuilder UInt8Builder; -typedef PrimitiveBuilder UInt16Builder; -typedef PrimitiveBuilder UInt32Builder; -typedef PrimitiveBuilder UInt64Builder; +typedef NumericBuilder UInt8Builder; +typedef NumericBuilder UInt16Builder; +typedef NumericBuilder UInt32Builder; +typedef NumericBuilder UInt64Builder; + +typedef NumericBuilder Int8Builder; +typedef NumericBuilder Int16Builder; +typedef NumericBuilder Int32Builder; +typedef NumericBuilder Int64Builder; + +typedef NumericBuilder FloatBuilder; +typedef NumericBuilder DoubleBuilder; + + +class BooleanArray : public PrimitiveArray { + public: + using PrimitiveArray::PrimitiveArray; + + BooleanArray(int32_t length, const std::shared_ptr& data, + int32_t null_count = 0, + const std::shared_ptr& null_bitmap = nullptr) : + PrimitiveArray(std::make_shared(), length, + data, null_count, null_bitmap) {} + + bool EqualsExact(const BooleanArray& other) const; + bool Equals(const std::shared_ptr& arr) const override; + + const uint8_t* raw_data() const { + return reinterpret_cast(raw_data_); + } + + bool Value(int i) const { + return util::get_bit(raw_data(), i); + } +}; + +template <> +struct type_traits { + typedef BooleanArray ArrayType; + + static inline int bytes_required(int elements) { + return util::bytes_for_bits(elements); + } +}; + +class BooleanBuilder : public PrimitiveBuilder { + public: + explicit BooleanBuilder(MemoryPool* pool, const TypePtr& type) : + PrimitiveBuilder(pool, type) {} + + virtual ~BooleanBuilder() {} + + std::shared_ptr Finish() override; -typedef PrimitiveBuilder Int8Builder; -typedef PrimitiveBuilder Int16Builder; -typedef PrimitiveBuilder Int32Builder; -typedef PrimitiveBuilder Int64Builder; + // Scalar append + void Append(bool val) { + util::set_bit(null_bitmap_data_, length_); + if (val) { + util::set_bit(raw_data_, length_); + } + ++length_; + } -typedef PrimitiveBuilder FloatBuilder; -typedef PrimitiveBuilder DoubleBuilder; + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null for that slot + Status Append(const uint8_t* values, int32_t length, + const uint8_t* valid_bytes = nullptr); + + Status Append(const std::vector& values, + const uint8_t* valid_bytes = nullptr); +}; } // namespace arrow diff --git a/cpp/src/arrow/types/string-test.cc b/cpp/src/arrow/types/string-test.cc index b329b4f0ef7..d3a4cc37f9c 100644 --- a/cpp/src/arrow/types/string-test.cc +++ b/cpp/src/arrow/types/string-test.cc @@ -92,7 +92,7 @@ class TestStringContainer : public ::testing::Test { offsets_buf_ = test::to_buffer(offsets_); - null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_.data(), valid_bytes_.size()); + null_bitmap_ = test::bytes_to_null_buffer(valid_bytes_); null_count_ = test::null_count(valid_bytes_); strings_ = std::make_shared(length_, offsets_buf_, values_, diff --git a/cpp/src/arrow/util/bit-util.cc b/cpp/src/arrow/util/bit-util.cc index 292cb33887f..6c6d5330eab 100644 --- a/cpp/src/arrow/util/bit-util.cc +++ b/cpp/src/arrow/util/bit-util.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" @@ -23,25 +24,24 @@ namespace arrow { -void util::bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits) { - for (int i = 0; i < length; ++i) { - if (static_cast(bytes[i])) { +void util::bytes_to_bits(const std::vector& bytes, uint8_t* bits) { + for (size_t i = 0; i < bytes.size(); ++i) { + if (bytes[i] > 0) { set_bit(bits, i); } } } -Status util::bytes_to_bits(uint8_t* bytes, int length, +Status util::bytes_to_bits(const std::vector& bytes, std::shared_ptr* out) { - int bit_length = ceil_byte(length) / 8; + int bit_length = util::bytes_for_bits(bytes.size()); auto buffer = std::make_shared(); RETURN_NOT_OK(buffer->Resize(bit_length)); memset(buffer->mutable_data(), 0, bit_length); - bytes_to_bits(bytes, length, buffer->mutable_data()); + bytes_to_bits(bytes, buffer->mutable_data()); *out = buffer; - return Status::OK(); } diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 08222d50894..66b251ac58b 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -20,6 +20,7 @@ #include #include +#include namespace arrow { @@ -66,8 +67,8 @@ static inline int64_t next_power2(int64_t n) { return n; } -void bytes_to_bits(uint8_t* bytes, int length, uint8_t* bits); -Status bytes_to_bits(uint8_t*, int, std::shared_ptr*); +void bytes_to_bits(const std::vector& bytes, uint8_t* bits); +Status bytes_to_bits(const std::vector&, std::shared_ptr*); } // namespace util From 0f55344fc41488b5190528e7900d8e20e15f30c5 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 14:22:58 -0700 Subject: [PATCH 2/8] Initialize memory to 0 after PoolBuffer::Resize to avoid boolean bit setting issues --- cpp/src/arrow/types/primitive-test.cc | 221 ++++++++++++-------------- cpp/src/arrow/types/primitive.cc | 29 +++- cpp/src/arrow/types/primitive.h | 11 +- cpp/src/arrow/util/bit-util.h | 8 +- 4 files changed, 137 insertions(+), 132 deletions(-) diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 41a0de0ac6a..5bfdd9399c4 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -83,6 +83,7 @@ class TestPrimitiveBuilder : public TestBuilder { typedef typename Attrs::ArrayType ArrayType; typedef typename Attrs::BuilderType BuilderType; typedef typename Attrs::T T; + typedef typename Attrs::Type Type; virtual void SetUp() { TestBuilder::SetUp(); @@ -104,54 +105,39 @@ class TestPrimitiveBuilder : public TestBuilder { test::random_null_bitmap(N, pct_null, valid_bytes_.data()); } - void CheckNullable() { - int size = builder_->length(); + void Check(const std::shared_ptr& builder, bool nullable) { + int size = builder->length(); - auto ex_data = std::make_shared( - reinterpret_cast(draws_.data()), + auto ex_data = std::make_shared(reinterpret_cast(draws_.data()), size * sizeof(T)); - auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); - int32_t ex_null_count = test::null_count(valid_bytes_); + std::shared_ptr ex_null_bitmap; + int32_t ex_null_count = 0; + + if (nullable) { + ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); + ex_null_count = test::null_count(valid_bytes_); + } else { + ex_null_bitmap = nullptr; + } auto expected = std::make_shared(size, ex_data, ex_null_count, ex_null_bitmap); - std::shared_ptr result = std::dynamic_pointer_cast( - builder_->Finish()); + builder->Finish()); // Builder is now reset - ASSERT_EQ(0, builder_->length()); - ASSERT_EQ(0, builder_->capacity()); - ASSERT_EQ(0, builder_->null_count()); - ASSERT_EQ(nullptr, builder_->data()); + ASSERT_EQ(0, builder->length()); + ASSERT_EQ(0, builder->capacity()); + ASSERT_EQ(0, builder->null_count()); + ASSERT_EQ(nullptr, builder->data()); ASSERT_EQ(ex_null_count, result->null_count()); ASSERT_TRUE(result->EqualsExact(*expected.get())); } - void CheckNonNullable() { - int size = builder_nn_->length(); - - auto ex_data = std::make_shared(reinterpret_cast(draws_.data()), - size * sizeof(T)); - - auto expected = std::make_shared(size, ex_data); - - std::shared_ptr result = std::dynamic_pointer_cast( - builder_nn_->Finish()); - - // Builder is now reset - ASSERT_EQ(0, builder_nn_->length()); - ASSERT_EQ(0, builder_nn_->capacity()); - ASSERT_EQ(nullptr, builder_nn_->data()); - - ASSERT_TRUE(result->EqualsExact(*expected.get())); - ASSERT_EQ(0, result->null_count()); - } - protected: - TypePtr type_; + std::shared_ptr type_; shared_ptr builder_; shared_ptr builder_nn_; @@ -159,14 +145,14 @@ class TestPrimitiveBuilder : public TestBuilder { vector valid_bytes_; }; -#define PTYPE_DECL(CapType, c_type) \ - typedef CapType##Array ArrayType; \ - typedef CapType##Builder BuilderType; \ - typedef CapType##Type Type; \ - typedef c_type T; \ - \ - static TypePtr type() { \ - return TypePtr(new Type()); \ +#define PTYPE_DECL(CapType, c_type) \ + typedef CapType##Array ArrayType; \ + typedef CapType##Builder BuilderType; \ + typedef CapType##Type Type; \ + typedef c_type T; \ + \ + static std::shared_ptr type() { \ + return std::shared_ptr(new Type()); \ } #define PINT_DECL(CapType, c_type, LOWER, UPPER) \ @@ -198,7 +184,63 @@ PINT_DECL(Int64, int64_t, INT64_MIN, INT64_MAX); PFLOAT_DECL(Float, float, -1000, 1000); PFLOAT_DECL(Double, double, -1000, 1000); -typedef ::testing::Types +void TestPrimitiveBuilder::RandomData(int N, double pct_null) { + draws_.resize(N); + valid_bytes_.resize(N); + + test::random_null_bitmap(N, 0.5, draws_.data()); + test::random_null_bitmap(N, pct_null, valid_bytes_.data()); +} + +template <> +void TestPrimitiveBuilder::Check( + const std::shared_ptr& builder, bool nullable) { + int size = builder->length(); + + auto ex_data = test::bytes_to_null_buffer(draws_); + + std::shared_ptr ex_null_bitmap; + int32_t ex_null_count = 0; + + if (nullable) { + ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); + ex_null_count = test::null_count(valid_bytes_); + } else { + ex_null_bitmap = nullptr; + } + + auto expected = std::make_shared(size, ex_data, ex_null_count, + ex_null_bitmap); + std::shared_ptr result = std::dynamic_pointer_cast( + builder->Finish()); + + // Builder is now reset + ASSERT_EQ(0, builder->length()); + ASSERT_EQ(0, builder->capacity()); + ASSERT_EQ(0, builder->null_count()); + ASSERT_EQ(nullptr, builder->data()); + + ASSERT_EQ(ex_null_count, result->null_count()); + + ASSERT_EQ(expected->length(), result->length()); + + for (int i = 0; i < result->length(); ++i) { + if (nullable) { + ASSERT_EQ(valid_bytes_[i] == 0, result->IsNull(i)) << i; + } + bool actual = util::get_bit(result->raw_data(), i); + ASSERT_EQ(static_cast(draws_[i]), actual) << i; + } + ASSERT_TRUE(result->EqualsExact(*expected.get())); +} + +typedef ::testing::Types Primitives; @@ -207,17 +249,21 @@ TYPED_TEST_CASE(TestPrimitiveBuilder, Primitives); #define DECL_T() \ typedef typename TestFixture::T T; +#define DECL_TYPE() \ + typedef typename TestFixture::Type Type; + #define DECL_ARRAYTYPE() \ typedef typename TestFixture::ArrayType ArrayType; TYPED_TEST(TestPrimitiveBuilder, TestInit) { - DECL_T(); + DECL_TYPE(); int n = 1000; ASSERT_OK(this->builder_->Init(n)); ASSERT_EQ(n, this->builder_->capacity()); - ASSERT_EQ(n * sizeof(T), this->builder_->data()->size()); + ASSERT_EQ(type_traits::bytes_required(n), + this->builder_->data()->size()); // unsure if this should go in all builder classes ASSERT_EQ(0, this->builder_->num_children()); @@ -280,16 +326,20 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { this->builder_nn_->Reserve(1000); int i; + int null_count = 0; // Append the first 1000 for (i = 0; i < 1000; ++i) { if (valid_bytes[i] > 0) { this->builder_->Append(draws[i]); } else { this->builder_->AppendNull(); + ++null_count; } this->builder_nn_->Append(draws[i]); } + ASSERT_EQ(null_count, this->builder_->null_count()); + ASSERT_EQ(1000, this->builder_->length()); ASSERT_EQ(1024, this->builder_->capacity()); @@ -315,8 +365,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendScalar) { ASSERT_EQ(size, this->builder_nn_->length()); ASSERT_EQ(util::next_power2(size), this->builder_nn_->capacity()); - this->CheckNullable(); - this->CheckNonNullable(); + this->Check(this->builder_, true); + this->Check(this->builder_nn_, false); } @@ -348,8 +398,8 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) { ASSERT_EQ(size, this->builder_->length()); ASSERT_EQ(util::next_power2(size), this->builder_->capacity()); - this->CheckNullable(); - this->CheckNonNullable(); + this->Check(this->builder_, true); + this->Check(this->builder_nn_, false); } TYPED_TEST(TestPrimitiveBuilder, TestAdvance) { @@ -364,15 +414,16 @@ TYPED_TEST(TestPrimitiveBuilder, TestAdvance) { } TYPED_TEST(TestPrimitiveBuilder, TestResize) { - DECL_T(); + DECL_TYPE(); int cap = MIN_BUILDER_CAPACITY * 2; ASSERT_OK(this->builder_->Resize(cap)); ASSERT_EQ(cap, this->builder_->capacity()); - ASSERT_EQ(cap * sizeof(T), this->builder_->data()->size()); - ASSERT_EQ(util::ceil_byte(cap) / 8, this->builder_->null_bitmap()->size()); + ASSERT_EQ(type_traits::bytes_required(cap), this->builder_->data()->size()); + ASSERT_EQ(util::bytes_for_bits(cap), + this->builder_->null_bitmap()->size()); } TYPED_TEST(TestPrimitiveBuilder, TestReserve) { @@ -388,72 +439,4 @@ TYPED_TEST(TestPrimitiveBuilder, TestReserve) { this->builder_->capacity()); } -// ---------------------------------------------------------------------- -// Boolean tests - -struct PBoolean { - PTYPE_DECL(Boolean, bool); -}; - -class TestBooleanBuilder : public TestPrimitiveBuilder { - public: - - void RandomData(int N, double pct_null = 0.1) { - draws_.resize(N); - valid_bytes_.resize(N); - - test::random_null_bitmap(N, 0.5, draws_.data()); - test::random_null_bitmap(N, pct_null, valid_bytes_.data()); - } - - void CheckNullable() { - int size = builder_->length(); - - auto ex_data = test::bytes_to_null_buffer(draws_); - auto ex_null_bitmap = test::bytes_to_null_buffer(valid_bytes_); - int32_t ex_null_count = test::null_count(valid_bytes_); - - auto expected = std::make_shared(size, ex_data, ex_null_count, - ex_null_bitmap); - - std::shared_ptr result = std::dynamic_pointer_cast( - builder_->Finish()); - - // Builder is now reset - ASSERT_EQ(0, builder_->length()); - ASSERT_EQ(0, builder_->capacity()); - ASSERT_EQ(0, builder_->null_count()); - ASSERT_EQ(nullptr, builder_->data()); - - ASSERT_EQ(ex_null_count, result->null_count()); - ASSERT_TRUE(result->EqualsExact(*expected.get())); - } - - void CheckNonNullable() { - int size = builder_nn_->length(); - - auto ex_data = test::bytes_to_null_buffer(draws_); - auto expected = std::make_shared(size, ex_data); - - std::shared_ptr result = std::dynamic_pointer_cast( - builder_nn_->Finish()); - - // Builder is now reset - ASSERT_EQ(0, builder_nn_->length()); - ASSERT_EQ(0, builder_nn_->capacity()); - ASSERT_EQ(nullptr, builder_nn_->data()); - - ASSERT_TRUE(result->EqualsExact(*expected.get())); - ASSERT_EQ(0, result->null_count()); - } - - protected: - TypePtr type_; - shared_ptr builder_; - shared_ptr builder_nn_; - - vector draws_; - vector valid_bytes_; -}; - } // namespace arrow diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index f3d03e660b0..ae2a14cc1ea 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -80,7 +80,11 @@ template Status PrimitiveBuilder::Init(int32_t capacity) { RETURN_NOT_OK(ArrayBuilder::Init(capacity)); data_ = std::make_shared(pool_); - RETURN_NOT_OK(data_->Resize(type_traits::bytes_required(capacity))); + + int64_t nbytes = type_traits::bytes_required(capacity); + RETURN_NOT_OK(data_->Resize(nbytes)); + memset(data_->mutable_data(), 0, nbytes); + raw_data_ = reinterpret_cast(data_->mutable_data()); return Status::OK(); } @@ -96,8 +100,13 @@ Status PrimitiveBuilder::Resize(int32_t capacity) { RETURN_NOT_OK(Init(capacity)); } else { RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - RETURN_NOT_OK(data_->Resize(type_traits::bytes_required(capacity))); + + int64_t old_bytes = data_->size(); + int64_t new_bytes = type_traits::bytes_required(capacity); + RETURN_NOT_OK(data_->Resize(new_bytes)); raw_data_ = reinterpret_cast(data_->mutable_data()); + + memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); } capacity_ = capacity; return Status::OK(); @@ -167,6 +176,11 @@ template class NumericBuilder; template class NumericBuilder; template class NumericBuilder; +BooleanArray::BooleanArray(int32_t length, const std::shared_ptr& data, + int32_t null_count, + const std::shared_ptr& null_bitmap) : + PrimitiveArray(std::make_shared(), length, + data, null_count, null_bitmap) {} bool BooleanArray::EqualsExact(const BooleanArray& other) const { if (this == &other) return true; @@ -207,11 +221,11 @@ Status BooleanBuilder::Append(const uint8_t* values, int32_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(Reserve(length)); - if (length > 0) { - for (int i = 0; i < length; ++i) { - if (values[i] > 0) { - util::set_bit(raw_data_, length_ + i); - } + for (int i = 0; i < length; ++i) { + if (values[i] > 0) { + util::set_bit(raw_data_, length_ + i); + } else { + util::clear_bit(raw_data_, length_ + i); } } @@ -222,7 +236,6 @@ Status BooleanBuilder::Append(const uint8_t* values, int32_t length, util::set_bit(null_bitmap_data_, length_ + i); } } - length_ += length; return Status::OK(); } diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 51143f314a4..38f7ef24ada 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -27,6 +27,7 @@ #include "arrow/type.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" +#include "arrow/util/logging.h" #include "arrow/util/status.h" namespace arrow { @@ -273,9 +274,7 @@ class BooleanArray : public PrimitiveArray { BooleanArray(int32_t length, const std::shared_ptr& data, int32_t null_count = 0, - const std::shared_ptr& null_bitmap = nullptr) : - PrimitiveArray(std::make_shared(), length, - data, null_count, null_bitmap) {} + const std::shared_ptr& null_bitmap = nullptr); bool EqualsExact(const BooleanArray& other) const; bool Equals(const std::shared_ptr& arr) const override; @@ -312,10 +311,16 @@ class BooleanBuilder : public PrimitiveBuilder { util::set_bit(null_bitmap_data_, length_); if (val) { util::set_bit(raw_data_, length_); + } else { + util::clear_bit(raw_data_, length_); } ++length_; } + void Append(uint8_t val) { + Append(static_cast(val)); + } + // Vector append // // If passed, valid_bytes is of equal length to values, and any zero byte diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 66b251ac58b..8d6287130dd 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -44,15 +44,19 @@ static inline int64_t ceil_2bytes(int64_t size) { static constexpr uint8_t BITMASK[] = {1, 2, 4, 8, 16, 32, 64, 128}; static inline bool get_bit(const uint8_t* bits, int i) { - return bits[i / 8] & BITMASK[i % 8]; + return static_cast(bits[i / 8] & BITMASK[i % 8]); } static inline bool bit_not_set(const uint8_t* bits, int i) { return (bits[i / 8] & BITMASK[i % 8]) == 0; } +static inline void clear_bit(uint8_t* bits, int i) { + bits[i / 8] &= ~BITMASK[i % 8]; +} + static inline void set_bit(uint8_t* bits, int i) { - bits[i / 8] |= 1 << (i % 8); + bits[i / 8] |= BITMASK[i % 8]; } static inline int64_t next_power2(int64_t n) { From 2dae07ed9bde709d8050f3bd30cd7379d3ce3e68 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 14:24:26 -0700 Subject: [PATCH 3/8] cpplint --- cpp/src/arrow/types/primitive.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 38f7ef24ada..eba63bcc846 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -21,6 +21,7 @@ #include #include #include +#include #include "arrow/array.h" #include "arrow/builder.h" From 2299a6c9480ac9939629d3a27095bdc970d8a7a6 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 14:39:04 -0700 Subject: [PATCH 4/8] Install logging.h --- cpp/src/arrow/types/primitive.h | 1 - cpp/src/arrow/util/CMakeLists.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index eba63bcc846..3dc56bd01b5 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -28,7 +28,6 @@ #include "arrow/type.h" #include "arrow/util/bit-util.h" #include "arrow/util/buffer.h" -#include "arrow/util/logging.h" #include "arrow/util/status.h" namespace arrow { diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index fed05e3690c..d2a4b091fad 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -23,6 +23,7 @@ install(FILES bit-util.h buffer.h + logging.h macros.h memory-pool.h status.h @@ -59,7 +60,7 @@ if (ARROW_BUILD_BENCHMARKS) ) else() target_link_libraries(arrow_benchmark_main - benchmark + benchmark pthread ) endif() From dc8d0a4f07876d9a487740783507451ec169985d Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 14:59:17 -0700 Subject: [PATCH 5/8] Fix up pyarrow per C++ API changes. Add boolean builtin conversion / API support --- python/pyarrow/includes/libarrow.pxd | 3 ++ python/pyarrow/scalar.pyx | 6 ++- python/pyarrow/tests/test_convert_builtin.py | 5 ++- python/pyarrow/tests/test_scalars.py | 39 ++++++++++++-------- python/src/pyarrow/adapters/builtin.cc | 27 ++++++++++++-- 5 files changed, 58 insertions(+), 22 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index e6afcbd79b6..943a08f84a0 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -86,6 +86,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool IsNull(int i) + cdef cppclass CBooleanArray" arrow::BooleanArray"(CArray): + c_bool Value(int i) + cdef cppclass CUInt8Array" arrow::UInt8Array"(CArray): uint8_t Value(int i) diff --git a/python/pyarrow/scalar.pyx b/python/pyarrow/scalar.pyx index 04f013d6ca7..0d391e5f26b 100644 --- a/python/pyarrow/scalar.pyx +++ b/python/pyarrow/scalar.pyx @@ -58,7 +58,10 @@ cdef class ArrayValue(Scalar): cdef class BooleanValue(ArrayValue): - pass + + def as_py(self): + cdef CBooleanArray* ap = self.sp_array.get() + return ap.Value(self.index) cdef class Int8Value(ArrayValue): @@ -172,6 +175,7 @@ cdef class ListValue(ArrayValue): cdef dict _scalar_classes = { + Type_BOOL: BooleanValue, Type_UINT8: Int8Value, Type_UINT16: Int16Value, Type_UINT32: Int32Value, diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py index 25f69691210..2beb6b39d73 100644 --- a/python/pyarrow/tests/test_convert_builtin.py +++ b/python/pyarrow/tests/test_convert_builtin.py @@ -22,7 +22,10 @@ class TestConvertList(unittest.TestCase): def test_boolean(self): - pass + arr = pyarrow.from_pylist([True, None, False, None]) + assert len(arr) == 4 + assert arr.null_count == 2 + assert arr.type == pyarrow.bool_() def test_empty_list(self): arr = pyarrow.from_pylist([]) diff --git a/python/pyarrow/tests/test_scalars.py b/python/pyarrow/tests/test_scalars.py index 021737db672..4fb850a4d47 100644 --- a/python/pyarrow/tests/test_scalars.py +++ b/python/pyarrow/tests/test_scalars.py @@ -16,67 +16,74 @@ # under the License. from pyarrow.compat import unittest, u -import pyarrow as arrow +import pyarrow as A class TestScalars(unittest.TestCase): def test_null_singleton(self): with self.assertRaises(Exception): - arrow.NAType() + A.NAType() def test_bool(self): - pass + arr = A.from_pylist([True, None, False, None]) + + v = arr[0] + assert isinstance(v, A.BooleanValue) + assert repr(v) == "True" + assert v.as_py() == True + + assert arr[1] is A.NA def test_int64(self): - arr = arrow.from_pylist([1, 2, None]) + arr = A.from_pylist([1, 2, None]) v = arr[0] - assert isinstance(v, arrow.Int64Value) + assert isinstance(v, A.Int64Value) assert repr(v) == "1" assert v.as_py() == 1 - assert arr[2] is arrow.NA + assert arr[2] is A.NA def test_double(self): - arr = arrow.from_pylist([1.5, None, 3]) + arr = A.from_pylist([1.5, None, 3]) v = arr[0] - assert isinstance(v, arrow.DoubleValue) + assert isinstance(v, A.DoubleValue) assert repr(v) == "1.5" assert v.as_py() == 1.5 - assert arr[1] is arrow.NA + assert arr[1] is A.NA v = arr[2] assert v.as_py() == 3.0 def test_string(self): - arr = arrow.from_pylist(['foo', None, u('bar')]) + arr = A.from_pylist(['foo', None, u('bar')]) v = arr[0] - assert isinstance(v, arrow.StringValue) + assert isinstance(v, A.StringValue) assert repr(v) == "'foo'" assert v.as_py() == 'foo' - assert arr[1] is arrow.NA + assert arr[1] is A.NA v = arr[2].as_py() assert v == 'bar' assert isinstance(v, str) def test_list(self): - arr = arrow.from_pylist([['foo', None], None, ['bar'], []]) + arr = A.from_pylist([['foo', None], None, ['bar'], []]) v = arr[0] assert len(v) == 2 - assert isinstance(v, arrow.ListValue) + assert isinstance(v, A.ListValue) assert repr(v) == "['foo', None]" assert v.as_py() == ['foo', None] assert v[0].as_py() == 'foo' - assert v[1] is arrow.NA + assert v[1] is A.NA - assert arr[1] is arrow.NA + assert arr[1] is A.NA v = arr[3] assert len(v) == 0 diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index acb13acecaf..78fc32caf55 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -61,6 +61,8 @@ class ScalarVisitor { ++total_count_; if (obj == Py_None) { ++none_count_; + } else if (PyBool_Check(obj)) { + ++bool_count_; } else if (PyFloat_Check(obj)) { ++float_count_; } else if (IsPyInteger(obj)) { @@ -256,6 +258,21 @@ class TypedConverter : public SeqConverter { class BoolConverter : public TypedConverter { public: Status AppendData(PyObject* seq) override { + int64_t val; + Py_ssize_t size = PySequence_Size(seq); + RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); + for (int64_t i = 0; i < size; ++i) { + OwnedRef item(PySequence_GetItem(seq, i)); + if (item.obj() == Py_None) { + typed_builder_->AppendNull(); + } else { + if (item.obj() == Py_True) { + typed_builder_->Append(true); + } else { + typed_builder_->Append(false); + } + } + } return Status::OK(); } }; @@ -265,14 +282,15 @@ class Int64Converter : public TypedConverter { Status AppendData(PyObject* seq) override { int64_t val; Py_ssize_t size = PySequence_Size(seq); + RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + typed_builder_->AppendNull(); } else { val = PyLong_AsLongLong(item.obj()); RETURN_IF_PYERROR(); - RETURN_ARROW_NOT_OK(typed_builder_->Append(val)); + typed_builder_->Append(val); } } return Status::OK(); @@ -284,14 +302,15 @@ class DoubleConverter : public TypedConverter { Status AppendData(PyObject* seq) override { double val; Py_ssize_t size = PySequence_Size(seq); + RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) { OwnedRef item(PySequence_GetItem(seq, i)); if (item.obj() == Py_None) { - RETURN_ARROW_NOT_OK(typed_builder_->AppendNull()); + typed_builder_->AppendNull(); } else { val = PyFloat_AsDouble(item.obj()); RETURN_IF_PYERROR(); - RETURN_ARROW_NOT_OK(typed_builder_->Append(val)); + typed_builder_->Append(val); } } return Status::OK(); From 74f704bfaece4f2e5bb977ec63d1bc51864fcefc Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 15:15:41 -0700 Subject: [PATCH 6/8] Try to fix clang issues, make some PrimitiveBuilder methods protected --- cpp/src/arrow/builder.cc | 7 +++++++ cpp/src/arrow/builder.h | 2 ++ cpp/src/arrow/types/primitive-test.cc | 18 ++++++++++-------- cpp/src/arrow/types/primitive.cc | 5 +++++ cpp/src/arrow/types/primitive.h | 26 ++++++++++++++------------ 5 files changed, 38 insertions(+), 20 deletions(-) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 6a62dc3b0e0..4061f35fd5e 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -54,5 +54,12 @@ Status ArrayBuilder::Advance(int32_t elements) { return Status::OK(); } +Status ArrayBuilder::Reserve(int32_t elements) { + if (length_ + elements > capacity_) { + int32_t new_capacity = util::next_power2(length_ + elements); + return Resize(new_capacity); + } + return Status::OK(); +} } // namespace arrow diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index 308e54c80d7..d1a49dce799 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -69,6 +69,8 @@ class ArrayBuilder { // Resizes the null_bitmap array Status Resize(int32_t new_bits); + Status Reserve(int32_t extra_bits); + // For cases where raw data was memcpy'd into the internal buffers, allows us // to advance the length of the builder. It is your responsibility to use // this function responsibly. diff --git a/cpp/src/arrow/types/primitive-test.cc b/cpp/src/arrow/types/primitive-test.cc index 5bfdd9399c4..761845d9381 100644 --- a/cpp/src/arrow/types/primitive-test.cc +++ b/cpp/src/arrow/types/primitive-test.cc @@ -69,11 +69,11 @@ PRIMITIVE_TEST(BooleanType, BOOL, "bool"); // ---------------------------------------------------------------------- // Primitive type tests -TEST_F(TestBuilder, TestResize) { +TEST_F(TestBuilder, TestReserve) { builder_->Init(10); ASSERT_EQ(2, builder_->null_bitmap()->size()); - builder_->Resize(30); + builder_->Reserve(30); ASSERT_EQ(4, builder_->null_bitmap()->size()); } @@ -260,9 +260,9 @@ TYPED_TEST(TestPrimitiveBuilder, TestInit) { DECL_TYPE(); int n = 1000; - ASSERT_OK(this->builder_->Init(n)); - ASSERT_EQ(n, this->builder_->capacity()); - ASSERT_EQ(type_traits::bytes_required(n), + ASSERT_OK(this->builder_->Reserve(n)); + ASSERT_EQ(util::next_power2(n), this->builder_->capacity()); + ASSERT_EQ(util::next_power2(type_traits::bytes_required(n)), this->builder_->data()->size()); // unsure if this should go in all builder classes @@ -404,13 +404,15 @@ TYPED_TEST(TestPrimitiveBuilder, TestAppendVector) { TYPED_TEST(TestPrimitiveBuilder, TestAdvance) { int n = 1000; - ASSERT_OK(this->builder_->Init(n)); + ASSERT_OK(this->builder_->Reserve(n)); ASSERT_OK(this->builder_->Advance(100)); ASSERT_EQ(100, this->builder_->length()); ASSERT_OK(this->builder_->Advance(900)); - ASSERT_RAISES(Invalid, this->builder_->Advance(1)); + + int too_many = this->builder_->capacity() - 1000 + 1; + ASSERT_RAISES(Invalid, this->builder_->Advance(too_many)); } TYPED_TEST(TestPrimitiveBuilder, TestResize) { @@ -418,7 +420,7 @@ TYPED_TEST(TestPrimitiveBuilder, TestResize) { int cap = MIN_BUILDER_CAPACITY * 2; - ASSERT_OK(this->builder_->Resize(cap)); + ASSERT_OK(this->builder_->Reserve(cap)); ASSERT_EQ(cap, this->builder_->capacity()); ASSERT_EQ(type_traits::bytes_required(cap), this->builder_->data()->size()); diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index ae2a14cc1ea..611457dbf63 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -121,6 +121,11 @@ Status PrimitiveBuilder::Reserve(int32_t elements) { return Status::OK(); } +template +std::shared_ptr NumericBuilder::Finish() { + return PrimitiveBuilder::Finish(); +} + template Status NumericBuilder::Append(const value_type* values, int32_t length, const uint8_t* valid_bytes) { diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index 3dc56bd01b5..f090b46608c 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -102,18 +102,6 @@ class PrimitiveBuilder : public ArrayBuilder { virtual ~PrimitiveBuilder() {} - Status Init(int32_t capacity); - - // Increase the capacity of the builder to accommodate at least the indicated - // number of elements - Status Resize(int32_t capacity); - - // Ensure that builder can accommodate an additional number of - // elements. Resizes if the current capacity is not sufficient - Status Reserve(int32_t elements); - - std::shared_ptr Finish() override; - using ArrayBuilder::Advance; // Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory @@ -133,9 +121,21 @@ class PrimitiveBuilder : public ArrayBuilder { return data_; } + // Ensure that builder can accommodate an additional number of + // elements. Resizes if the current capacity is not sufficient + Status Reserve(int32_t elements); + protected: std::shared_ptr data_; value_type* raw_data_; + + Status Init(int32_t capacity); + + // Increase the capacity of the builder to accommodate at least the indicated + // number of elements + Status Resize(int32_t capacity); + + std::shared_ptr Finish() override; }; template @@ -157,6 +157,8 @@ class NumericBuilder : public PrimitiveBuilder { Status Append(const value_type* values, int32_t length, const uint8_t* valid_bytes = nullptr); + std::shared_ptr Finish() override; + protected: using PrimitiveBuilder::length_; using PrimitiveBuilder::null_bitmap_data_; From 2d3b85572502c0618b25aaa676b1d97bf1e79a2c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 15:19:56 -0700 Subject: [PATCH 7/8] Use gcc-4.9 to build thirdparty, too --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 49a956ead3d..d89a200b892 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,10 +20,10 @@ matrix: language: cpp os: linux before_script: - - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh - script: - export CC="gcc-4.9" - export CXX="g++-4.9" + - $TRAVIS_BUILD_DIR/ci/travis_before_script_cpp.sh + script: - $TRAVIS_BUILD_DIR/ci/travis_script_cpp.sh - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh - compiler: clang From f6b60f1996e52cf86c82125850518029e116ca74 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 25 Mar 2016 16:44:04 -0700 Subject: [PATCH 8/8] Clean up template instantiations to fix clang --- cpp/src/arrow/types/primitive.cc | 79 ++++++++++++-------------- cpp/src/arrow/types/primitive.h | 51 +++++++---------- python/src/pyarrow/adapters/builtin.cc | 1 - 3 files changed, 58 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 611457dbf63..c54d0757c47 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -122,12 +122,7 @@ Status PrimitiveBuilder::Reserve(int32_t elements) { } template -std::shared_ptr NumericBuilder::Finish() { - return PrimitiveBuilder::Finish(); -} - -template -Status NumericBuilder::Append(const value_type* values, int32_t length, +Status PrimitiveBuilder::Append(const value_type* values, int32_t length, const uint8_t* valid_bytes) { RETURN_NOT_OK(PrimitiveBuilder::Reserve(length)); @@ -170,16 +165,41 @@ std::shared_ptr PrimitiveBuilder::Finish() { return result; } -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; -template class NumericBuilder; +template <> +Status PrimitiveBuilder::Append(const uint8_t* values, int32_t length, + const uint8_t* valid_bytes) { + RETURN_NOT_OK(Reserve(length)); + + for (int i = 0; i < length; ++i) { + if (values[i] > 0) { + util::set_bit(raw_data_, length_ + i); + } else { + util::clear_bit(raw_data_, length_ + i); + } + } + + if (valid_bytes != nullptr) { + PrimitiveBuilder::AppendNulls(valid_bytes, length); + } else { + for (int i = 0; i < length; ++i) { + util::set_bit(null_bitmap_data_, length_ + i); + } + } + length_ += length; + return Status::OK(); +} + +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; +template class PrimitiveBuilder; BooleanArray::BooleanArray(int32_t length, const std::shared_ptr& data, int32_t null_count, @@ -222,31 +242,4 @@ bool BooleanArray::Equals(const std::shared_ptr& arr) const { return EqualsExact(*static_cast(arr.get())); } -Status BooleanBuilder::Append(const uint8_t* values, int32_t length, - const uint8_t* valid_bytes) { - RETURN_NOT_OK(Reserve(length)); - - for (int i = 0; i < length; ++i) { - if (values[i] > 0) { - util::set_bit(raw_data_, length_ + i); - } else { - util::clear_bit(raw_data_, length_ + i); - } - } - - if (valid_bytes != nullptr) { - PrimitiveBuilder::AppendNulls(valid_bytes, length); - } else { - for (int i = 0; i < length; ++i) { - util::set_bit(null_bitmap_data_, length_ + i); - } - } - length_ += length; - return Status::OK(); -} - -std::shared_ptr BooleanBuilder::Finish() { - return PrimitiveBuilder::Finish(); -} - } // namespace arrow diff --git a/cpp/src/arrow/types/primitive.h b/cpp/src/arrow/types/primitive.h index f090b46608c..ec6fee35513 100644 --- a/cpp/src/arrow/types/primitive.h +++ b/cpp/src/arrow/types/primitive.h @@ -121,10 +121,19 @@ class PrimitiveBuilder : public ArrayBuilder { return data_; } + // Vector append + // + // If passed, valid_bytes is of equal length to values, and any zero byte + // will be considered as a null for that slot + Status Append(const value_type* values, int32_t length, + const uint8_t* valid_bytes = nullptr); + // Ensure that builder can accommodate an additional number of // elements. Resizes if the current capacity is not sufficient Status Reserve(int32_t elements); + std::shared_ptr Finish() override; + protected: std::shared_ptr data_; value_type* raw_data_; @@ -134,15 +143,15 @@ class PrimitiveBuilder : public ArrayBuilder { // Increase the capacity of the builder to accommodate at least the indicated // number of elements Status Resize(int32_t capacity); - - std::shared_ptr Finish() override; }; -template -class NumericBuilder : public PrimitiveBuilder { +template +class NumericBuilder : public PrimitiveBuilder { public: - using typename PrimitiveBuilder::value_type; - using PrimitiveBuilder::PrimitiveBuilder; + using typename PrimitiveBuilder::value_type; + using PrimitiveBuilder::PrimitiveBuilder; + + using PrimitiveBuilder::Append; // Scalar append. Does not capacity-check; make sure to call Reserve beforehand void Append(value_type val) { @@ -150,19 +159,13 @@ class NumericBuilder : public PrimitiveBuilder { raw_data_[length_++] = val; } - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot - Status Append(const value_type* values, int32_t length, - const uint8_t* valid_bytes = nullptr); - - std::shared_ptr Finish() override; - protected: - using PrimitiveBuilder::length_; - using PrimitiveBuilder::null_bitmap_data_; - using PrimitiveBuilder::raw_data_; + using PrimitiveBuilder::length_; + using PrimitiveBuilder::null_bitmap_data_; + using PrimitiveBuilder::raw_data_; + + using PrimitiveBuilder::Init; + using PrimitiveBuilder::Resize; }; template <> @@ -306,7 +309,7 @@ class BooleanBuilder : public PrimitiveBuilder { virtual ~BooleanBuilder() {} - std::shared_ptr Finish() override; + using PrimitiveBuilder::Append; // Scalar append void Append(bool val) { @@ -322,16 +325,6 @@ class BooleanBuilder : public PrimitiveBuilder { void Append(uint8_t val) { Append(static_cast(val)); } - - // Vector append - // - // If passed, valid_bytes is of equal length to values, and any zero byte - // will be considered as a null for that slot - Status Append(const uint8_t* values, int32_t length, - const uint8_t* valid_bytes = nullptr); - - Status Append(const std::vector& values, - const uint8_t* valid_bytes = nullptr); }; } // namespace arrow diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc index 78fc32caf55..78ef1b31f34 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/python/src/pyarrow/adapters/builtin.cc @@ -258,7 +258,6 @@ class TypedConverter : public SeqConverter { class BoolConverter : public TypedConverter { public: Status AppendData(PyObject* seq) override { - int64_t val; Py_ssize_t size = PySequence_Size(seq); RETURN_ARROW_NOT_OK(typed_builder_->Reserve(size)); for (int64_t i = 0; i < size; ++i) {