From 30053ec09ba0c7c48982f96cd08cfda035a62156 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 22 Jun 2021 17:53:39 -0400
Subject: [PATCH 1/9] ARROW-11932: [C++] Implement ArrayBuilder::AppendScalar
---
cpp/src/arrow/array/array_test.cc | 33 ++++++++
cpp/src/arrow/array/builder_base.cc | 116 ++++++++++++++++++++++++++++
cpp/src/arrow/array/builder_base.h | 4 +
cpp/src/arrow/array/builder_dict.h | 1 +
cpp/src/arrow/testing/generator.cc | 90 +++------------------
5 files changed, 163 insertions(+), 81 deletions(-)
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index a97bf134604..97431a736e6 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -397,6 +397,33 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) {
}
}
+void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) {
+ std::unique_ptr builder;
+ auto null_scalar = MakeNullScalar(scalar->type);
+ ASSERT_OK(MakeBuilder(pool, scalar->type, &builder));
+ ASSERT_OK(builder->AppendScalar(*scalar));
+ ASSERT_OK(builder->AppendScalar(*scalar));
+ ASSERT_OK(builder->AppendScalar(*null_scalar));
+ ASSERT_OK(builder->AppendScalars({scalar, null_scalar}));
+
+ std::shared_ptr out;
+ FinishAndCheckPadding(builder.get(), &out);
+ ASSERT_OK(out->ValidateFull());
+ ASSERT_EQ(out->length(), 5);
+ ASSERT_EQ(out->null_count(), 2);
+ ASSERT_FALSE(out->IsNull(0));
+ ASSERT_FALSE(out->IsNull(1));
+ ASSERT_TRUE(out->IsNull(2));
+ ASSERT_FALSE(out->IsNull(3));
+ ASSERT_TRUE(out->IsNull(4));
+ ASSERT_OK_AND_ASSIGN(auto scalar0, out->GetScalar(0));
+ ASSERT_OK_AND_ASSIGN(auto scalar1, out->GetScalar(1));
+ ASSERT_OK_AND_ASSIGN(auto scalar3, out->GetScalar(3));
+ AssertScalarsEqual(*scalar, *scalar0, /*verbose=*/true);
+ AssertScalarsEqual(*scalar, *scalar1, /*verbose=*/true);
+ AssertScalarsEqual(*scalar, *scalar3, /*verbose=*/true);
+}
+
TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5));
ASSERT_OK(null_array->ValidateFull());
@@ -447,6 +474,10 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_EQ(array->null_count(), 0);
}
}
+
+ for (auto scalar : scalars) {
+ AssertAppendScalar(pool_, scalar);
+ }
}
TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
@@ -481,6 +512,8 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
ASSERT_TRUE(item->Equals(scalar));
}
+
+ AssertAppendScalar(pool_, std::make_shared(scalar));
}
TEST_F(TestArray, ValidateBuffersPrimitive) {
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index b92cc285894..59202241111 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -24,8 +24,11 @@
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
+#include "arrow/builder.h"
+#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
+#include "arrow/visitor_inline.h"
namespace arrow {
@@ -92,6 +95,119 @@ Status ArrayBuilder::Advance(int64_t elements) {
return null_bitmap_builder_.Advance(elements);
}
+struct AppendScalarImpl {
+ template ::BuilderType,
+ typename ScalarType = typename TypeTraits::ScalarType>
+ Status UseBuilder(const AppendScalar& append) {
+ for (const auto scalar : scalars_) {
+ if (scalar->is_valid) {
+ RETURN_NOT_OK(append(internal::checked_cast(*scalar),
+ static_cast(builder_)));
+ } else {
+ RETURN_NOT_OK(builder_->AppendNull());
+ }
+ }
+ return Status::OK();
+ }
+
+ struct AppendValue {
+ template
+ Status operator()(const ScalarType& s, BuilderType* builder) const {
+ return builder->Append(s.value);
+ }
+ };
+
+ struct AppendBuffer {
+ template
+ Status operator()(const ScalarType& s, BuilderType* builder) const {
+ const Buffer& buffer = *s.value;
+ return builder->Append(util::string_view{buffer});
+ }
+ };
+
+ struct AppendList {
+ template
+ Status operator()(const ScalarType& s, BuilderType* builder) const {
+ RETURN_NOT_OK(builder->Append());
+ const Array& list = *s.value;
+ for (int64_t i = 0; i < list.length(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
+ RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
+ }
+ return Status::OK();
+ }
+ };
+
+ template
+ enable_if_has_c_type Visit(const T&) {
+ return UseBuilder(AppendValue{});
+ }
+
+ template
+ enable_if_has_string_view Visit(const T&) {
+ return UseBuilder(AppendBuffer{});
+ }
+
+ template
+ enable_if_decimal Visit(const T&) {
+ return UseBuilder(AppendValue{});
+ }
+
+ template
+ enable_if_list_like Visit(const T&) {
+ return UseBuilder(AppendList{});
+ }
+
+ Status Visit(const StructType& type) {
+ auto* builder = static_cast(builder_);
+ for (const auto s : scalars_) {
+ const auto& scalar = internal::checked_cast(*s);
+ for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+ if (!scalar.is_valid || !scalar.value[field_index]) {
+ RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
+ } else {
+ RETURN_NOT_OK(builder->field_builder(field_index)
+ ->AppendScalar(*scalar.value[field_index]));
+ }
+ }
+ RETURN_NOT_OK(builder->Append(scalar.is_valid));
+ }
+ return Status::OK();
+ }
+
+ Status Visit(const DataType& type) {
+ return Status::NotImplemented("AppendScalar for type ", type);
+ }
+
+ Status Convert() { return VisitTypeInline(*scalars_[0]->type, this); }
+
+ std::vector scalars_;
+ ArrayBuilder* builder_;
+};
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
+ if (!scalar.type->Equals(type())) {
+ return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ return AppendScalarImpl{{&scalar}, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
+ if (scalars.empty()) return Status::OK();
+ std::vector refs;
+ refs.reserve(scalars.size());
+ for (const auto& scalar : scalars) {
+ if (!scalar->type->Equals(type())) {
+ return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ refs.push_back(scalar.get());
+ }
+ return AppendScalarImpl{refs, this}.Convert();
+}
+
Status ArrayBuilder::Finish(std::shared_ptr* out) {
std::shared_ptr internal_data;
RETURN_NOT_OK(FinishInternal(&internal_data));
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index 15c726241b5..e0ac4d68e1a 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -116,6 +116,10 @@ class ARROW_EXPORT ArrayBuilder {
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;
+ /// \brief Append a value from a scalar
+ virtual Status AppendScalar(const Scalar& scalar);
+ virtual Status AppendScalars(const ScalarVector& scalars);
+
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
diff --git a/cpp/src/arrow/array/builder_dict.h b/cpp/src/arrow/array/builder_dict.h
index 40d6ce1ba9a..455cb3df7b1 100644
--- a/cpp/src/arrow/array/builder_dict.h
+++ b/cpp/src/arrow/array/builder_dict.h
@@ -29,6 +29,7 @@
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
+#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
diff --git a/cpp/src/arrow/testing/generator.cc b/cpp/src/arrow/testing/generator.cc
index 71fad394d00..33371d55c6d 100644
--- a/cpp/src/arrow/testing/generator.cc
+++ b/cpp/src/arrow/testing/generator.cc
@@ -95,88 +95,16 @@ std::shared_ptr ConstantArrayGenerator::String(int64_t size,
return ConstantArray(size, value);
}
-struct ScalarVectorToArrayImpl {
- template ::BuilderType,
- typename ScalarType = typename TypeTraits::ScalarType>
- Status UseBuilder(const AppendScalar& append) {
- BuilderType builder(type_, default_memory_pool());
- for (const auto& s : scalars_) {
- if (s->is_valid) {
- RETURN_NOT_OK(append(internal::checked_cast(*s), &builder));
- } else {
- RETURN_NOT_OK(builder.AppendNull());
- }
- }
- return builder.FinishInternal(&data_);
- }
-
- struct AppendValue {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- return builder->Append(s.value);
- }
- };
-
- struct AppendBuffer {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- const Buffer& buffer = *s.value;
- return builder->Append(util::string_view{buffer});
- }
- };
-
- template
- enable_if_primitive_ctype Visit(const T&) {
- return UseBuilder(AppendValue{});
- }
-
- template
- enable_if_has_string_view Visit(const T&) {
- return UseBuilder(AppendBuffer{});
- }
-
- Status Visit(const StructType& type) {
- data_ = ArrayData::Make(type_, static_cast(scalars_.size()),
- {/*null_bitmap=*/nullptr});
- data_->child_data.resize(type_->num_fields());
-
- ScalarVector field_scalars(scalars_.size());
-
- for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
- for (size_t i = 0; i < scalars_.size(); ++i) {
- field_scalars[i] =
- internal::checked_cast(scalars_[i].get())->value[field_index];
- }
-
- ARROW_ASSIGN_OR_RAISE(data_->child_data[field_index],
- ScalarVectorToArrayImpl{}.Convert(field_scalars));
- }
- return Status::OK();
- }
-
- Status Visit(const DataType& type) {
- return Status::NotImplemented("ScalarVectorToArray for type ", type);
- }
-
- Result> Convert(const ScalarVector& scalars) && {
- if (scalars.size() == 0) {
- return Status::NotImplemented("ScalarVectorToArray with no scalars");
- }
- scalars_ = std::move(scalars);
- type_ = scalars_[0]->type;
- RETURN_NOT_OK(VisitTypeInline(*type_, this));
- return std::move(data_);
- }
-
- std::shared_ptr type_;
- ScalarVector scalars_;
- std::shared_ptr data_;
-};
-
Result> ScalarVectorToArray(const ScalarVector& scalars) {
- ARROW_ASSIGN_OR_RAISE(auto data, ScalarVectorToArrayImpl{}.Convert(scalars));
- return MakeArray(std::move(data));
+ if (scalars.empty()) {
+ return Status::NotImplemented("ScalarVectorToArray with no scalars");
+ }
+ std::unique_ptr builder;
+ RETURN_NOT_OK(MakeBuilder(default_memory_pool(), scalars[0]->type, &builder));
+ RETURN_NOT_OK(builder->AppendScalars(scalars));
+ std::shared_ptr out;
+ RETURN_NOT_OK(builder->Finish(&out));
+ return out;
}
} // namespace arrow
From 333d80992d77fadb3535a51dfb74abb990d68c27 Mon Sep 17 00:00:00 2001
From: David Li
Date: Thu, 24 Jun 2021 11:45:26 -0400
Subject: [PATCH 2/9] ARROW-11932: [C++] Add ArrayBuilder::AppendScalar(const
Scalar&, int64_t)
---
cpp/src/arrow/array/array_test.cc | 25 +++++++--------
cpp/src/arrow/array/builder_base.cc | 49 ++++++++++++++++++-----------
cpp/src/arrow/array/builder_base.h | 5 +--
3 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc
index 97431a736e6..682baab208d 100644
--- a/cpp/src/arrow/array/array_test.cc
+++ b/cpp/src/arrow/array/array_test.cc
@@ -405,23 +405,22 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar)
ASSERT_OK(builder->AppendScalar(*scalar));
ASSERT_OK(builder->AppendScalar(*null_scalar));
ASSERT_OK(builder->AppendScalars({scalar, null_scalar}));
+ ASSERT_OK(builder->AppendScalar(*scalar, /*n_repeats=*/2));
+ ASSERT_OK(builder->AppendScalar(*null_scalar, /*n_repeats=*/2));
std::shared_ptr out;
FinishAndCheckPadding(builder.get(), &out);
ASSERT_OK(out->ValidateFull());
- ASSERT_EQ(out->length(), 5);
- ASSERT_EQ(out->null_count(), 2);
- ASSERT_FALSE(out->IsNull(0));
- ASSERT_FALSE(out->IsNull(1));
- ASSERT_TRUE(out->IsNull(2));
- ASSERT_FALSE(out->IsNull(3));
- ASSERT_TRUE(out->IsNull(4));
- ASSERT_OK_AND_ASSIGN(auto scalar0, out->GetScalar(0));
- ASSERT_OK_AND_ASSIGN(auto scalar1, out->GetScalar(1));
- ASSERT_OK_AND_ASSIGN(auto scalar3, out->GetScalar(3));
- AssertScalarsEqual(*scalar, *scalar0, /*verbose=*/true);
- AssertScalarsEqual(*scalar, *scalar1, /*verbose=*/true);
- AssertScalarsEqual(*scalar, *scalar3, /*verbose=*/true);
+ ASSERT_EQ(out->length(), 9);
+ ASSERT_EQ(out->null_count(), 4);
+ for (const auto index : {0, 1, 3, 5, 6}) {
+ ASSERT_FALSE(out->IsNull(index));
+ ASSERT_OK_AND_ASSIGN(auto scalar_i, out->GetScalar(index));
+ AssertScalarsEqual(*scalar, *scalar_i, /*verbose=*/true);
+ }
+ for (const auto index : {2, 4, 7, 8}) {
+ ASSERT_TRUE(out->IsNull(index));
+ }
}
TEST_F(TestArray, TestMakeArrayFromScalar) {
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 59202241111..8dbad29008a 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -100,12 +100,14 @@ struct AppendScalarImpl {
typename BuilderType = typename TypeTraits::BuilderType,
typename ScalarType = typename TypeTraits::ScalarType>
Status UseBuilder(const AppendScalar& append) {
- for (const auto scalar : scalars_) {
- if (scalar->is_valid) {
- RETURN_NOT_OK(append(internal::checked_cast(*scalar),
- static_cast(builder_)));
- } else {
- RETURN_NOT_OK(builder_->AppendNull());
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const auto scalar : scalars_) {
+ if (scalar->is_valid) {
+ RETURN_NOT_OK(append(internal::checked_cast(*scalar),
+ static_cast(builder_)));
+ } else {
+ RETURN_NOT_OK(builder_->AppendNull());
+ }
}
}
return Status::OK();
@@ -160,18 +162,20 @@ struct AppendScalarImpl {
}
Status Visit(const StructType& type) {
- auto* builder = static_cast(builder_);
- for (const auto s : scalars_) {
- const auto& scalar = internal::checked_cast(*s);
- for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
- if (!scalar.is_valid || !scalar.value[field_index]) {
- RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
- } else {
- RETURN_NOT_OK(builder->field_builder(field_index)
- ->AppendScalar(*scalar.value[field_index]));
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ auto* builder = static_cast(builder_);
+ for (const auto s : scalars_) {
+ const auto& scalar = internal::checked_cast(*s);
+ for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+ if (!scalar.is_valid || !scalar.value[field_index]) {
+ RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
+ } else {
+ RETURN_NOT_OK(builder->field_builder(field_index)
+ ->AppendScalar(*scalar.value[field_index]));
+ }
}
+ RETURN_NOT_OK(builder->Append(scalar.is_valid));
}
- RETURN_NOT_OK(builder->Append(scalar.is_valid));
}
return Status::OK();
}
@@ -183,6 +187,7 @@ struct AppendScalarImpl {
Status Convert() { return VisitTypeInline(*scalars_[0]->type, this); }
std::vector scalars_;
+ int64_t n_repeats_;
ArrayBuilder* builder_;
};
@@ -191,7 +196,15 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
" to builder for type ", type()->ToString());
}
- return AppendScalarImpl{{&scalar}, this}.Convert();
+ return AppendScalarImpl{{&scalar}, /*n_repeats=*/1, this}.Convert();
+}
+
+Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
+ if (!scalar.type->Equals(type())) {
+ return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
+ " to builder for type ", type()->ToString());
+ }
+ return AppendScalarImpl{{&scalar}, n_repeats, this}.Convert();
}
Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
@@ -205,7 +218,7 @@ Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
}
refs.push_back(scalar.get());
}
- return AppendScalarImpl{refs, this}.Convert();
+ return AppendScalarImpl{refs, /*n_repeats=*/1, this}.Convert();
}
Status ArrayBuilder::Finish(std::shared_ptr* out) {
diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h
index e0ac4d68e1a..8e60c306796 100644
--- a/cpp/src/arrow/array/builder_base.h
+++ b/cpp/src/arrow/array/builder_base.h
@@ -117,8 +117,9 @@ class ARROW_EXPORT ArrayBuilder {
virtual Status AppendEmptyValues(int64_t length) = 0;
/// \brief Append a value from a scalar
- virtual Status AppendScalar(const Scalar& scalar);
- virtual Status AppendScalars(const ScalarVector& scalars);
+ Status AppendScalar(const Scalar& scalar);
+ Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
+ Status AppendScalars(const ScalarVector& scalars);
/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
From f0bd23f7cfec9c7ba5ad55bfba554e2f2420e25f Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 28 Jun 2021 10:29:41 -0400
Subject: [PATCH 3/9] ARROW-11932: [C++] Reserve() in AppendScalar
---
cpp/src/arrow/array/builder_base.cc | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 8dbad29008a..584c244d989 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -95,6 +95,7 @@ Status ArrayBuilder::Advance(int64_t elements) {
return null_bitmap_builder_.Advance(elements);
}
+namespace {
struct AppendScalarImpl {
template ::BuilderType,
@@ -190,6 +191,7 @@ struct AppendScalarImpl {
int64_t n_repeats_;
ArrayBuilder* builder_;
};
+} // namespace
Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
if (!scalar.type->Equals(type())) {
@@ -204,11 +206,13 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
" to builder for type ", type()->ToString());
}
+ RETURN_NOT_OK(Reserve(n_repeats));
return AppendScalarImpl{{&scalar}, n_repeats, this}.Convert();
}
Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
if (scalars.empty()) return Status::OK();
+ RETURN_NOT_OK(Reserve(scalars.size()));
std::vector refs;
refs.reserve(scalars.size());
for (const auto& scalar : scalars) {
From 8450a4c0cac0383d47d5d07da30496a5aecb6005 Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 28 Jun 2021 15:44:55 -0400
Subject: [PATCH 4/9] ARROW-11932: [C++] Don't require an allocation in
AppendScalar
---
cpp/src/arrow/array/builder_base.cc | 32 +++++++++++++++--------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index 584c244d989..b127dd15415 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -102,9 +102,10 @@ struct AppendScalarImpl {
typename ScalarType = typename TypeTraits::ScalarType>
Status UseBuilder(const AppendScalar& append) {
for (int64_t i = 0; i < n_repeats_; i++) {
- for (const auto scalar : scalars_) {
- if (scalar->is_valid) {
- RETURN_NOT_OK(append(internal::checked_cast(*scalar),
+ for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_;
+ scalar++) {
+ if ((*scalar)->is_valid) {
+ RETURN_NOT_OK(append(internal::checked_cast(**scalar),
static_cast(builder_)));
} else {
RETURN_NOT_OK(builder_->AppendNull());
@@ -165,8 +166,8 @@ struct AppendScalarImpl {
Status Visit(const StructType& type) {
for (int64_t i = 0; i < n_repeats_; i++) {
auto* builder = static_cast(builder_);
- for (const auto s : scalars_) {
- const auto& scalar = internal::checked_cast(*s);
+ for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) {
+ const auto& scalar = internal::checked_cast(**s);
for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
if (!scalar.is_valid || !scalar.value[field_index]) {
RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
@@ -185,9 +186,10 @@ struct AppendScalarImpl {
return Status::NotImplemented("AppendScalar for type ", type);
}
- Status Convert() { return VisitTypeInline(*scalars_[0]->type, this); }
+ Status Convert() { return VisitTypeInline(*(*scalars_begin_)->type, this); }
- std::vector scalars_;
+ const std::shared_ptr* scalars_begin_;
+ const std::shared_ptr* scalars_end_;
int64_t n_repeats_;
ArrayBuilder* builder_;
};
@@ -198,7 +200,8 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
" to builder for type ", type()->ToString());
}
- return AppendScalarImpl{{&scalar}, /*n_repeats=*/1, this}.Convert();
+ std::shared_ptr shared{const_cast(&scalar), [](Scalar*) {}};
+ return AppendScalarImpl{&shared, &shared + 1, /*n_repeats=*/1, this}.Convert();
}
Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
@@ -207,22 +210,21 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
" to builder for type ", type()->ToString());
}
RETURN_NOT_OK(Reserve(n_repeats));
- return AppendScalarImpl{{&scalar}, n_repeats, this}.Convert();
+ std::shared_ptr shared{const_cast(&scalar), [](Scalar*) {}};
+ return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
}
Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
if (scalars.empty()) return Status::OK();
- RETURN_NOT_OK(Reserve(scalars.size()));
- std::vector refs;
- refs.reserve(scalars.size());
+ const auto ty = type();
for (const auto& scalar : scalars) {
- if (!scalar->type->Equals(type())) {
+ if (!scalar->type->Equals(ty)) {
return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(),
" to builder for type ", type()->ToString());
}
- refs.push_back(scalar.get());
}
- return AppendScalarImpl{refs, /*n_repeats=*/1, this}.Convert();
+ return AppendScalarImpl{&*scalars.cbegin(), &*scalars.cend(), /*n_repeats=*/1, this}
+ .Convert();
}
Status ArrayBuilder::Finish(std::shared_ptr* out) {
From 3a7037cff5eeec3174c71692a5782f81d567b83a Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 28 Jun 2021 17:04:50 -0400
Subject: [PATCH 5/9] ARROW-11932: [C++] Fix test on MSVC
---
cpp/src/arrow/array/builder_base.cc | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index b127dd15415..ab04e099904 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -223,7 +223,8 @@ Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
" to builder for type ", type()->ToString());
}
}
- return AppendScalarImpl{&*scalars.cbegin(), &*scalars.cend(), /*n_repeats=*/1, this}
+ return AppendScalarImpl{scalars.data(), scalars.data() + scalars.size(),
+ /*n_repeats=*/1, this}
.Convert();
}
From af2d7f34804772b0b28e092f6509a0654cf986c7 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 29 Jun 2021 11:04:21 -0400
Subject: [PATCH 6/9] Update cpp/src/arrow/array/builder_base.cc
Co-authored-by: Benjamin Kietzman
---
cpp/src/arrow/array/builder_base.cc | 50 +++++++++++++++++++++++------
1 file changed, 41 insertions(+), 9 deletions(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index ab04e099904..a61f92f0fc2 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -144,20 +144,52 @@ struct AppendScalarImpl {
};
template
- enable_if_has_c_type Visit(const T&) {
- return UseBuilder(AppendValue{});
- }
+ enable_if_fixed_width Visit(const T&) {
+ auto builder = checked_cast::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalar_end_ - scalar_begin_)));
- template
- enable_if_has_string_view Visit(const T&) {
- return UseBuilder(AppendBuffer{});
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar = checked_cast::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ builder->UnsafeAppend(scalar->value);
+ } else {
+ builder->UnsafeAppendNull();
+ }
+ }
+ }
+ return Status::OK();
}
template
- enable_if_decimal Visit(const T&) {
- return UseBuilder(AppendValue{});
- }
+ enable_if_base_binary Visit(const T&) {
+ int64_t data_size = 0;
+ for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar = checked_cast::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ data_size += scalar->value->size();
+ }
+ }
+
+ auto builder = checked_cast::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalar_end_ - scalar_begin_)));
+ RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
+ raw++) {
+ auto scalar = checked_cast::ScalarType*>(raw->get());
+ if (scalar->is_valid) {
+ builder->UnsafeAppend(util::string_view{*scalar->value});
+ } else {
+ builder->UnsafeAppendNull();
+ }
+ }
+ }
+ return Status::OK();
+ }
template
enable_if_list_like Visit(const T&) {
return UseBuilder(AppendList{});
From bb62115ed4b66e3e799c507841defd5d57a60045 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 29 Jun 2021 11:31:21 -0400
Subject: [PATCH 7/9] ARROW-11932: [C++] Reserve in more cases
---
cpp/src/arrow/array/builder_base.cc | 110 +++++++++++++---------------
1 file changed, 51 insertions(+), 59 deletions(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index a61f92f0fc2..d5d3b4e1288 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -97,61 +97,16 @@ Status ArrayBuilder::Advance(int64_t elements) {
namespace {
struct AppendScalarImpl {
- template ::BuilderType,
- typename ScalarType = typename TypeTraits::ScalarType>
- Status UseBuilder(const AppendScalar& append) {
- for (int64_t i = 0; i < n_repeats_; i++) {
- for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_;
- scalar++) {
- if ((*scalar)->is_valid) {
- RETURN_NOT_OK(append(internal::checked_cast(**scalar),
- static_cast(builder_)));
- } else {
- RETURN_NOT_OK(builder_->AppendNull());
- }
- }
- }
- return Status::OK();
- }
-
- struct AppendValue {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- return builder->Append(s.value);
- }
- };
-
- struct AppendBuffer {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- const Buffer& buffer = *s.value;
- return builder->Append(util::string_view{buffer});
- }
- };
-
- struct AppendList {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- RETURN_NOT_OK(builder->Append());
- const Array& list = *s.value;
- for (int64_t i = 0; i < list.length(); i++) {
- ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
- RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
- }
- return Status::OK();
- }
- };
-
template
- enable_if_fixed_width Visit(const T&) {
- auto builder = checked_cast::BuilderType*>(builder_);
- RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalar_end_ - scalar_begin_)));
+ enable_if_t::value || is_decimal_type::value, Status> Visit(const T&) {
+ auto builder = internal::checked_cast::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
for (int64_t i = 0; i < n_repeats_; i++) {
for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
raw++) {
- auto scalar = checked_cast::ScalarType*>(raw->get());
+ auto scalar =
+ internal::checked_cast::ScalarType*>(raw->get());
if (scalar->is_valid) {
builder->UnsafeAppend(scalar->value);
} else {
@@ -163,24 +118,26 @@ struct AppendScalarImpl {
}
template
- enable_if_base_binary Visit(const T&) {
+ enable_if_has_string_view Visit(const T&) {
int64_t data_size = 0;
for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
- raw++) {
- auto scalar = checked_cast::ScalarType*>(raw->get());
+ raw++) {
+ auto scalar =
+ internal::checked_cast::ScalarType*>(raw->get());
if (scalar->is_valid) {
data_size += scalar->value->size();
}
}
- auto builder = checked_cast::BuilderType*>(builder_);
- RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalar_end_ - scalar_begin_)));
+ auto builder = internal::checked_cast::BuilderType*>(builder_);
+ RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));
for (int64_t i = 0; i < n_repeats_; i++) {
for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
raw++) {
- auto scalar = checked_cast::ScalarType*>(raw->get());
+ auto scalar =
+ internal::checked_cast::ScalarType*>(raw->get());
if (scalar->is_valid) {
builder->UnsafeAppend(util::string_view{*scalar->value});
} else {
@@ -190,14 +147,50 @@ struct AppendScalarImpl {
}
return Status::OK();
}
+
+ struct AppendList {
+ template
+ Status operator()(const ScalarType& s, BuilderType* builder) const {
+ RETURN_NOT_OK(builder->Append());
+ const Array& list = *s.value;
+ for (int64_t i = 0; i < list.length(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
+ RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
+ }
+ return Status::OK();
+ }
+ };
+
template
enable_if_list_like Visit(const T&) {
- return UseBuilder(AppendList{});
+ auto builder = internal::checked_cast::BuilderType*>(builder_);
+ for (int64_t i = 0; i < n_repeats_; i++) {
+ for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_;
+ scalar++) {
+ if ((*scalar)->is_valid) {
+ RETURN_NOT_OK(builder->Append());
+ const Array& list =
+ *internal::checked_cast(**scalar).value;
+ for (int64_t i = 0; i < list.length(); i++) {
+ ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
+ RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
+ }
+ } else {
+ RETURN_NOT_OK(builder_->AppendNull());
+ }
+ }
+ }
+ return Status::OK();
}
Status Visit(const StructType& type) {
+ auto* builder = internal::checked_cast(builder_);
+ auto count = n_repeats_ * (scalars_end_ - scalars_begin_);
+ RETURN_NOT_OK(builder->Reserve(count));
+ for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
+ RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
+ }
for (int64_t i = 0; i < n_repeats_; i++) {
- auto* builder = static_cast(builder_);
for (const std::shared_ptr* s = scalars_begin_; s != scalars_end_; s++) {
const auto& scalar = internal::checked_cast(**s);
for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
@@ -241,7 +234,6 @@ Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
" to builder for type ", type()->ToString());
}
- RETURN_NOT_OK(Reserve(n_repeats));
std::shared_ptr shared{const_cast(&scalar), [](Scalar*) {}};
return AppendScalarImpl{&shared, &shared + 1, n_repeats, this}.Convert();
}
From 4a41940d240d3e0ed37ec0398e04023b03d0b799 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 29 Jun 2021 14:29:44 -0400
Subject: [PATCH 8/9] ARROW-11932: [C++] Handle fixed size binary with other
fixed size types
---
cpp/src/arrow/array/builder_base.cc | 7 +++++--
cpp/src/arrow/array/builder_binary.h | 12 ++++++++++++
2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index d5d3b4e1288..fc1c3a5057a 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -98,7 +98,10 @@ Status ArrayBuilder::Advance(int64_t elements) {
namespace {
struct AppendScalarImpl {
template
- enable_if_t::value || is_decimal_type::value, Status> Visit(const T&) {
+ enable_if_t::value || is_decimal_type::value ||
+ is_fixed_size_binary_type::value,
+ Status>
+ Visit(const T&) {
auto builder = internal::checked_cast::BuilderType*>(builder_);
RETURN_NOT_OK(builder->Reserve(n_repeats_ * (scalars_end_ - scalars_begin_)));
@@ -118,7 +121,7 @@ struct AppendScalarImpl {
}
template
- enable_if_has_string_view Visit(const T&) {
+ enable_if_base_binary Visit(const T&) {
int64_t data_size = 0;
for (const std::shared_ptr* raw = scalars_begin_; raw != scalars_end_;
raw++) {
diff --git a/cpp/src/arrow/array/builder_binary.h b/cpp/src/arrow/array/builder_binary.h
index c1c664a1249..7653eeca5c4 100644
--- a/cpp/src/arrow/array/builder_binary.h
+++ b/cpp/src/arrow/array/builder_binary.h
@@ -467,6 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
return Status::OK();
}
+ Status Append(const Buffer& s) {
+ ARROW_RETURN_NOT_OK(Reserve(1));
+ UnsafeAppend(util::string_view(s));
+ return Status::OK();
+ }
+
+ Status Append(const std::shared_ptr& s) { return Append(*s); }
+
template
Status Append(const std::array& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
@@ -502,6 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
UnsafeAppend(reinterpret_cast(value.data()));
}
+ void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }
+
+ void UnsafeAppend(const std::shared_ptr& s) { UnsafeAppend(*s); }
+
void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
From 2bc15af32daefa053339e41cba2e068a583d1290 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 29 Jun 2021 14:37:13 -0400
Subject: [PATCH 9/9] ARROW-11932: [C++] Clean up implementation for lists
---
cpp/src/arrow/array/builder_base.cc | 22 +++++++++-------------
1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/cpp/src/arrow/array/builder_base.cc b/cpp/src/arrow/array/builder_base.cc
index fc1c3a5057a..c892e3d664b 100644
--- a/cpp/src/arrow/array/builder_base.cc
+++ b/cpp/src/arrow/array/builder_base.cc
@@ -151,22 +151,18 @@ struct AppendScalarImpl {
return Status::OK();
}
- struct AppendList {
- template
- Status operator()(const ScalarType& s, BuilderType* builder) const {
- RETURN_NOT_OK(builder->Append());
- const Array& list = *s.value;
- for (int64_t i = 0; i < list.length(); i++) {
- ARROW_ASSIGN_OR_RAISE(auto scalar, list.GetScalar(i));
- RETURN_NOT_OK(builder->value_builder()->AppendScalar(*scalar));
- }
- return Status::OK();
- }
- };
-
template
enable_if_list_like Visit(const T&) {
auto builder = internal::checked_cast::BuilderType*>(builder_);
+ int64_t num_children = 0;
+ for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_;
+ scalar++) {
+ if (!(*scalar)->is_valid) continue;
+ num_children +=
+ internal::checked_cast(**scalar).value->length();
+ }
+ RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));
+
for (int64_t i = 0; i < n_repeats_; i++) {
for (const std::shared_ptr* scalar = scalars_begin_; scalar != scalars_end_;
scalar++) {