Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ if(ARROW_COMPUTE)
compute/exec/exec_plan.cc
compute/exec/expression.cc
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
compute/registry.cc
compute/kernels/aggregate_basic.cc
Expand Down
32 changes: 32 additions & 0 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,32 @@ TEST_F(TestArray, TestMakeArrayOfNullUnion) {
}
}

// Test helper: feeds `scalar` and a null scalar of the same type through each of
// the ArrayBuilder scalar-append entry points (single, vector, repeated), then
// checks the finished array slot by slot.
//
// Expected layout of the 9 appended slots (V = equal to `scalar`, N = null):
//   V V N V N V V N N
void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar) {
  const auto null_value = MakeNullScalar(scalar->type);

  std::unique_ptr<arrow::ArrayBuilder> array_builder;
  ASSERT_OK(MakeBuilder(pool, scalar->type, &array_builder));

  // Single-scalar overload: slots 0, 1 (valid) and 2 (null).
  ASSERT_OK(array_builder->AppendScalar(*scalar));
  ASSERT_OK(array_builder->AppendScalar(*scalar));
  ASSERT_OK(array_builder->AppendScalar(*null_value));
  // Vector overload: slots 3 (valid) and 4 (null).
  ASSERT_OK(array_builder->AppendScalars({scalar, null_value}));
  // Repeated overload: slots 5, 6 (valid) and 7, 8 (null).
  ASSERT_OK(array_builder->AppendScalar(*scalar, /*n_repeats=*/2));
  ASSERT_OK(array_builder->AppendScalar(*null_value, /*n_repeats=*/2));

  std::shared_ptr<Array> result;
  FinishAndCheckPadding(array_builder.get(), &result);
  ASSERT_OK(result->ValidateFull());
  ASSERT_EQ(result->length(), 9);
  ASSERT_EQ(result->null_count(), 4);

  // Every valid slot must round-trip to a scalar equal to the input.
  for (const auto valid_slot : {0, 1, 3, 5, 6}) {
    ASSERT_FALSE(result->IsNull(valid_slot));
    ASSERT_OK_AND_ASSIGN(auto element, result->GetScalar(valid_slot));
    AssertScalarsEqual(*scalar, *element, /*verbose=*/true);
  }
  for (const auto null_slot : {2, 4, 7, 8}) {
    ASSERT_TRUE(result->IsNull(null_slot));
  }
}

TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_OK_AND_ASSIGN(auto null_array, MakeArrayFromScalar(NullScalar(), 5));
ASSERT_OK(null_array->ValidateFull());
Expand Down Expand Up @@ -447,6 +473,10 @@ TEST_F(TestArray, TestMakeArrayFromScalar) {
ASSERT_EQ(array->null_count(), 0);
}
}

for (auto scalar : scalars) {
AssertAppendScalar(pool_, scalar);
}
}

TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) {
Expand Down Expand Up @@ -481,6 +511,8 @@ TEST_F(TestArray, TestMakeArrayFromMapScalar) {
ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i));
ASSERT_TRUE(item->Equals(scalar));
}

AssertAppendScalar(pool_, std::make_shared<MapScalar>(scalar));
}

TEST_F(TestArray, ValidateBuffersPrimitive) {
Expand Down
159 changes: 159 additions & 0 deletions cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,11 @@
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/buffer.h"
#include "arrow/builder.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/util/logging.h"
#include "arrow/visitor_inline.h"

namespace arrow {

Expand Down Expand Up @@ -92,6 +95,162 @@ Status ArrayBuilder::Advance(int64_t elements) {
return null_bitmap_builder_.Advance(elements);
}

namespace {

/// \brief Visitor that appends a range of scalars to an ArrayBuilder
///
/// The scalars in [scalars_begin_, scalars_end_) are appended in order to
/// builder_, and the whole range is appended n_repeats_ times. Convert()
/// dispatches on the type of the first scalar and each Visit overload downcasts
/// builder_ and the scalars with checked_cast, so every scalar and the builder
/// must share one type; the ArrayBuilder entry points in this file check that
/// before constructing this struct.
///
/// Callers initialize the data members positionally, so their declaration order
/// (begin, end, n_repeats, builder) must be preserved.
struct AppendScalarImpl {
  /// Number of scalars in the input range (not multiplied by n_repeats_).
  int64_t num_scalars() const {
    return static_cast<int64_t>(scalars_end_ - scalars_begin_);
  }

  /// Types whose scalar `value` can be passed directly to the builder's
  /// UnsafeAppend: C-typed primitives, decimals and fixed-size binary.
  template <typename T>
  enable_if_t<has_c_type<T>::value || is_decimal_type<T>::value ||
                  is_fixed_size_binary_type<T>::value,
              Status>
  Visit(const T&) {
    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
    // Reserve every slot up front so the loop can use the Unsafe* appends.
    RETURN_NOT_OK(builder->Reserve(n_repeats_ * num_scalars()));

    for (int64_t repeat = 0; repeat < n_repeats_; ++repeat) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           ++raw) {
        auto scalar =
            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
        if (scalar->is_valid) {
          builder->UnsafeAppend(scalar->value);
        } else {
          builder->UnsafeAppendNull();
        }
      }
    }
    return Status::OK();
  }

  /// Variable-length binary/string types: reserve both the slots and the value
  /// bytes before appending.
  template <typename T>
  enable_if_base_binary<T, Status> Visit(const T&) {
    // First pass: total value bytes of one repetition (null scalars add none).
    int64_t data_size = 0;
    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
         ++raw) {
      auto scalar =
          internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
      if (scalar->is_valid) {
        data_size += scalar->value->size();
      }
    }

    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);
    RETURN_NOT_OK(builder->Reserve(n_repeats_ * num_scalars()));
    RETURN_NOT_OK(builder->ReserveData(n_repeats_ * data_size));

    // Second pass: everything is reserved, so the Unsafe* appends can be used.
    for (int64_t repeat = 0; repeat < n_repeats_; ++repeat) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           ++raw) {
        auto scalar =
            internal::checked_cast<const typename TypeTraits<T>::ScalarType*>(raw->get());
        if (scalar->is_valid) {
          builder->UnsafeAppend(util::string_view{*scalar->value});
        } else {
          builder->UnsafeAppendNull();
        }
      }
    }
    return Status::OK();
  }

  /// List-like types: start a new list slot, then append each child element of
  /// the scalar's array to the value builder through AppendScalar.
  template <typename T>
  enable_if_list_like<T, Status> Visit(const T&) {
    auto builder = internal::checked_cast<typename TypeTraits<T>::BuilderType*>(builder_);

    // Count the child elements of one repetition so the value builder can be
    // reserved once; null list scalars are skipped.
    int64_t num_children = 0;
    for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
         ++raw) {
      if (!(*raw)->is_valid) continue;
      num_children +=
          internal::checked_cast<const BaseListScalar&>(**raw).value->length();
    }
    RETURN_NOT_OK(builder->value_builder()->Reserve(num_children * n_repeats_));

    for (int64_t repeat = 0; repeat < n_repeats_; ++repeat) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           ++raw) {
        if ((*raw)->is_valid) {
          RETURN_NOT_OK(builder->Append());
          const Array& list =
              *internal::checked_cast<const BaseListScalar&>(**raw).value;
          // Distinct names for the inner index and element: the previous code
          // reused `i` and `scalar` here, shadowing the enclosing loop variables.
          for (int64_t child_index = 0; child_index < list.length(); ++child_index) {
            ARROW_ASSIGN_OR_RAISE(auto child, list.GetScalar(child_index));
            RETURN_NOT_OK(builder->value_builder()->AppendScalar(*child));
          }
        } else {
          RETURN_NOT_OK(builder->AppendNull());
        }
      }
    }
    return Status::OK();
  }

  /// Struct type: append one value (or a null) to every field builder, then
  /// record the validity of the struct slot itself.
  Status Visit(const StructType& type) {
    auto* builder = internal::checked_cast<StructBuilder*>(builder_);
    const auto count = n_repeats_ * num_scalars();
    RETURN_NOT_OK(builder->Reserve(count));
    for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
      RETURN_NOT_OK(builder->field_builder(field_index)->Reserve(count));
    }
    for (int64_t repeat = 0; repeat < n_repeats_; ++repeat) {
      for (const std::shared_ptr<Scalar>* raw = scalars_begin_; raw != scalars_end_;
           ++raw) {
        const auto& scalar = internal::checked_cast<const StructScalar&>(**raw);
        for (int field_index = 0; field_index < type.num_fields(); ++field_index) {
          // is_valid is tested first so `value` is never indexed for a null
          // struct scalar; a missing (null pointer) field also becomes a null.
          if (!scalar.is_valid || !scalar.value[field_index]) {
            RETURN_NOT_OK(builder->field_builder(field_index)->AppendNull());
          } else {
            RETURN_NOT_OK(builder->field_builder(field_index)
                              ->AppendScalar(*scalar.value[field_index]));
          }
        }
        RETURN_NOT_OK(builder->Append(scalar.is_valid));
      }
    }
    return Status::OK();
  }

  /// Fallback for every type without a dedicated overload above.
  Status Visit(const DataType& type) {
    return Status::NotImplemented("AppendScalar for type ", type);
  }

  /// Dispatch on the type of the first scalar and append the whole range.
  ///
  /// An empty range appends nothing and returns OK; without this guard the
  /// dispatch below would dereference scalars_begin_ past the end of the range.
  Status Convert() {
    if (scalars_begin_ == scalars_end_) return Status::OK();
    return VisitTypeInline(*(*scalars_begin_)->type, this);
  }

  const std::shared_ptr<Scalar>* scalars_begin_;
  const std::shared_ptr<Scalar>* scalars_end_;
  int64_t n_repeats_;
  ArrayBuilder* builder_;
};
}  // namespace

// Append `scalar` exactly once.
//
// Forwards to the (scalar, n_repeats) overload with a repeat count of 1; that
// overload performs the same type check (returning Status::Invalid on mismatch)
// and the same dispatch through AppendScalarImpl.
Status ArrayBuilder::AppendScalar(const Scalar& scalar) {
  return AppendScalar(scalar, /*n_repeats=*/1);
}

// Append `scalar` to this builder `n_repeats` times.
//
// Returns Status::Invalid when the scalar's type does not equal the builder's
// type; otherwise returns whatever AppendScalarImpl::Convert() reports.
Status ArrayBuilder::AppendScalar(const Scalar& scalar, int64_t n_repeats) {
  if (scalar.type->Equals(type())) {
    // AppendScalarImpl walks a range of shared_ptr<Scalar>. Wrap the caller's
    // scalar in a shared_ptr with a no-op deleter so it is borrowed, not owned,
    // and is not destroyed when `borrowed` goes out of scope.
    std::shared_ptr<Scalar> borrowed(const_cast<Scalar*>(&scalar), [](Scalar*) {});
    AppendScalarImpl impl{&borrowed, &borrowed + 1, n_repeats, this};
    return impl.Convert();
  }
  return Status::Invalid("Cannot append scalar of type ", scalar.type->ToString(),
                         " to builder for type ", type()->ToString());
}

// Append every scalar in `scalars`, in order, once each.
//
// An empty vector is a no-op that returns OK. All scalars are type-checked
// before anything is appended; the first one whose type does not equal the
// builder's type yields Status::Invalid.
Status ArrayBuilder::AppendScalars(const ScalarVector& scalars) {
  if (scalars.empty()) {
    return Status::OK();
  }

  const auto expected_type = type();
  for (const auto& item : scalars) {
    if (item->type->Equals(expected_type)) continue;
    return Status::Invalid("Cannot append scalar of type ", item->type->ToString(),
                           " to builder for type ", type()->ToString());
  }

  const std::shared_ptr<Scalar>* first = scalars.data();
  const std::shared_ptr<Scalar>* last = first + scalars.size();
  AppendScalarImpl impl{first, last, /*n_repeats=*/1, this};
  return impl.Convert();
}

Status ArrayBuilder::Finish(std::shared_ptr<Array>* out) {
std::shared_ptr<ArrayData> internal_data;
RETURN_NOT_OK(FinishInternal(&internal_data));
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,11 @@ class ARROW_EXPORT ArrayBuilder {
/// This method is useful when appending null values to a parent nested type.
virtual Status AppendEmptyValues(int64_t length) = 0;

/// \brief Append a value from a scalar
///
/// \param[in] scalar the value to append; its type must equal the builder's type
/// \return Status::Invalid if the types differ, Status::NotImplemented if
/// appending scalars of this type is not supported
Status AppendScalar(const Scalar& scalar);
/// \brief Append a value from a scalar, repeated n_repeats times
///
/// \param[in] scalar the value to append; its type must equal the builder's type
/// \param[in] n_repeats how many copies of the value to append
/// \return Status::Invalid if the types differ, Status::NotImplemented if
/// appending scalars of this type is not supported
Status AppendScalar(const Scalar& scalar, int64_t n_repeats);
/// \brief Append each scalar of a vector, in order
///
/// An empty vector appends nothing and returns OK.
///
/// \param[in] scalars the values to append; every type must equal the builder's type
/// \return Status::Invalid if any scalar's type differs, Status::NotImplemented
/// if appending scalars of this type is not supported
Status AppendScalars(const ScalarVector& scalars);

/// For cases where raw data was memcpy'd into the internal buffers, allows us
/// to advance the length of the builder. It is your responsibility to use
/// this function responsibly.
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,14 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
return Status::OK();
}

/// \brief Append the contents of a Buffer as one value
///
/// Reserves room for one element, then defers to the Buffer overload of
/// UnsafeAppend, which views the buffer as a string_view.
Status Append(const Buffer& s) {
  ARROW_RETURN_NOT_OK(Reserve(1));
  UnsafeAppend(s);
  return Status::OK();
}

/// \brief Append the contents of a shared Buffer as one value
///
/// The pointer is dereferenced unconditionally, so `s` must not be null.
Status Append(const std::shared_ptr<Buffer>& s) {
  const Buffer& buffer = *s;
  return Append(buffer);
}

template <size_t NBYTES>
Status Append(const std::array<uint8_t, NBYTES>& value) {
ARROW_RETURN_NOT_OK(Reserve(1));
Expand Down Expand Up @@ -502,6 +510,10 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
UnsafeAppend(reinterpret_cast<const uint8_t*>(value.data()));
}

void UnsafeAppend(const Buffer& s) { UnsafeAppend(util::string_view(s)); }

void UnsafeAppend(const std::shared_ptr<Buffer>& s) { UnsafeAppend(*s); }

void UnsafeAppendNull() {
UnsafeAppendToBitmap(false);
byte_builder_.UnsafeAppend(/*num_copies=*/byte_width_, 0);
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "arrow/array/builder_primitive.h" // IWYU pragma: export
#include "arrow/array/data.h"
#include "arrow/array/util.h"
#include "arrow/scalar.h"
#include "arrow/status.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
Expand Down
Loading