diff --git a/cpp/src/arrow/compute/kernels/filter-test.cc b/cpp/src/arrow/compute/kernels/filter-test.cc index 033efee5f68..253093e3799 100644 --- a/cpp/src/arrow/compute/kernels/filter-test.cc +++ b/cpp/src/arrow/compute/kernels/filter-test.cc @@ -414,5 +414,34 @@ TEST_F(TestFilterKernelWithStruct, FilterStruct) { ])"); } +class TestFilterKernelWithUnion : public TestFilterKernel {}; + +TEST_F(TestFilterKernelWithUnion, FilterUnion) { + for (auto mode : {UnionMode::SPARSE, UnionMode::DENSE}) { + auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 5}, mode); + auto union_json = R"([ + null, + [2, 222], + [5, "hello"], + [5, "eh"], + null, + [2, 111] + ])"; + this->AssertFilter(union_type, union_json, "[0, 0, 0, 0, 0, 0]", "[]"); + this->AssertFilter(union_type, union_json, "[0, 1, 1, null, 0, 1]", R"([ + [2, 222], + [5, "hello"], + null, + [2, 111] + ])"); + this->AssertFilter(union_type, union_json, "[1, 0, 1, 0, 1, 0]", R"([ + null, + [5, "hello"], + null + ])"); + this->AssertFilter(union_type, union_json, "[1, 1, 1, 1, 1, 1]", union_json); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/filter.cc b/cpp/src/arrow/compute/kernels/filter.cc index 8a07663d5dc..6c0eee5ed76 100644 --- a/cpp/src/arrow/compute/kernels/filter.cc +++ b/cpp/src/arrow/compute/kernels/filter.cc @@ -22,7 +22,6 @@ #include #include "arrow/builder.h" -#include "arrow/compute/context.h" #include "arrow/compute/kernels/take-internal.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" @@ -88,7 +87,7 @@ class FilterKernelImpl : public FilterKernel { if (values.length() != filter.length()) { return Status::Invalid("filter and value array must have identical lengths"); } - RETURN_NOT_OK(taker_->Init(ctx->memory_pool())); + RETURN_NOT_OK(taker_->SetContext(ctx)); RETURN_NOT_OK(taker_->Take(values, FilterIndexSequence(filter, length))); return taker_->Finish(out); } diff --git a/cpp/src/arrow/compute/kernels/take-internal.h b/cpp/src/arrow/compute/kernels/take-internal.h index bacd71b587a..96519a9869a 100644 --- a/cpp/src/arrow/compute/kernels/take-internal.h +++ b/cpp/src/arrow/compute/kernels/take-internal.h @@ -77,6 +77,9 @@ Status VisitIndices(IndexSequence indices, const Array& values, Visitor&& vis) { if (index < 0 || index >= values.length()) { return Status::IndexError("take index out of bounds"); } + } else { + DCHECK_GE(index, 0); + DCHECK_LT(index, values.length()); } bool is_valid = !SomeValuesNull || values.IsValid(index); @@ -121,12 +124,13 @@ class Taker { virtual ~Taker() = default; - // construct any children, must be called once after construction - virtual Status MakeChildren() { return Status::OK(); } + // initialize this taker including constructing any children, + // must be called once after construction before any other methods are called + virtual Status Init() { return Status::OK(); } - // reset this Taker, prepare to gather into an array allocated from pool - // must be called each time the output pool may have changed - virtual Status Init(MemoryPool* pool) = 0; + // reset this Taker and set FunctionContext for taking an array + // must be called each time the FunctionContext may have changed + virtual Status SetContext(FunctionContext* ctx) = 0; // gather elements from an array at the provided indices virtual Status Take(const Array& values, IndexSequence indices) = 0; @@ -200,6 +204,36 @@ class RangeIndexSequence { int64_t index_ = 0, length_ = -1; }; +// an IndexSequence which yields the values of an Array of integers +template +class ArrayIndexSequence { + public: + bool never_out_of_bounds() const { return never_out_of_bounds_; } + void set_never_out_of_bounds() { never_out_of_bounds_ = true; } + + constexpr ArrayIndexSequence() = default; + + explicit ArrayIndexSequence(const Array& indices) + : indices_(&checked_cast&>(indices)) {} + + std::pair Next() { + if (indices_->IsNull(index_)) { + ++index_; + return std::make_pair(-1, false); + } + return std::make_pair(indices_->Value(index_++), true); + } + + int64_t length() const { return indices_->length(); } + + int64_t null_count() const { return indices_->null_count(); } + + private: + const NumericArray* indices_ = nullptr; + int64_t index_ = 0; + bool never_out_of_bounds_ = false; +}; + // Default implementation: taking from a simple array into a builder requires only that // the array supports array.GetView() and the corresponding builder supports // builder.UnsafeAppend(array.GetView()) @@ -211,7 +245,9 @@ class TakerImpl : public Taker { using Taker::Taker; - Status Init(MemoryPool* pool) override { return this->MakeBuilder(pool, &builder_); } + Status SetContext(FunctionContext* ctx) override { + return this->MakeBuilder(ctx->memory_pool(), &builder_); + } Status Take(const Array& values, IndexSequence indices) override { DCHECK(this->type_->Equals(values.type())); @@ -239,7 +275,7 @@ class TakerImpl : public Taker { public: using Taker::Taker; - Status Init(MemoryPool*) override { return Status::OK(); } + Status SetContext(FunctionContext*) override { return Status::OK(); } Status Take(const Array& values, IndexSequence indices) override { DCHECK(this->type_->Equals(values.type())); @@ -267,16 +303,17 @@ class TakerImpl : public Taker { public: using Taker::Taker; - Status MakeChildren() override { + Status Init() override { const auto& list_type = checked_cast(*this->type_); return Taker::Make(list_type.value_type(), &value_taker_); } - Status Init(MemoryPool* pool) override { + Status SetContext(FunctionContext* ctx) override { + auto pool = ctx->memory_pool(); null_bitmap_builder_.reset(new TypedBufferBuilder(pool)); offset_builder_.reset(new TypedBufferBuilder(pool)); RETURN_NOT_OK(offset_builder_->Append(0)); - return value_taker_->Init(pool); + return value_taker_->SetContext(ctx); } Status Take(const Array& values, IndexSequence indices) override { @@ -345,14 +382,15 @@ class TakerImpl : public Taker public: using Taker::Taker; - Status MakeChildren() override { + Status Init() override { const auto& list_type = checked_cast(*this->type_); return Taker::Make(list_type.value_type(), &value_taker_); } - Status Init(MemoryPool* pool) override { + Status SetContext(FunctionContext* ctx) override { + auto pool = ctx->memory_pool(); null_bitmap_builder_.reset(new TypedBufferBuilder(pool)); - return value_taker_->Init(pool); + return value_taker_->SetContext(ctx); } Status Take(const Array& values, IndexSequence indices) override { @@ -398,7 +436,7 @@ class TakerImpl : public Taker { public: using Taker::Taker; - Status MakeChildren() override { + Status Init() override { children_.resize(this->type_->num_children()); for (int i = 0; i < this->type_->num_children(); ++i) { RETURN_NOT_OK( @@ -407,10 +445,10 @@ class TakerImpl : public Taker { return Status::OK(); } - Status Init(MemoryPool* pool) override { - null_bitmap_builder_.reset(new TypedBufferBuilder(pool)); + Status SetContext(FunctionContext* ctx) override { + null_bitmap_builder_.reset(new TypedBufferBuilder(ctx->memory_pool())); for (int i = 0; i < this->type_->num_children(); ++i) { - RETURN_NOT_OK(children_[i]->Init(pool)); + RETURN_NOT_OK(children_[i]->SetContext(ctx)); } return Status::OK(); } @@ -455,20 +493,195 @@ class TakerImpl : public Taker { std::vector>> children_; }; +template +class TakerImpl : public Taker { + public: + using Taker::Taker; + + Status Init() override { + union_type_ = checked_cast(this->type_.get()); + + if (union_type_->mode() == UnionMode::SPARSE) { + sparse_children_.resize(this->type_->num_children()); + } else { + dense_children_.resize(this->type_->num_children()); + child_length_.resize(union_type_->max_type_code() + 1); + } + + for (int i = 0; i < this->type_->num_children(); ++i) { + if (union_type_->mode() == UnionMode::SPARSE) { + RETURN_NOT_OK(Taker::Make(this->type_->child(i)->type(), + &sparse_children_[i])); + } else { + RETURN_NOT_OK(Taker>::Make( + this->type_->child(i)->type(), &dense_children_[i])); + } + } + + return Status::OK(); + } + + Status SetContext(FunctionContext* ctx) override { + pool_ = ctx->memory_pool(); + null_bitmap_builder_.reset(new TypedBufferBuilder(pool_)); + type_id_builder_.reset(new TypedBufferBuilder(pool_)); + + if (union_type_->mode() == UnionMode::DENSE) { + offset_builder_.reset(new TypedBufferBuilder(pool_)); + std::fill(child_length_.begin(), child_length_.end(), 0); + } + + for (int i = 0; i < this->type_->num_children(); ++i) { + if (union_type_->mode() == UnionMode::SPARSE) { + RETURN_NOT_OK(sparse_children_[i]->SetContext(ctx)); + } else { + RETURN_NOT_OK(dense_children_[i]->SetContext(ctx)); + } + } + + return Status::OK(); + } + + Status Take(const Array& values, IndexSequence indices) override { + DCHECK(this->type_->Equals(values.type())); + const auto& union_array = checked_cast(values); + auto type_ids = union_array.raw_type_ids(); + + if (union_type_->mode() == UnionMode::SPARSE) { + RETURN_NOT_OK(null_bitmap_builder_->Reserve(indices.length())); + RETURN_NOT_OK(type_id_builder_->Reserve(indices.length())); + RETURN_NOT_OK(VisitIndices(indices, values, [&](int64_t index, bool is_valid) { + null_bitmap_builder_->UnsafeAppend(is_valid); + type_id_builder_->UnsafeAppend(type_ids[index]); + return Status::OK(); + })); + + // bounds checking was done while appending to the null bitmap + indices.set_never_out_of_bounds(); + + for (int i = 0; i < this->type_->num_children(); ++i) { + RETURN_NOT_OK(sparse_children_[i]->Take(*union_array.child(i), indices)); + } + } else { + // Gathering from the offsets into child arrays is a bit tricky. + std::vector child_counts(union_type_->max_type_code() + 1); + RETURN_NOT_OK(null_bitmap_builder_->Reserve(indices.length())); + RETURN_NOT_OK(type_id_builder_->Reserve(indices.length())); + RETURN_NOT_OK(VisitIndices(indices, values, [&](int64_t index, bool is_valid) { + null_bitmap_builder_->UnsafeAppend(is_valid); + type_id_builder_->UnsafeAppend(type_ids[index]); + child_counts[type_ids[index]] += is_valid; + return Status::OK(); + })); + + // bounds checking was done while appending to the null bitmap + indices.set_never_out_of_bounds(); + + // Allocate temporary storage for the offsets of all valid slots + // NB: Overestimates required space when indices and union_array are + // not null at identical positions. + auto child_offsets_storage_size = + indices.length() - std::max(union_array.null_count(), indices.null_count()); + std::shared_ptr child_offsets_storage; + RETURN_NOT_OK(AllocateBuffer(pool_, child_offsets_storage_size * sizeof(int32_t), + &child_offsets_storage)); + + // Partition offsets by type_id: child_offset_partitions[type_id] will + // point to storage for child_counts[type_id] offsets + std::vector child_offset_partitions(child_counts.size()); + auto child_offsets_storage_data = GetInt32(child_offsets_storage); + for (auto type_id : union_type_->type_codes()) { + child_offset_partitions[type_id] = child_offsets_storage_data; + child_offsets_storage_data += child_counts[type_id]; + } + + // Fill child_offsets_storage with the taken offsets + RETURN_NOT_OK(offset_builder_->Reserve(indices.length())); + RETURN_NOT_OK(VisitIndices(indices, values, [&](int64_t index, bool is_valid) { + auto type_id = type_ids[index]; + if (is_valid) { + offset_builder_->UnsafeAppend(child_length_[type_id]++); + *child_offset_partitions[type_id] = union_array.value_offset(index); + ++child_offset_partitions[type_id]; + } else { + offset_builder_->UnsafeAppend(0); + } + return Status::OK(); + })); + + // Take from each child at those offsets + int64_t taken_offset_begin = 0; + for (int i = 0; i < this->type_->num_children(); ++i) { + auto type_id = union_type_->type_codes()[i]; + auto length = child_counts[type_id]; + Int32Array taken_offsets(length, SliceBuffer(child_offsets_storage, + sizeof(int32_t) * taken_offset_begin, + sizeof(int32_t) * length)); + ArrayIndexSequence child_indices(taken_offsets); + child_indices.set_never_out_of_bounds(); + RETURN_NOT_OK(dense_children_[i]->Take(*union_array.child(i), child_indices)); + taken_offset_begin += length; + } + } + + return Status::OK(); + } + + Status Finish(std::shared_ptr* out) override { + auto null_count = null_bitmap_builder_->false_count(); + auto length = null_bitmap_builder_->length(); + std::shared_ptr null_bitmap, type_ids; + RETURN_NOT_OK(null_bitmap_builder_->Finish(&null_bitmap)); + RETURN_NOT_OK(type_id_builder_->Finish(&type_ids)); + + std::shared_ptr offsets; + if (union_type_->mode() == UnionMode::DENSE) { + RETURN_NOT_OK(offset_builder_->Finish(&offsets)); + } + + ArrayVector fields(this->type_->num_children()); + for (int i = 0; i < this->type_->num_children(); ++i) { + if (union_type_->mode() == UnionMode::SPARSE) { + RETURN_NOT_OK(sparse_children_[i]->Finish(&fields[i])); + } else { + RETURN_NOT_OK(dense_children_[i]->Finish(&fields[i])); + } + } + + out->reset(new UnionArray(this->type_, length, std::move(fields), type_ids, offsets, + null_bitmap, null_count)); + return Status::OK(); + } + + protected: + int32_t* GetInt32(const std::shared_ptr& b) const { + return reinterpret_cast(b->mutable_data()); + } + + const UnionType* union_type_ = nullptr; + MemoryPool* pool_ = nullptr; + std::unique_ptr> null_bitmap_builder_; + std::unique_ptr> type_id_builder_; + std::unique_ptr> offset_builder_; + std::vector>> sparse_children_; + std::vector>>> dense_children_; + std::vector child_length_; +}; + // taking from a DictionaryArray is accomplished by taking from its indices template class TakerImpl : public Taker { public: using Taker::Taker; - Status MakeChildren() override { + Status Init() override { const auto& dict_type = checked_cast(*this->type_); return Taker::Make(dict_type.index_type(), &index_taker_); } - Status Init(MemoryPool* pool) override { + Status SetContext(FunctionContext* ctx) override { dictionary_ = nullptr; - return index_taker_->Init(pool); + return index_taker_->SetContext(ctx); } Status Take(const Array& values, IndexSequence indices) override { @@ -502,12 +715,14 @@ class TakerImpl : public Taker { public: using Taker::Taker; - Status MakeChildren() override { + Status Init() override { const auto& ext_type = checked_cast(*this->type_); return Taker::Make(ext_type.storage_type(), &storage_taker_); } - Status Init(MemoryPool* pool) override { return storage_taker_->Init(pool); } + Status SetContext(FunctionContext* ctx) override { + return storage_taker_->SetContext(ctx); + } Status Take(const Array& values, IndexSequence indices) override { DCHECK(this->type_->Equals(values.type())); @@ -531,11 +746,7 @@ struct TakerMakeImpl { template Status Visit(const T&) { out_->reset(new TakerImpl(type_)); - return (*out_)->MakeChildren(); - } - - Status Visit(const UnionType& t) { - return Status::NotImplemented("gathering values of type ", t); + return (*out_)->Init(); } std::shared_ptr type_; diff --git a/cpp/src/arrow/compute/kernels/take-test.cc b/cpp/src/arrow/compute/kernels/take-test.cc index da5e0c009ef..7ae9321bb46 100644 --- a/cpp/src/arrow/compute/kernels/take-test.cc +++ b/cpp/src/arrow/compute/kernels/take-test.cc @@ -349,6 +349,45 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) { ])"); } +class TestTakeKernelWithUnion : public TestTakeKernel {}; + +TEST_F(TestTakeKernelWithUnion, TakeUnion) { + for (auto mode : {UnionMode::SPARSE, UnionMode::DENSE}) { + auto union_type = union_({field("a", int32()), field("b", utf8())}, {2, 5}, mode); + auto union_json = R"([ + null, + [2, 222], + [5, "hello"], + [5, "eh"], + null, + [2, 111] + ])"; + this->AssertTake(union_type, union_json, "[]", "[]"); + this->AssertTake(union_type, union_json, "[3, 1, 3, 1, 3]", R"([ + [5, "eh"], + [2, 222], + [5, "eh"], + [2, 222], + [5, "eh"] + ])"); + this->AssertTake(union_type, union_json, "[4, 2, 1]", R"([ + null, + [5, "hello"], + [2, 222] + ])"); + this->AssertTake(union_type, union_json, "[0, 1, 2, 3, 4, 5]", union_json); + this->AssertTake(union_type, union_json, "[0, 2, 2, 2, 2, 2, 2]", R"([ + null, + [5, "hello"], + [5, "hello"], + [5, "hello"], + [5, "hello"], + [5, "hello"], + [5, "hello"] + ])"); + } +} + class TestPermutationsWithTake : public ComputeFixture, public TestBase { protected: void Take(const Int16Array& values, const Int16Array& indices, diff --git a/cpp/src/arrow/compute/kernels/take.cc b/cpp/src/arrow/compute/kernels/take.cc index 6ed9111559b..beabe1d9f41 100644 --- a/cpp/src/arrow/compute/kernels/take.cc +++ b/cpp/src/arrow/compute/kernels/take.cc @@ -19,48 +19,14 @@ #include #include -#include "arrow/compute/context.h" #include "arrow/compute/kernels/take-internal.h" #include "arrow/compute/kernels/take.h" -#include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" #include "arrow/visitor_inline.h" namespace arrow { namespace compute { -using internal::checked_cast; - -// an IndexSequence which yields the values of an Array of integers -template -class ArrayIndexSequence { - public: - bool never_out_of_bounds() const { return never_out_of_bounds_; } - void set_never_out_of_bounds() { never_out_of_bounds_ = true; } - - constexpr ArrayIndexSequence() = default; - - explicit ArrayIndexSequence(const Array& indices) - : indices_(&checked_cast&>(indices)) {} - - std::pair Next() { - if (indices_->IsNull(index_)) { - ++index_; - return std::make_pair(-1, false); - } - return std::make_pair(indices_->Value(index_++), true); - } - - int64_t length() const { return indices_->length(); } - - int64_t null_count() const { return indices_->null_count(); } - - private: - const NumericArray* indices_ = nullptr; - int64_t index_ = 0; - bool never_out_of_bounds_ = false; -}; - template class TakeKernelImpl : public TakeKernel { public: @@ -73,7 +39,7 @@ class TakeKernelImpl : public TakeKernel { Status Take(FunctionContext* ctx, const Array& values, const Array& indices_array, std::shared_ptr* out) override { - RETURN_NOT_OK(taker_->Init(ctx->memory_pool())); + RETURN_NOT_OK(taker_->SetContext(ctx)); RETURN_NOT_OK(taker_->Take(values, ArrayIndexSequence(indices_array))); return taker_->Finish(out); } diff --git a/cpp/src/arrow/ipc/json-simple-test.cc b/cpp/src/arrow/ipc/json-simple-test.cc index ce8b21a84a3..3c4277512d3 100644 --- a/cpp/src/arrow/ipc/json-simple-test.cc +++ b/cpp/src/arrow/ipc/json-simple-test.cc @@ -910,10 +910,11 @@ TEST(TestDenseUnion, Basics) { auto field_b = field("b", boolean()); auto type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); - auto array = ArrayFromJSON(type, "[[4, 122], [8, true], [4, null], null, [8, false]]"); + auto array = checked_pointer_cast( + ArrayFromJSON(type, "[null, [4, 122], [8, true], [4, null], null, [8, false]]")); - auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); - auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); + auto expected_types = ArrayFromJSON(int8(), "[null, 4, 8, 4, null, 8]"); + auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 0, 1, 0, 1]"); auto expected_a = ArrayFromJSON(int8(), "[122, null]"); auto expected_b = ArrayFromJSON(boolean(), "[true, false]"); @@ -923,6 +924,11 @@ TEST(TestDenseUnion, Basics) { &expected)); ASSERT_ARRAYS_EQUAL(*expected, *array); + + // ensure that the array is as dense as we expect + ASSERT_TRUE(array->value_offsets()->Equals(*expected_offsets->data()->buffers[1])); + ASSERT_ARRAYS_EQUAL(*expected_a, *array->child(0)); + ASSERT_ARRAYS_EQUAL(*expected_b, *array->child(1)); } TEST(TestSparseUnion, Basics) { @@ -948,11 +954,12 @@ TEST(TestDenseUnion, ListOfUnion) { auto field_b = field("b", boolean()); auto union_type = union_({field_a, field_b}, {4, 8}, UnionMode::DENSE); auto list_type = list(union_type); - auto array = ArrayFromJSON(list_type, - "[" - "[[4, 122], [8, true]]," - "[[4, null], null, [8, false]]" - "]"); + auto array = + checked_pointer_cast(ArrayFromJSON(list_type, + "[" + "[[4, 122], [8, true]]," + "[[4, null], null, [8, false]]" + "]")); auto expected_types = ArrayFromJSON(int8(), "[4, 8, 4, null, 8]"); auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); @@ -968,6 +975,13 @@ TEST(TestDenseUnion, ListOfUnion) { default_memory_pool(), &expected)); ASSERT_ARRAYS_EQUAL(*expected, *array); + + // ensure that the array is as dense as we expect + auto array_values = checked_pointer_cast(array->values()); + ASSERT_TRUE(array_values->value_offsets()->Equals( + *checked_pointer_cast(expected_values)->value_offsets())); + ASSERT_ARRAYS_EQUAL(*expected_a, *array_values->child(0)); + ASSERT_ARRAYS_EQUAL(*expected_b, *array_values->child(1)); } TEST(TestSparseUnion, ListOfUnion) { @@ -1002,13 +1016,13 @@ TEST(TestDenseUnion, UnionOfStructs) { field("foxtrot", list(int8()))})), field("q", struct_({field("quebec", utf8())}))}; auto type = union_(fields, {0, 23, 47}, UnionMode::DENSE); - auto array = ArrayFromJSON(type, R"([ + auto array = checked_pointer_cast(ArrayFromJSON(type, R"([ [0, {"alpha": 0.0, "bravo": "charlie"}], [23, {"whiskey": 99}], [0, {"bravo": "mike"}], null, [23, {"tango": 8.25, "foxtrot": [0, 2, 3]}] - ])"); + ])")); auto expected_types = ArrayFromJSON(int8(), "[0, 23, 0, null, 23]"); auto expected_offsets = ArrayFromJSON(int32(), "[0, 0, 1, 0, 1]"); @@ -1027,6 +1041,13 @@ TEST(TestDenseUnion, UnionOfStructs) { {"ab", "wtf", "q"}, {0, 23, 47}, &expected)); ASSERT_ARRAYS_EQUAL(*expected, *array); + + // ensure that the array is as dense as we expect + ASSERT_TRUE(array->value_offsets()->Equals(*expected_offsets->data()->buffers[1])); + for (int i = 0; i < type->num_children(); ++i) { + ASSERT_ARRAYS_EQUAL(*checked_cast(*expected).child(i), + *array->child(i)); + } } TEST(TestSparseUnion, UnionOfStructs) { diff --git a/cpp/src/arrow/ipc/json-simple.cc b/cpp/src/arrow/ipc/json-simple.cc index ae01bcc4b31..ac0237fc797 100644 --- a/cpp/src/arrow/ipc/json-simple.cc +++ b/cpp/src/arrow/ipc/json-simple.cc @@ -644,8 +644,10 @@ class UnionConverter final : public ConcreteConverter { } Status AppendNull() override { - for (auto& converter : child_converters_) { - RETURN_NOT_OK(converter->AppendNull()); + if (mode_ == UnionMode::SPARSE) { + for (auto& converter : child_converters_) { + RETURN_NOT_OK(converter->AppendNull()); + } } return builder_->AppendNull(); } @@ -676,15 +678,15 @@ class UnionConverter final : public ConcreteConverter { } auto child_converter = child_converters_[child_num]; - if (mode_ == UnionMode::DENSE) { - RETURN_NOT_OK(checked_cast(*builder_).Append(id)); - } else { + if (mode_ == UnionMode::SPARSE) { RETURN_NOT_OK(checked_cast(*builder_).Append(id)); for (auto&& other_converter : child_converters_) { if (other_converter != child_converter) { RETURN_NOT_OK(other_converter->AppendNull()); } } + } else { + RETURN_NOT_OK(checked_cast(*builder_).Append(id)); } return child_converter->AppendValue(json_obj[1]); }