Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 5 additions & 10 deletions c_glib/arrow-glib/basic-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2649,16 +2649,11 @@ garrow_array_new_raw_valist(std::shared_ptr<arrow::Array> *arrow_array,
case arrow::Type::type::MAP:
type = GARROW_TYPE_MAP_ARRAY;
break;
case arrow::Type::type::UNION:
{
auto arrow_union_array =
std::static_pointer_cast<arrow::UnionArray>(*arrow_array);
if (arrow_union_array->mode() == arrow::UnionMode::SPARSE) {
type = GARROW_TYPE_SPARSE_UNION_ARRAY;
} else {
type = GARROW_TYPE_DENSE_UNION_ARRAY;
}
}
case arrow::Type::type::SPARSE_UNION:
type = GARROW_TYPE_SPARSE_UNION_ARRAY;
break;
case arrow::Type::type::DENSE_UNION:
type = GARROW_TYPE_DENSE_UNION_ARRAY;
break;
case arrow::Type::type::DICTIONARY:
type = GARROW_TYPE_DICTIONARY_ARRAY;
Expand Down
15 changes: 5 additions & 10 deletions c_glib/arrow-glib/basic-data-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1376,16 +1376,11 @@ garrow_data_type_new_raw(std::shared_ptr<arrow::DataType> *arrow_data_type)
case arrow::Type::type::STRUCT:
type = GARROW_TYPE_STRUCT_DATA_TYPE;
break;
case arrow::Type::type::UNION:
{
auto arrow_union_data_type =
std::static_pointer_cast<arrow::UnionType>(*arrow_data_type);
if (arrow_union_data_type->mode() == arrow::UnionMode::SPARSE) {
type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE;
} else {
type = GARROW_TYPE_DENSE_UNION_DATA_TYPE;
}
}
case arrow::Type::type::SPARSE_UNION:
type = GARROW_TYPE_SPARSE_UNION_DATA_TYPE;
break;
case arrow::Type::type::DENSE_UNION:
type = GARROW_TYPE_DENSE_UNION_DATA_TYPE;
break;
case arrow::Type::type::DICTIONARY:
type = GARROW_TYPE_DICTIONARY_DATA_TYPE;
Expand Down
8 changes: 4 additions & 4 deletions c_glib/arrow-glib/composite-array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1019,13 +1019,13 @@ garrow_sparse_union_array_new_internal(GArrowSparseUnionDataType *data_type,
arrow_field_names.push_back(arrow_field->name());
}
arrow_sparse_union_array_result =
arrow::UnionArray::MakeSparse(*arrow_type_ids,
arrow::SparseUnionArray::Make(*arrow_type_ids,
arrow_fields,
arrow_field_names,
arrow_union_data_type->type_codes());
} else {
arrow_sparse_union_array_result =
arrow::UnionArray::MakeSparse(*arrow_type_ids, arrow_fields);
arrow::SparseUnionArray::Make(*arrow_type_ids, arrow_fields);
}
if (garrow::check(error,
arrow_sparse_union_array_result,
Expand Down Expand Up @@ -1217,14 +1217,14 @@ garrow_dense_union_array_new_internal(GArrowDenseUnionDataType *data_type,
arrow_field_names.push_back(arrow_field->name());
}
arrow_dense_union_array_result =
arrow::UnionArray::MakeDense(*arrow_type_ids,
arrow::DenseUnionArray::Make(*arrow_type_ids,
*arrow_value_offsets,
arrow_fields,
arrow_field_names,
arrow_union_data_type->type_codes());
} else {
arrow_dense_union_array_result =
arrow::UnionArray::MakeDense(*arrow_type_ids,
arrow::DenseUnionArray::Make(*arrow_type_ids,
*arrow_value_offsets,
arrow_fields);
}
Expand Down
10 changes: 4 additions & 6 deletions c_glib/arrow-glib/composite-data-type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -573,9 +573,8 @@ garrow_sparse_union_data_type_new(GList *fields,
}

auto arrow_data_type =
std::make_shared<arrow::UnionType>(arrow_fields,
arrow_type_codes,
arrow::UnionMode::SPARSE);
std::make_shared<arrow::SparseUnionType>(arrow_fields,
arrow_type_codes);
auto data_type = g_object_new(GARROW_TYPE_SPARSE_UNION_DATA_TYPE,
"data-type", &arrow_data_type,
NULL);
Expand Down Expand Up @@ -623,9 +622,8 @@ garrow_dense_union_data_type_new(GList *fields,
}

auto arrow_data_type =
std::make_shared<arrow::UnionType>(arrow_fields,
arrow_type_codes,
arrow::UnionMode::DENSE);
std::make_shared<arrow::DenseUnionType>(arrow_fields,
arrow_type_codes);
auto data_type = g_object_new(GARROW_TYPE_DENSE_UNION_DATA_TYPE,
"data-type", &arrow_data_type,
NULL);
Expand Down
6 changes: 4 additions & 2 deletions c_glib/arrow-glib/type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,10 @@ garrow_type_from_raw(arrow::Type::type type)
return GARROW_TYPE_STRUCT;
case arrow::Type::type::MAP:
return GARROW_TYPE_MAP;
case arrow::Type::type::UNION:
return GARROW_TYPE_UNION;
case arrow::Type::type::SPARSE_UNION:
return GARROW_TYPE_SPARSE_UNION;
case arrow::Type::type::DENSE_UNION:
return GARROW_TYPE_DENSE_UNION;
case arrow::Type::type::DICTIONARY:
return GARROW_TYPE_DICTIONARY;
default:
Expand Down
6 changes: 4 additions & 2 deletions c_glib/arrow-glib/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ G_BEGIN_DECLS
* type. Storage type depends on the parameters.
* @GARROW_TYPE_LIST: A list of some logical data type.
* @GARROW_TYPE_STRUCT: Struct of logical types.
* @GARROW_TYPE_UNION: Unions of logical types.
* @GARROW_TYPE_SPARSE_UNION: Sparse unions of logical types.
* @GARROW_TYPE_DENSE_UNION: Dense unions of logical types.
* @GARROW_TYPE_DICTIONARY: Dictionary aka Category type.
* @GARROW_TYPE_MAP: A repeated struct logical type.
* @GARROW_TYPE_EXTENSION: Custom data type, implemented by user.
Expand Down Expand Up @@ -94,7 +95,8 @@ typedef enum {
GARROW_TYPE_DECIMAL,
GARROW_TYPE_LIST,
GARROW_TYPE_STRUCT,
GARROW_TYPE_UNION,
GARROW_TYPE_SPARSE_UNION,
GARROW_TYPE_DENSE_UNION,
GARROW_TYPE_DICTIONARY,
GARROW_TYPE_MAP,
GARROW_TYPE_EXTENSION,
Expand Down
4 changes: 2 additions & 2 deletions c_glib/test/test-dense-union-data-type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def setup
end

def test_type
assert_equal(Arrow::Type::UNION, @data_type.id)
assert_equal(Arrow::Type::DENSE_UNION, @data_type.id)
end

def test_to_s
assert_equal("union[dense]<number: int32=2, text: string=9>",
assert_equal("dense_union<number: int32=2, text: string=9>",
@data_type.to_s)
end

Expand Down
4 changes: 2 additions & 2 deletions c_glib/test/test-sparse-union-data-type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ def setup
end

def test_type
assert_equal(Arrow::Type::UNION, @data_type.id)
assert_equal(Arrow::Type::SPARSE_UNION, @data_type.id)
end

def test_to_s
assert_equal("union[sparse]<number: int32=2, text: string=9>",
assert_equal("sparse_union<number: int32=2, text: string=9>",
@data_type.to_s)
end

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/adapters/orc/adapter_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ Status GetArrowType(const liborc::Type* type, std::shared_ptr<DataType>* out) {
fields.push_back(field("_union_" + std::to_string(child), elemtype));
type_codes.push_back(static_cast<int8_t>(child));
}
*out = union_(fields, type_codes);
*out = sparse_union(fields, type_codes);
break;
}
default: {
Expand Down
99 changes: 66 additions & 33 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -556,44 +556,77 @@ Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
// ----------------------------------------------------------------------
// UnionArray

void UnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
void UnionArray::SetData(std::shared_ptr<ArrayData> data) {
this->Array::SetData(std::move(data));

ARROW_CHECK_EQ(data->type->id(), Type::UNION);
ARROW_CHECK_EQ(data->buffers.size(), 3);
union_type_ = checked_cast<const UnionType*>(data_->type.get());

ARROW_CHECK_GE(data_->buffers.size(), 2);
auto type_codes = data_->buffers[1];
auto value_offsets = data_->buffers[2];
raw_type_codes_ = type_codes == nullptr
? nullptr
: reinterpret_cast<const int8_t*>(type_codes->data());
boxed_fields_.resize(data_->child_data.size());
}

// Installs `data` as the backing ArrayData by delegating to
// UnionArray::SetData, then asserts the sparse-union layout with
// ARROW_CHECK_EQ: the type id must be Type::SPARSE_UNION and the buffer
// vector must hold exactly two entries.
void SparseUnionArray::SetData(std::shared_ptr<ArrayData> data) {
// `data` is handed off here; the checks below read the `data_` member
// rather than the moved-from parameter.
this->UnionArray::SetData(std::move(data));

ARROW_CHECK_EQ(data_->type->id(), Type::SPARSE_UNION);
ARROW_CHECK_EQ(data_->buffers.size(), 2);
}

// Installs `data` through UnionArray::SetData, asserts the dense-union layout
// with ARROW_CHECK_EQ (type id Type::DENSE_UNION, exactly three buffers) and
// caches a raw int32_t pointer to the contents of buffers[2] in
// raw_value_offsets_.
void DenseUnionArray::SetData(const std::shared_ptr<ArrayData>& data) {
// NOTE(review): `data` is a const reference, so std::move cannot move from
// it; the shared_ptr is copied instead.
this->UnionArray::SetData(std::move(data));

ARROW_CHECK_EQ(data_->type->id(), Type::DENSE_UNION);
ARROW_CHECK_EQ(data_->buffers.size(), 3);
// A null buffers[2] is tolerated: the cached pointer is then null as well.
auto value_offsets = data_->buffers[2];
raw_value_offsets_ = value_offsets == nullptr
? nullptr
: reinterpret_cast<const int32_t*>(value_offsets->data());
// NOTE(review): this reads the parameter `data`, not the `data_` member used
// above; it may be a removed line from the old side of this diff — confirm
// against the actual file before relying on it.
boxed_fields_.resize(data->child_data.size());
}

// Constructs the array from an existing ArrayData by passing it to SetData.
UnionArray::UnionArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
// Constructs the array from an existing ArrayData; `data` is taken by value
// and moved into SetData.
SparseUnionArray::SparseUnionArray(std::shared_ptr<ArrayData> data) {
SetData(std::move(data));
}

// Constructs a sparse union array from its parts.
//
// Builds an ArrayData from `type`, `length`, `null_count` and `offset` with
// the buffer vector {null_bitmap, type_codes} (in that order), appends each
// child's ArrayData to child_data in the order given by `children`, and
// installs the result through SetData.
SparseUnionArray::SparseUnionArray(std::shared_ptr<DataType> type, int64_t length,
ArrayVector children,
std::shared_ptr<Buffer> type_codes,
std::shared_ptr<Buffer> null_bitmap,
int64_t null_count, int64_t offset) {
auto internal_data = ArrayData::Make(
std::move(type), length,
BufferVector{std::move(null_bitmap), std::move(type_codes)}, null_count, offset);
// Only each child's underlying ArrayData is recorded.
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
SetData(std::move(internal_data));
}

// Constructs the array from an existing ArrayData by passing it to SetData.
DenseUnionArray::DenseUnionArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
}

UnionArray::UnionArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::vector<std::shared_ptr<Array>>& children,
const std::shared_ptr<Buffer>& type_codes,
const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& null_bitmap, int64_t null_count,
int64_t offset) {
DenseUnionArray::DenseUnionArray(std::shared_ptr<DataType> type, int64_t length,
ArrayVector children, std::shared_ptr<Buffer> type_ids,
std::shared_ptr<Buffer> value_offsets,
std::shared_ptr<Buffer> null_bitmap, int64_t null_count,
int64_t offset) {
auto internal_data = ArrayData::Make(
type, length, {null_bitmap, type_codes, value_offsets}, null_count, offset);
std::move(type), length,
BufferVector{std::move(null_bitmap), std::move(type_ids), std::move(value_offsets)},
null_count, offset);
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
SetData(internal_data);
}

Result<std::shared_ptr<Array>> UnionArray::MakeDense(
const Array& type_ids, const Array& value_offsets,
const std::vector<std::shared_ptr<Array>>& children,
const std::vector<std::string>& field_names, const std::vector<int8_t>& type_codes) {
Result<std::shared_ptr<Array>> DenseUnionArray::Make(
const Array& type_ids, const Array& value_offsets, ArrayVector children,
std::vector<std::string> field_names, std::vector<type_code_t> type_codes) {
if (value_offsets.length() == 0) {
return Status::Invalid("UnionArray offsets must have non-zero length");
}
Expand All @@ -607,7 +640,7 @@ Result<std::shared_ptr<Array>> UnionArray::MakeDense(
}

if (value_offsets.null_count() != 0) {
return Status::Invalid("MakeDense does not allow NAs in value_offsets");
return Status::Invalid("Make does not allow NAs in value_offsets");
}

if (field_names.size() > 0 && field_names.size() != children.size()) {
Expand All @@ -622,19 +655,19 @@ Result<std::shared_ptr<Array>> UnionArray::MakeDense(
checked_cast<const Int8Array&>(type_ids).values(),
checked_cast<const Int32Array&>(value_offsets).values()};

std::shared_ptr<DataType> union_type =
union_(children, field_names, type_codes, UnionMode::DENSE);
auto internal_data = ArrayData::Make(union_type, type_ids.length(), std::move(buffers),
type_ids.null_count(), type_ids.offset());
auto union_type = dense_union(children, std::move(field_names), std::move(type_codes));
auto internal_data =
ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
type_ids.null_count(), type_ids.offset());
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
}
return std::make_shared<UnionArray>(internal_data);
return std::make_shared<DenseUnionArray>(std::move(internal_data));
}

Result<std::shared_ptr<Array>> UnionArray::MakeSparse(
const Array& type_ids, const std::vector<std::shared_ptr<Array>>& children,
const std::vector<std::string>& field_names, const std::vector<int8_t>& type_codes) {
Result<std::shared_ptr<Array>> SparseUnionArray::Make(
const Array& type_ids, ArrayVector children, std::vector<std::string> field_names,
std::vector<int8_t> type_codes) {
if (type_ids.type_id() != Type::INT8) {
return Status::TypeError("UnionArray type_ids must be signed int8");
}
Expand All @@ -648,19 +681,19 @@ Result<std::shared_ptr<Array>> UnionArray::MakeSparse(
}

BufferVector buffers = {type_ids.null_bitmap(),
checked_cast<const Int8Array&>(type_ids).values(), nullptr};
std::shared_ptr<DataType> union_type =
union_(children, field_names, type_codes, UnionMode::SPARSE);
auto internal_data = ArrayData::Make(union_type, type_ids.length(), std::move(buffers),
type_ids.null_count(), type_ids.offset());
checked_cast<const Int8Array&>(type_ids).values()};
auto union_type = sparse_union(children, std::move(field_names), std::move(type_codes));
auto internal_data =
ArrayData::Make(std::move(union_type), type_ids.length(), std::move(buffers),
type_ids.null_count(), type_ids.offset());
for (const auto& child : children) {
internal_data->child_data.push_back(child->data());
if (child->length() != type_ids.length()) {
return Status::Invalid(
"Sparse UnionArray must have len(child) == len(type_ids) for all children");
}
}
return std::make_shared<UnionArray>(internal_data);
return std::make_shared<SparseUnionArray>(std::move(internal_data));
}

std::shared_ptr<Array> UnionArray::child(int i) const { return field(i); }
Expand Down
Loading