Skip to content

[C++][Parquet] Add support for arrow::ArrayStatistics: zero-copy types #43983

@kou

Description

@kou

Describe the enhancement requested

Subissue of #43549

This issue covers the types that are read through `TransferZeroCopy()`:

// Builds an Array of `field`'s type directly from the buffers that `reader`
// releases; the released buffers are handed to the ArrayData as-is.
//
// For a nullable field the first buffer comes from ReleaseIsValid() and the
// null count is taken from the reader. Otherwise the first buffer slot is
// left null and the null count is fixed at 0.
std::shared_ptr<Array> TransferZeroCopy(RecordReader* reader,
                                        const std::shared_ptr<Field>& field) {
  const bool is_nullable = field->nullable();

  std::vector<std::shared_ptr<Buffer>> released;
  released.reserve(2);
  // ReleaseIsValid() is only called for nullable fields, and always before
  // ReleaseValues().
  if (is_nullable) {
    released.push_back(reader->ReleaseIsValid());
  } else {
    released.push_back(nullptr);
  }
  released.push_back(reader->ReleaseValues());

  // Read the null count only after the buffers have been released.
  const int64_t null_count = is_nullable ? reader->null_count() : 0;
  auto array_data = std::make_shared<::arrow::ArrayData>(
      field->type(), reader->values_written(), std::move(released), null_count);
  return ::arrow::MakeArray(array_data);
}

case ::arrow::Type::INT32:
case ::arrow::Type::INT64:
case ::arrow::Type::FLOAT:
case ::arrow::Type::DOUBLE:
result = TransferZeroCopy(reader, value_field);

  • Int32
  • Int64
  • Float
  • Double

case ::arrow::Type::TIMESTAMP: {
const ::arrow::TimestampType& timestamp_type =
checked_cast<::arrow::TimestampType&>(*value_field->type());
if (descr->physical_type() == ::parquet::Type::INT96) {
RETURN_NOT_OK(
TransferInt96(reader, pool, value_field, &result, timestamp_type.unit()));
} else {
switch (timestamp_type.unit()) {
case ::arrow::TimeUnit::MILLI:
case ::arrow::TimeUnit::MICRO:
case ::arrow::TimeUnit::NANO:
result = TransferZeroCopy(reader, value_field);

  • Timestamp[milli]
  • Timestamp[micro]
  • Timestamp[nano]

Component(s)

C++, Parquet

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions