diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index 05f508df31c..94bb6cbd839 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -286,11 +286,7 @@ Status ConcreteTypeFromFlatbuffer(flatbuf::Type type, const void* type_data, case flatbuf::Type::Timestamp: { auto ts_type = static_cast(type_data); TimeUnit::type unit = FromFlatbufferUnit(ts_type->unit()); - if (ts_type->timezone() != 0 && ts_type->timezone()->Length() > 0) { - *out = timestamp(unit, ts_type->timezone()->str()); - } else { - *out = timestamp(unit); - } + *out = timestamp(unit, StringFromFlatbuffers(ts_type->timezone())); return Status::OK(); } case flatbuf::Type::Duration: { @@ -369,10 +365,7 @@ static Status TypeFromFlatbuffer(const flatbuf::Field* field, const KeyValueMetadata* field_metadata, std::shared_ptr* out) { auto type_data = field->type(); - if (type_data == nullptr) { - return Status::IOError( - "Type-pointer in custom metadata of flatbuffer-encoded Field is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(type_data, "Field.type"); RETURN_NOT_OK(ConcreteTypeFromFlatbuffer(field->type_type(), type_data, children, out)); // Look for extension metadata in custom_metadata field @@ -474,14 +467,8 @@ Status KeyValueMetadataFromFlatbuffer(const KVVector* fb_metadata, metadata->reserve(fb_metadata->size()); for (const auto& pair : *fb_metadata) { - if (pair->key() == nullptr) { - return Status::IOError( - "Key-pointer in custom metadata of flatbuffer-encoded Schema is null."); - } - if (pair->value() == nullptr) { - return Status::IOError( - "Value-pointer in custom metadata of flatbuffer-encoded Schema is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(pair->key(), "custom_metadata.key"); + CHECK_FLATBUFFERS_NOT_NULL(pair->value(), "custom_metadata.value"); metadata->Append(pair->key()->str(), pair->value()->str()); } @@ -776,9 +763,7 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona // Reconstruct the data type auto children = field->children(); - if (children == nullptr) { - return Status::IOError("Children-pointer of flatbuffer-encoded Field is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(children, "Field.children"); std::vector> child_fields(children->size()); for (int i = 0; i < static_cast(children->size()); ++i) { RETURN_NOT_OK( @@ -786,6 +771,8 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona } RETURN_NOT_OK(TypeFromFlatbuffer(field, child_fields, metadata.get(), &type)); + auto field_name = StringFromFlatbuffers(field->name()); + const flatbuf::DictionaryEncoding* encoding = field->dictionary(); if (encoding != nullptr) { @@ -794,22 +781,14 @@ Status FieldFromFlatbuffer(const flatbuf::Field* field, DictionaryMemo* dictiona // dictionary_memo std::shared_ptr index_type; auto int_data = encoding->indexType(); - if (int_data == nullptr) { - return Status::IOError( - "indexType-pointer in custom metadata of flatbuffer-encoded DictionaryEncoding " - "is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(int_data, "DictionaryEncoding.indexType"); RETURN_NOT_OK(IntFromFlatbuffer(int_data, &index_type)); ARROW_ASSIGN_OR_RAISE(type, DictionaryType::Make(index_type, type, encoding->isOrdered())); - *out = ::arrow::field(field->name()->str(), type, field->nullable(), metadata); + *out = ::arrow::field(field_name, type, field->nullable(), metadata); RETURN_NOT_OK(dictionary_memo->AddField(encoding->id(), *out)); } else { - auto name = field->name(); - if (name == nullptr) { - return Status::IOError("Name-pointer of flatbuffer-encoded Field is null."); - } - *out = ::arrow::field(name->str(), type, field->nullable(), metadata); + *out = ::arrow::field(field_name, type, field->nullable(), metadata); } return Status::OK(); } @@ -1183,17 +1162,15 @@ Status WriteFileFooter(const Schema& schema, const std::vector& dicti Status GetSchema(const void* opaque_schema, DictionaryMemo* dictionary_memo, std::shared_ptr* out) { auto schema = static_cast(opaque_schema); - if (schema->fields() == nullptr) { - return Status::IOError("Fields-pointer of flatbuffer-encoded Schema is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(schema, "schema"); + CHECK_FLATBUFFERS_NOT_NULL(schema->fields(), "Schema.fields"); int num_fields = static_cast(schema->fields()->size()); std::vector> fields(num_fields); for (int i = 0; i < num_fields; ++i) { const flatbuf::Field* field = schema->fields()->Get(i); - if (field == nullptr) { - return Status::IOError("Field-pointer of flatbuffer-encoded Schema is null."); - } + // XXX I don't think this check is necessary (AP) + CHECK_FLATBUFFERS_NOT_NULL(field, "DictionaryEncoding.indexType"); RETURN_NOT_OK(FieldFromFlatbuffer(field, dictionary_memo, &fields[i])); } @@ -1225,12 +1202,7 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type auto dim = tensor->shape()->Get(i); shape->push_back(dim->size()); - auto fb_name = dim->name(); - if (fb_name == 0) { - dim_names->push_back(""); - } else { - dim_names->push_back(fb_name->str()); - } + dim_names->push_back(StringFromFlatbuffers(dim->name())); } if (tensor->strides() && tensor->strides()->size() > 0) { @@ -1239,11 +1211,7 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr* type } } - auto type_data = tensor->type(); - if (type_data == nullptr) { - return Status::IOError( - "Type-pointer in custom metadata of flatbuffer-encoded Tensor is null."); - } + auto type_data = tensor->type(); // Required return ConcreteTypeFromFlatbuffer(tensor->type_type(), type_data, {}, type); } @@ -1283,12 +1251,7 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr } if (dim_names) { - auto fb_name = dim->name(); - if (fb_name == 0) { - dim_names->push_back(""); - } else { - dim_names->push_back(fb_name->str()); - } + dim_names->push_back(StringFromFlatbuffers(dim->name())); } } } @@ -1324,11 +1287,7 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr } } - auto type_data = sparse_tensor->type(); - if (type_data == nullptr) { - return Status::IOError( - "Type-pointer in custom metadata of flatbuffer-encoded SparseTensor is null."); - } + auto type_data = sparse_tensor->type(); // Required if (type) { return ConcreteTypeFromFlatbuffer(sparse_tensor->type_type(), type_data, {}, type); } else { diff --git a/cpp/src/arrow/ipc/metadata_internal.h b/cpp/src/arrow/ipc/metadata_internal.h index b2ac81f86e7..f5ffee3fa75 100644 --- a/cpp/src/arrow/ipc/metadata_internal.h +++ b/cpp/src/arrow/ipc/metadata_internal.h @@ -31,20 +31,16 @@ #include "arrow/buffer.h" #include "arrow/ipc/dictionary.h" // IYWU pragma: keep #include "arrow/ipc/message.h" -#include "arrow/memory_pool.h" #include "arrow/sparse_tensor.h" #include "arrow/status.h" +#include "arrow/type_fwd.h" +#include "arrow/util/macros.h" #include "generated/Message_generated.h" #include "generated/Schema_generated.h" namespace arrow { -class DataType; -class Schema; -class Tensor; -class SparseTensor; - namespace flatbuf = org::apache::arrow::flatbuf; namespace io { @@ -92,6 +88,23 @@ struct FileBlock { int64_t body_length; }; +// Low-level utilities to help with reading Flatbuffers data. + +#define CHECK_FLATBUFFERS_NOT_NULL(fb_value, name) \ + if ((fb_value) == NULLPTR) { \ + return Status::IOError("Unexpected null field ", name, \ + " in flatbuffer-encoded metadata"); \ + } + +template +inline uint32_t FlatBuffersVectorSize(const flatbuffers::Vector* vec) { + return (vec == NULLPTR) ? 0 : vec->size(); +} + +inline std::string StringFromFlatbuffers(const flatbuffers::String* s) { + return (s == NULLPTR) ? "" : s->str(); +} + // Read interface classes. We do not fully deserialize the flatbuffers so that // individual fields metadata can be retrieved from very large schema without // diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index 8712d46d1c9..f99037a0d34 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -102,10 +102,7 @@ class IpcComponentSource { Status GetBuffer(int buffer_index, std::shared_ptr* out) { auto buffers = metadata_->buffers(); - if (buffers == nullptr) { - return Status::IOError( - "Buffers-pointer of flatbuffer-encoded RecordBatch is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers"); if (buffer_index >= static_cast(buffers->size())) { return Status::IOError("buffer_index out of range."); } @@ -127,9 +124,7 @@ class IpcComponentSource { Status GetFieldMetadata(int field_index, ArrayData* out) { auto nodes = metadata_->nodes(); - if (nodes == nullptr) { - return Status::IOError("Nodes-pointer of flatbuffer-encoded Table is null."); - } + CHECK_FLATBUFFERS_NOT_NULL(nodes, "Table.nodes"); // pop off a field if (field_index >= static_cast(nodes->size())) { return Status::Invalid("Ran out of field metadata, likely malformed"); @@ -441,6 +436,7 @@ Status ReadDictionary(const Buffer& metadata, DictionaryMemo* dictionary_memo, // The dictionary is embedded in a record batch with a single column std::shared_ptr batch; auto batch_meta = dictionary_batch->data(); + CHECK_FLATBUFFERS_NOT_NULL(batch_meta, "DictionaryBatch.data"); RETURN_NOT_OK(ReadRecordBatch(batch_meta, ::arrow::schema({value_field}), dictionary_memo, options, file, &batch)); if (batch->num_columns() != 1) { @@ -475,9 +471,6 @@ class RecordBatchStreamReader::RecordBatchStreamReaderImpl { } CHECK_MESSAGE_TYPE(Message::SCHEMA, message->type()); CHECK_HAS_NO_BODY(*message); - if (message->header() == nullptr) { - return Status::IOError("Header-pointer of flatbuffer-encoded Message is null."); - } return internal::GetSchema(message->header(), &dictionary_memo_, &schema_); } @@ -663,9 +656,13 @@ class RecordBatchFileReader::RecordBatchFileReaderImpl { return Status::OK(); } - int num_dictionaries() const { return footer_->dictionaries()->size(); } + int num_dictionaries() const { + return static_cast(internal::FlatBuffersVectorSize(footer_->dictionaries())); + } - int num_record_batches() const { return footer_->recordBatches()->size(); } + int num_record_batches() const { + return static_cast(internal::FlatBuffersVectorSize(footer_->recordBatches())); + } MetadataVersion version() const { return internal::GetMetadataVersion(footer_->version());