-
Notifications
You must be signed in to change notification settings - Fork 4k
Closed
Description
Describe the enhancement requested
We can reserve memory up front, before running the loops that append values while reading.
Also, we can check whether the null count is zero, so that the validity bitmask does not need to be checked for each value when there are no nulls.
arrow/cpp/src/parquet/column_reader.cc
Lines 2074 to 2090 in f7286a9
| void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override { | |
| uint8_t* valid_bits = valid_bits_->mutable_data(); | |
| const int64_t valid_bits_offset = values_written_; | |
| auto values = ValuesHead<FLBA>(); | |
| int64_t num_decoded = this->current_decoder_->DecodeSpaced( | |
| values, static_cast<int>(values_to_read), static_cast<int>(null_count), | |
| valid_bits, valid_bits_offset); | |
| ARROW_DCHECK_EQ(num_decoded, values_to_read); | |
| for (int64_t i = 0; i < num_decoded; i++) { | |
| if (::arrow::bit_util::GetBit(valid_bits, valid_bits_offset + i)) { | |
| PARQUET_THROW_NOT_OK(builder_->Append(values[i].ptr)); | |
| } else { | |
| PARQUET_THROW_NOT_OK(builder_->AppendNull()); | |
| } | |
| } |
This situation can arise when a batch contains optional fields but no actual nulls, as determined here:
arrow/cpp/src/parquet/column_reader.cc
Lines 77 to 93 in ef3797d
| inline bool HasSpacedValues(const ColumnDescriptor* descr) { | |
| if (descr->max_repetition_level() > 0) { | |
| // repeated+flat case | |
| return !descr->schema_node()->is_required(); | |
| } else { | |
| // non-repeated+nested case | |
| // Find if a node forces nulls in the lowest level along the hierarchy | |
| const schema::Node* node = descr->schema_node().get(); | |
| while (node) { | |
| if (node->is_optional()) { | |
| return true; | |
| } | |
| node = node->parent(); | |
| } | |
| return false; | |
| } | |
| } |
Component(s)
C++, Parquet
Reactions are currently unavailable