From 66b65312ad292b8841a36465010ad303f84da6aa Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Thu, 15 Sep 2022 17:45:23 +0000 Subject: [PATCH 01/24] Add SkipRecords to RecordReader and corresponding unit tests. I plan to add a stress test separately. --- cpp/src/parquet/column_reader.cc | 178 ++++++++++++++++-- cpp/src/parquet/column_reader.h | 56 ++++-- cpp/src/parquet/column_reader_test.cc | 261 ++++++++++++++++++++++++++ 3 files changed, 470 insertions(+), 25 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index c5899a47e95..fd5356b6435 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1005,7 +1005,7 @@ int64_t TypedColumnReaderImpl::ReadBatchWithDictionary( // Read dictionary indices. *indices_read = ReadDictionaryIndices(indices_to_read, indices); - int64_t total_indices = std::max(num_def_levels, *indices_read); + int64_t total_indices = std::max(num_def_levels, *indices_read); // Some callers use a batch size of 0 just to get the dictionary. int64_t expected_values = std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); @@ -1218,20 +1218,21 @@ namespace { constexpr int64_t kMinLevelBatchSize = 1024; template -class TypedRecordReader : public ColumnReaderImplBase, +class TypedRecordReader : public TypedColumnReaderImpl, virtual public RecordReader { public: using T = typename DType::c_type; - using BASE = ColumnReaderImplBase; - TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, MemoryPool* pool) - : BASE(descr, pool) { + using BASE = TypedColumnReaderImpl; + TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, + MemoryPool* pool) + // Pager must be set using SetPageReader. + : BASE(descr, /* pager = */ nullptr, pool) { leaf_info_ = leaf_info; nullable_values_ = leaf_info.HasNullableValues(); at_record_start_ = true; - records_read_ = 0; values_written_ = 0; - values_capacity_ = 0; null_count_ = 0; + values_capacity_ = 0; levels_written_ = 0; levels_position_ = 0; levels_capacity_ = 0; @@ -1245,7 +1246,7 @@ class TypedRecordReader : public ColumnReaderImplBase, rep_levels_ = AllocateBuffer(pool); Reset(); } - + int64_t available_values_current_page() const { return this->num_buffered_values_ - this->num_decoded_values_; } @@ -1328,6 +1329,160 @@ class TypedRecordReader : public ColumnReaderImplBase, return records_read; } + + // Skip records that we have in our buffer. This function is only for + // non-repeated fields. + int64_t SkipRecordsInBufferNonRepeated(int64_t num_records) { + ARROW_DCHECK(this->max_rep_level_ == 0); + ARROW_DCHECK(this->has_values_to_process()); + + int64_t remaining_records = levels_written_ - levels_position_; + int64_t skipped_records = std::min(num_records, remaining_records); + int64_t start_levels_position = levels_position_; + // Since there is no repetition, number of levels equals number of records. + levels_position_ += skipped_records; + // We skipped the levels by incrementing 'levels_position_'. For values + // we do not have a buffer, so we need to read them and throw them away. + // First we need to figure out how many present/not-null values there are. + std::shared_ptr<::arrow::ResizableBuffer> valid_bits; + valid_bits = AllocateBuffer(this->pool_); + PARQUET_THROW_NOT_OK( + valid_bits->Resize(bit_util::BytesForBits(skipped_records), true)); + ValidityBitmapInputOutput validity_io; + validity_io.values_read_upper_bound = skipped_records; + validity_io.valid_bits = valid_bits->mutable_data(); + validity_io.valid_bits_offset = 0; + DefLevelsToBitmap(def_levels() + start_levels_position, + levels_position_ - start_levels_position, + this->leaf_info_, &validity_io); + int64_t values_to_read = validity_io.values_read - validity_io.null_count; + ReadAndThrowAway(values_to_read); + // Mark the levels as read in the underlying column reader. + this->ConsumeBufferedValues(skipped_records); + return skipped_records; + } + + // Skip records for repeated fields. Returns number of skipped records. + int64_t SkipRecordsRepeated(int64_t num_records) { + ARROW_DCHECK_GT(this->max_rep_level_, 0); + + // For repeated fields, we are technically reading and throwing away the + // levels and values since we do not know the record boundaries in advance. + // Keep filling the buffer and skipping until we reach the desired number + // of records or we run out of values in the column chunk. + int64_t skipped_records = 0; + int64_t remaining_records = num_records; + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + while (!at_record_start_ || skipped_records < num_records) { + // Is there more data to read in this row group? + if (!this->HasNextInternal()) { + if (!at_record_start_) { + // We ended the row group while inside a record that we haven't seen + // the end of yet. So increment the record count for the last record + // in the row group + ++skipped_records; + at_record_start_ = true; + } + break; + } + + if (has_values_to_process()) { + // Look at the already buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t start_levels_position = levels_position_; + int64_t values_seen = 0; + skipped_records += DelimitRecords(remaining_records, &values_seen); + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + ReadAndThrowAway(values_seen); + } + + if (skipped_records == num_records) break; + + // Try reading more levels into the buffer. + remaining_records = num_records - skipped_records; + + int64_t batch_size = + std::min(level_batch_size, available_values_current_page()); + // No more data in column + if (batch_size == 0) { + break; + } + + ReserveLevels(batch_size); + + int16_t* def_levels = this->def_levels() + levels_written_; + int16_t* rep_levels = this->rep_levels() + levels_written_; + + int64_t levels_read = 0; + levels_read = this->ReadDefinitionLevels(batch_size, def_levels); + if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { + throw ParquetException( + "Number of decoded rep / def levels did not match"); + } + + // Exhausted column chunk + if (levels_read == 0) { + break; + } + + levels_written_ += levels_read; + } + + return skipped_records; + } + + // Read 'num_values' values and throw them away. + int64_t ReadAndThrowAway(int64_t num_values) { + int64_t values_left = num_values; + int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint + int64_t values_read = 0; + + // This will be enough scratch space to accommodate 16-bit levels or any + // value type + int value_size = type_traits::value_byte_size; + std::shared_ptr scratch = AllocateBuffer( + this->pool_, batch_size * std::max(sizeof(int16_t), value_size)); + do { + batch_size = std::min(batch_size, values_left); + values_read = this->ReadValues( + batch_size, reinterpret_cast(scratch->mutable_data())); + values_left -= values_read; + } while (values_read > 0 && values_left > 0); + return num_values - values_left; + } + + int64_t SkipRecords(int64_t num_records) override { + // Top level required field. Number of records equals to number of levels, + // and there is not read-ahead for levels. + if (this->max_rep_level_ == 0 && this->max_def_level_ == 0) { + return this->Skip(num_records); + } + // Non-repeated optional field. + int64_t skipped_records = 0; + if (this->max_rep_level_ == 0) { + if (this->has_values_to_process()) { + // First consume whatever is in the buffer. + skipped_records = SkipRecordsInBufferNonRepeated(num_records); + } + + // If there are more records left, we should have exhausted all the + // buffer. + ARROW_DCHECK(!this->has_values_to_process() || + skipped_records < num_records); + // For records that we have not buffered, we will use the column + // reader's Skip to do the remaining Skip. Since the field is not + // repeated number of levels to skip is the same as number of records + // to skip. + skipped_records += this->Skip(num_records - skipped_records); + } else { + skipped_records += this->SkipRecordsRepeated(num_records); + } + return skipped_records; + } + + // We may outwardly have the appearance of having exhausted a column chunk // when in fact we are in the middle of processing the last batch @@ -1357,7 +1512,8 @@ class TypedRecordReader : public ColumnReaderImplBase, } // Process written repetition/definition levels to reach the end of - // records. Process no more levels than necessary to delimit the indicated + // records. Only used for repeated fields. + // Process no more levels than necessary to delimit the indicated // number of logical records. Updates internal state of RecordReader // // \return Number of records delimited @@ -1494,8 +1650,6 @@ class TypedRecordReader : public ColumnReaderImplBase, levels_capacity_ = levels_remaining; } - records_read_ = 0; - // Call Finish on the binary builders to reset them } @@ -1542,7 +1696,7 @@ class TypedRecordReader : public ColumnReaderImplBase, } else if (this->max_def_level_ > 0) { // No repetition levels, skip delimiting logic. Each level represents a // null or not null entry - records_read = std::min(levels_written_ - levels_position_, num_records); + records_read = std::min(levels_written_ - levels_position_, num_records); // This is advanced by DelimitRecords, which we skipped levels_position_ += records_read; diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 1d35e3988ca..663c73f1ba1 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -163,7 +163,7 @@ class TypedColumnReader : public ColumnReader { // may be less than the number of repetition and definition levels. With // nested data this is almost certainly true. // - // Set def_levels or rep_levels to nullptr if you want to skip reading them. + // Set def_levels or rep_levels to nullptr if you want to reading them. // This is only safe if you know through some other source that there are no // undefined values. // @@ -272,6 +272,10 @@ class RecordReader { /// \brief Attempt to read indicated number of records from column chunk /// \return number of records read virtual int64_t ReadRecords(int64_t num_records) = 0; + + /// \brief Attempt to skip indicated number of records from column chunk + /// \return number of records skipped + virtual int64_t SkipRecords(int64_t num_records) = 0; /// \brief Pre-allocate space for data. Results in better flat read performance virtual void Reserve(int64_t num_values) = 0; @@ -292,7 +296,8 @@ class RecordReader { /// process virtual bool HasMoreData() const = 0; - /// \brief Advance record reader to the next row group + /// \brief Advance record reader to the next row group. Must be set before + /// any records could be read/skipped. /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader virtual void SetPageReader(std::unique_ptr reader) = 0; @@ -319,10 +324,13 @@ class RecordReader { int64_t levels_position() const { return levels_position_; } /// \brief Number of definition / repetition levels that have been written - /// internally in the reader + /// internally in the reader. This may be larger than values_written() because + // for repeated fields, we need to look at the levels in advance to figure out + // the record boundaries. Once we found the boundaries, we know exactly how + // many values to read. So we do not have buffering for the values. int64_t levels_written() const { return levels_written_; } - /// \brief Number of nulls in the leaf + /// \brief Number of nulls in the leaf that we have read so far. int64_t null_count() const { return null_count_; } /// \brief True if the leaf values are nullable @@ -332,27 +340,49 @@ class RecordReader { bool read_dictionary() const { return read_dictionary_; } protected: + // Indicates if we can have nullable values. bool nullable_values_; bool at_record_start_; int64_t records_read_; + // Stores values. These values are populated based on each ReadRecords call. + // No extra values are buffered for the next call. SkipRecords will not + // add any value to this buffer. + std::shared_ptr<::arrow::ResizableBuffer> values_; + // In the case of false (BYTE_ARRAY), don't allocate the values buffer + // (when we directly read into builder classes). + bool uses_values_; + + // Values that we have read into 'values_' + 'null_count_'. int64_t values_written_; int64_t values_capacity_; int64_t null_count_; - int64_t levels_written_; - int64_t levels_position_; - int64_t levels_capacity_; - - std::shared_ptr<::arrow::ResizableBuffer> values_; - // In the case of false, don't allocate the values buffer (when we directly read into - // builder classes). - bool uses_values_; - + // Each element corresponds to one element in 'values_' and specifies if it + // is null or not null. std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; + + // Buffers for repetition and definition levels. These buffers may have + // more levels than is actually read. This is because we read levels ahead to + // figure our record boundaries for repeated fields. 'levels_written_' shows + // the total number of levels that is in the buffer. 'levels_position_' points + // to the next level that should be read. ReadRecords and SkipRecords both + // advance 'levels_written_' and 'levels_position_'. + // For flat required fields, 'def_levels_' and 'rep_levels_' are not + // populated. For non-repeated fields 'rep_levels_' is not populated. + // 'def_levels_' and 'rep_levels_' must be of the same size if present. std::shared_ptr<::arrow::ResizableBuffer> def_levels_; std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; + + /// \brief Number of definition / repetition levels that have been written + /// internally in the reader. This may be larger than values_written() because + // for repeated fields, we need to look at the levels in advance to figure out + // the record boundaries. Once we found the boundaries, we know exactly how + // many values to read. So we do not have buffering for the values. + int64_t levels_written_; + int64_t levels_position_; + int64_t levels_capacity_; bool read_dictionary_ = false; }; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 9fc034f41fd..151ed0752db 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -573,5 +573,266 @@ TEST_F(TestPrimitiveReader, TestNonDictionaryEncodedPagesWithExposeEncoding) { pages_.clear(); } +// Tests reading a repeated field using the RecordReader. +TEST(RecordReaderTest, BasicReadRepeatedField) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 1; + + NodePtr type = schema::Int32("b", Repetition::OPTIONAL); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + + // Records look like: {[10], [20, 20], [30, 30, 30]} + std::vector> pages; + std::vector values = {10, 20, 20, 30, 30, 30}; + std::vector def_levels = {1, 1, 1, 1, 1, 1}; + std::vector rep_levels = {0, 0, 1, 0, 1, 1}; + + std::unique_ptr pager; + std::shared_ptr page = MakeDataPage( + &descr, values, /*num_values=*/def_levels.size(), Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, + level_info.rep_level); + pages.push_back(std::move(page)); + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + int64_t records_read = record_reader->ReadRecords(/*num_records=*/2); + + ASSERT_EQ(records_read, 2); + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 6); + ASSERT_EQ(record_reader->levels_position(), 3); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + std::vector read_reps( + record_reader->rep_levels(), + record_reader->rep_levels() + record_reader->levels_position()); + + ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); + ASSERT_TRUE(vector_equal(read_reps, {0, 0, 1})); +} + +// Test that we can skip required top level field. +TEST(RecordReaderTest, SkipRequiredTopLevel) { + internal::LevelInfo level_info; + level_info.def_level = 0; + level_info.rep_level = 0; + + NodePtr type = schema::Int32("b", Repetition::REQUIRED); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + std::vector> pages; + std::vector values = {10, 20, 20, 30, 30, 30}; + + std::unique_ptr pager; + std::shared_ptr page = MakeDataPage( + &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{}, level_info.def_level, + /*rep_levels=*/{}, level_info.rep_level); + pages.push_back(std::move(page)); + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/3); + + ASSERT_EQ(records_skipped, 3); + ASSERT_EQ(record_reader->values_written(), 0); + ASSERT_EQ(record_reader->levels_written(), 0); + ASSERT_EQ(record_reader->levels_position(), 0); + + int64_t records_read = record_reader->ReadRecords(/*num_records=*/2); + + ASSERT_EQ(records_read, 2); + ASSERT_EQ(record_reader->values_written(), 2); + ASSERT_EQ(record_reader->levels_written(), 0); + ASSERT_EQ(record_reader->levels_position(), 0); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + + ASSERT_TRUE(vector_equal(read_vals, {30, 30})); +} + +// Skip an optional field. Intentionally included some null values. +TEST(RecordReaderTest, SkipOptional) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 0; + + NodePtr type = schema::Int32("b", Repetition::OPTIONAL); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + // Records look like {null, 20, 20, 20, null, 30, 30} + std::vector> pages; + std::vector values = {20, 20, 20, 30, 30}; + std::vector def_levels = {0, 1, 1, 1, 0, 1, 1}; + + std::unique_ptr pager; + std::shared_ptr page = MakeDataPage( + &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, def_levels, level_info.def_level, + /*rep_levels=*/{}, level_info.rep_level); + pages.push_back(std::move(page)); + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + // This also tests when we start with a Skip. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); + + ASSERT_EQ(records_skipped, 2); + ASSERT_EQ(record_reader->values_written(), 0); + // Since we started with skipping, there was nothing in the buffer to consume + // for skipping. + ASSERT_EQ(record_reader->levels_written(), 0); + ASSERT_EQ(record_reader->levels_position(), 0); + + int64_t records_read = record_reader->ReadRecords(/*num_records=*/3); + + ASSERT_EQ(records_read, 3); + // One of these values is null. values_written() includes null values. + ASSERT_EQ(record_reader->values_written(), 3); + // When we read, we buffer some levels. Since there are only a few levels + // in our whole column, all of them are read. + // We had skipped 2 of the levels above. So there is only 5 left in total to + // read, and we read 3 of them here. + ASSERT_EQ(record_reader->levels_written(), 5); + ASSERT_EQ(record_reader->levels_position(), 3); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {20, 20})); + + // Skip to the end of the column. + records_skipped = record_reader->SkipRecords(/*num_records=*/5); + + ASSERT_EQ(records_skipped, 2); + // When we skip number of values written does not change, and we advance the + // levels position to skip reading the levels. + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->levels_written(), 5); + ASSERT_EQ(record_reader->levels_position(), 5); + + // We have exhausted all the records. + ASSERT_EQ(record_reader->ReadRecords(/*num_records=*/3), 0); + ASSERT_EQ(record_reader->SkipRecords(/*num_records=*/3), 0); +} + +// Test skipping for repeated fields. +TEST(RecordReaderTest, SkipRepeated) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 1; + + NodePtr type = schema::Int32("b", Repetition::REPEATED); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + // Records look like {null, [20, 20, 20], null, [30, 30], [40]} + std::vector> pages; + std::vector values = {20, 20, 20, 30, 30, 40}; + std::vector def_levels = {0, 1, 1, 1, 0, 1, 1, 1}; + std::vector rep_levels = {0, 0, 1, 1, 0, 0, 1, 0}; + + std::unique_ptr pager; + std::shared_ptr page = MakeDataPage( + &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, + level_info.rep_level); + pages.push_back(std::move(page)); + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + // This should skip the first null record. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); + + ASSERT_EQ(records_skipped, 1); + ASSERT_EQ(record_reader->values_written(), 0); + ASSERT_EQ(record_reader->null_count(), 0); + // For repeated fields, we need to read the levels to find the record + // boundaries and skip. So some levels are read. + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 1); + + // Read [20, 20, 20] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 4); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); + + // Skip the null record and also skip [30, 30] + records_skipped = record_reader->SkipRecords(/*num_records=*/2); + + ASSERT_EQ(records_skipped, 2); + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 7); + + { + // Read [40] + records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + ASSERT_EQ(record_reader->values_written(), 4); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 8); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20, 40})); + } +} + } // namespace test } // namespace parquet From c0fac595364a6883cd7e21762fa4c3ed3245eefb Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Fri, 16 Sep 2022 19:39:51 +0000 Subject: [PATCH 02/24] Add test for reading and skipping partial records for repeated fields. --- cpp/src/parquet/column_reader.cc | 52 ++++--- cpp/src/parquet/column_reader_test.cc | 198 ++++++++++++++++++++++++++ 2 files changed, 222 insertions(+), 28 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 383bdce2556..3c78f574796 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1036,7 +1036,7 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read); *values_read = this->ReadValues(values_to_read, values); - int64_t total_values = std::max(num_def_levels, *values_read); + int64_t total_values = std::max(num_def_levels, *values_read); int64_t expected_values = std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); if (total_values == 0 && expected_values > 0) { @@ -1269,7 +1269,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, records_read += ReadRecordData(num_records); } - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); // If we are in the middle of a record, we continue until reaching the // desired number of records or the end of the current record if we've found @@ -1362,6 +1362,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, return skipped_records; } + // Skip records for repeated fields. Returns number of skipped records. // Skip records for repeated fields. Returns number of skipped records. int64_t SkipRecordsRepeated(int64_t num_records) { ARROW_DCHECK_GT(this->max_rep_level_, 0); @@ -1371,10 +1372,13 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Keep filling the buffer and skipping until we reach the desired number // of records or we run out of values in the column chunk. int64_t skipped_records = 0; - int64_t remaining_records = num_records; - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + // If 'at_record_start_' is false, but (skip_records == num_records), it + // means that for the last record that was counted, we have not seen all + // of it's values yet. while (!at_record_start_ || skipped_records < num_records) { // Is there more data to read in this row group? + // HasNextInternal() will advance to the next page if necessary. if (!this->HasNextInternal()) { if (!at_record_start_) { // We ended the row group while inside a record that we haven't seen @@ -1386,26 +1390,12 @@ class TypedRecordReader : public TypedColumnReaderImpl, break; } - if (has_values_to_process()) { - // Look at the already buffered levels, delimit them based on - // (rep_level == 0), report back how many records are in there, and - // fill in how many not-null values (def_level == max_def_level_). - // DelimitRecords updates levels_position_. - int64_t start_levels_position = levels_position_; - int64_t values_seen = 0; - skipped_records += DelimitRecords(remaining_records, &values_seen); - this->ConsumeBufferedValues(levels_position_ - start_levels_position); - ReadAndThrowAway(values_seen); - } - - if (skipped_records == num_records) break; - - // Try reading more levels into the buffer. - remaining_records = num_records - skipped_records; - + // Read some more levels. int64_t batch_size = std::min(level_batch_size, available_values_current_page()); - // No more data in column + // No more data in column. This must be an empty page. + // If we had exhausted the last page, HasNextInternal() must have advanced + // to the next page. So there must be available values to process. if (batch_size == 0) { break; } @@ -1422,12 +1412,18 @@ class TypedRecordReader : public TypedColumnReaderImpl, "Number of decoded rep / def levels did not match"); } - // Exhausted column chunk - if (levels_read == 0) { - break; - } - levels_written_ += levels_read; + + // Look at the buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t remaining_records = num_records - skipped_records; + int64_t start_levels_position = levels_position_; + int64_t values_seen = 0; + skipped_records += DelimitRecords(remaining_records, &values_seen); + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + ReadAndThrowAway(values_seen); } return skipped_records; @@ -1684,7 +1680,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t ReadRecordData(int64_t num_records) { // Conservative upper bound const int64_t possible_num_values = - std::max(num_records, levels_written_ - levels_position_); + std::max(num_records, levels_written_ - levels_position_); ReserveValues(possible_num_values); const int64_t start_levels_position = levels_position_; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 151ed0752db..c623422fa67 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -834,5 +834,203 @@ TEST(RecordReaderTest, SkipRepeated) { } } +// Test reading when one record spans multiple pages for a repeated field. +TEST(RecordReaderTest, ReadPartialRecord) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 1; + + NodePtr type = schema::Int32("b", Repetition::REPEATED); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + std::vector> pages; + std::unique_ptr pager; + + // Page 1: {[10], [20, 20, 20 ... } continues to next page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info.def_level, + /*rep_levels=*/{0, 0, 1, 1}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + // Page 2: {... 20, 20, ...} continues from previous page and to next page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, + /*rep_levels=*/{1, 1}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + // Page 3: { ... 20, [30]} continues from previous page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, + /*rep_levels=*/{1, 0}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + { + // Read [10] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {10})); + } + + { + // Read [20, 20, 20, 20, 20, 20] that spans multiple pages. + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20})); + } + + { + // Read [30] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20, 30})); + } +} + +// Test skipping for repeated fields for the case when one record spans multiple +// pages. +TEST(RecordReaderTest, SkipPartialRecord) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 1; + + NodePtr type = schema::Int32("b", Repetition::REPEATED); + const ColumnDescriptor descr(type, level_info.def_level, + level_info.rep_level); + + std::vector> pages; + std::unique_ptr pager; + + // Page 1: {[10], [20, 20, 20 ... } continues to next page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info.def_level, + /*rep_levels=*/{0, 0, 1, 1}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + // Page 2: {... 20, 20, ...} continues from previous page and to next page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, + /*rep_levels=*/{1, 1}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + // Page 3: { ... 20, [30]} continues from previous page. + { + std::shared_ptr page = MakeDataPage( + &descr, /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, + /*rep_levels=*/{1, 0}, level_info.rep_level); + pages.push_back(std::move(page)); + } + + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + { + // Read [10] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {10})); + ASSERT_EQ(record_reader->values_written(), 1); + ASSERT_EQ(record_reader->null_count(), 0); + // There are 4 levels in the first page. + ASSERT_EQ(record_reader->levels_written(), 4); + ASSERT_EQ(record_reader->levels_position(), 1); + } + + { + // Skip the record that goes across pages. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); + + ASSERT_EQ(records_skipped, 1); + ASSERT_EQ(record_reader->values_written(), 1); + ASSERT_EQ(record_reader->null_count(), 0); + // For repeated fields, we need to read the levels to find the record + // boundaries and skip. So some levels are read. + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 7); + } + + { + // Read [30] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + ASSERT_EQ(record_reader->values_written(), 2); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 8); + ASSERT_EQ(record_reader->levels_position(), 8); + + const auto read_values = + reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, read_values + record_reader->values_written() - + record_reader->null_count()); + + ASSERT_TRUE(vector_equal(read_vals, {10, 30})); + } +} + } // namespace test } // namespace parquet From 29455595feeeb57ed1f19bc9e30f6900ad2ea942 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Mon, 19 Sep 2022 17:37:05 +0000 Subject: [PATCH 03/24] Addressed comments. --- cpp/src/parquet/column_reader.cc | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 3c78f574796..20fe82b1e02 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1005,7 +1005,7 @@ int64_t TypedColumnReaderImpl::ReadBatchWithDictionary( // Read dictionary indices. *indices_read = ReadDictionaryIndices(indices_to_read, indices); - int64_t total_indices = std::max(num_def_levels, *indices_read); + int64_t total_indices = std::max(num_def_levels, *indices_read); // Some callers use a batch size of 0 just to get the dictionary. int64_t expected_values = std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); @@ -1036,7 +1036,7 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read); *values_read = this->ReadValues(values_to_read, values); - int64_t total_values = std::max(num_def_levels, *values_read); + int64_t total_values = std::max(num_def_levels, *values_read); int64_t expected_values = std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); if (total_values == 0 && expected_values > 0) { @@ -1269,7 +1269,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, records_read += ReadRecordData(num_records); } - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); // If we are in the middle of a record, we continue until reaching the // desired number of records or the end of the current record if we've found @@ -1329,12 +1329,12 @@ class TypedRecordReader : public TypedColumnReaderImpl, return records_read; } - + // Skip records that we have in our buffer. This function is only for // non-repeated fields. int64_t SkipRecordsInBufferNonRepeated(int64_t num_records) { - ARROW_DCHECK(this->max_rep_level_ == 0); - ARROW_DCHECK(this->has_values_to_process()); + ARROW_DCHECK_EQ(this->max_rep_level_, 0); + if (!this->has_values_to_process()) return 0; int64_t remaining_records = levels_written_ - levels_position_; int64_t skipped_records = std::min(num_records, remaining_records); @@ -1362,7 +1362,6 @@ class TypedRecordReader : public TypedColumnReaderImpl, return skipped_records; } - // Skip records for repeated fields. Returns number of skipped records. // Skip records for repeated fields. Returns number of skipped records. int64_t SkipRecordsRepeated(int64_t num_records) { ARROW_DCHECK_GT(this->max_rep_level_, 0); @@ -1372,7 +1371,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Keep filling the buffer and skipping until we reach the desired number // of records or we run out of values in the column chunk. int64_t skipped_records = 0; - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); // If 'at_record_start_' is false, but (skip_records == num_records), it // means that for the last record that was counted, we have not seen all // of it's values yet. @@ -1458,10 +1457,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Non-repeated optional field. int64_t skipped_records = 0; if (this->max_rep_level_ == 0) { - if (this->has_values_to_process()) { - // First consume whatever is in the buffer. - skipped_records = SkipRecordsInBufferNonRepeated(num_records); - } + // First consume whatever is in the buffer. + skipped_records = SkipRecordsInBufferNonRepeated(num_records); // If there are more records left, we should have exhausted all the // buffer. @@ -1477,8 +1474,6 @@ class TypedRecordReader : public TypedColumnReaderImpl, } return skipped_records; } - - // We may outwardly have the appearance of having exhausted a column chunk // when in fact we are in the middle of processing the last batch @@ -1680,7 +1675,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t ReadRecordData(int64_t num_records) { // Conservative upper bound const int64_t possible_num_values = - std::max(num_records, levels_written_ - levels_position_); + std::max(num_records, levels_written_ - levels_position_); ReserveValues(possible_num_values); const int64_t start_levels_position = levels_position_; From 938e494c74fe444736913edb5bebb200e3648cfb Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 20 Sep 2022 20:08:49 +0000 Subject: [PATCH 04/24] Addressed most comments. Working on adding a test for variable length type and using gmock. --- cpp/src/parquet/column_reader.cc | 124 +++++++---- cpp/src/parquet/column_reader.h | 13 +- cpp/src/parquet/column_reader_test.cc | 285 ++++++++++++++++---------- 3 files changed, 262 insertions(+), 160 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 20fe82b1e02..ab3243926e4 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -60,6 +60,11 @@ using arrow::internal::MultiplyWithOverflow; namespace bit_util = arrow::bit_util; namespace parquet { + +// The minimum number of repetition/definition levels to decode at a time, for +// better vectorized performance when doing many smaller record reads +constexpr int64_t kMinLevelBatchSize = 1024; + namespace { inline bool HasSpacedValues(const ColumnDescriptor* descr) { if (descr->max_repetition_level() > 0) { @@ -1144,7 +1149,7 @@ int64_t TypedColumnReaderImpl::Skip(int64_t num_values_to_skip) { } else { // We need to read this Page // Jump to the right offset in the Page - int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint + int64_t batch_size = kMinLevelBatchSize; // ReadBatch with a smaller memory footprint int64_t values_read = 0; // This will be enough scratch space to accommodate 16-bit levels or any @@ -1211,11 +1216,8 @@ std::shared_ptr ColumnReader::Make(const ColumnDescriptor* descr, // RecordReader namespace internal { -namespace { -// The minimum number of repetition/definition levels to decode at a time, for -// better vectorized performance when doing many smaller record reads -constexpr int64_t kMinLevelBatchSize = 1024; +namespace { template class TypedRecordReader : public TypedColumnReaderImpl, @@ -1246,7 +1248,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, rep_levels_ = AllocateBuffer(pool); Reset(); } - + int64_t available_values_current_page() const { return this->num_buffered_values_ - this->num_decoded_values_; } @@ -1330,6 +1332,26 @@ class TypedRecordReader : public TypedColumnReaderImpl, return records_read; } + // Throw away levels from start_levels_position to levels_position_. + // Will update levels_position_ and levels_written_ accordingly and move + // the levels to left to fill in the gap. It will not shrink the size + // of the buffer or overwrite the positions after levels_written_. + // This is inefficient, though necessary to consume levels that we have + // already read into the buffer and we want to Skip. + void ThrowAwayLevels(int64_t start_levels_position) { + ARROW_DCHECK_LE(levels_position_, levels_written_); + ARROW_DCHECK_LE(start_levels_position, levels_position_); + int64_t gap = levels_position_ - start_levels_position; + + for (int64_t i = levels_position_; i < levels_written_; ++i) { + *(def_levels() + i - gap) = *(def_levels() + i); + *(rep_levels() + i - gap) = *(rep_levels() + i); + } + + levels_written_ -= gap; + levels_position_ -= gap; + } + // Skip records that we have in our buffer. This function is only for // non-repeated fields. int64_t SkipRecordsInBufferNonRepeated(int64_t num_records) { @@ -1341,6 +1363,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t start_levels_position = levels_position_; // Since there is no repetition, number of levels equals number of records. levels_position_ += skipped_records; + // We skipped the levels by incrementing 'levels_position_'. For values // we do not have a buffer, so we need to read them and throw them away. // First we need to figure out how many present/not-null values there are. @@ -1353,12 +1376,18 @@ class TypedRecordReader : public TypedColumnReaderImpl, validity_io.valid_bits = valid_bits->mutable_data(); validity_io.valid_bits_offset = 0; DefLevelsToBitmap(def_levels() + start_levels_position, - levels_position_ - start_levels_position, + skipped_records, this->leaf_info_, &validity_io); int64_t values_to_read = validity_io.values_read - validity_io.null_count; + + // Now that we have figured out number of values to read, we do not need + // these levels anymore. Updates levels_position_ and levels_written. + ThrowAwayLevels(start_levels_position); ReadAndThrowAway(values_to_read); + // Mark the levels as read in the underlying column reader. this->ConsumeBufferedValues(skipped_records); + return skipped_records; } @@ -1376,9 +1405,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, // means that for the last record that was counted, we have not seen all // of it's values yet. while (!at_record_start_ || skipped_records < num_records) { - // Is there more data to read in this row group? - // HasNextInternal() will advance to the next page if necessary. - if (!this->HasNextInternal()) { + // Is there more data to read in this row group? + // HasNextInternal() will advance to the next page if necessary. + if (!this->HasNextInternal()) { if (!at_record_start_) { // We ended the row group while inside a record that we haven't seen // the end of yet. So increment the record count for the last record @@ -1387,42 +1416,49 @@ class TypedRecordReader : public TypedColumnReaderImpl, at_record_start_ = true; } break; - } - - // Read some more levels. - int64_t batch_size = - std::min(level_batch_size, available_values_current_page()); - // No more data in column. This must be an empty page. - // If we had exhausted the last page, HasNextInternal() must have advanced - // to the next page. So there must be available values to process. - if (batch_size == 0) { + } + + // Read some more levels. + int64_t batch_size = std::min(level_batch_size, available_values_current_page()); + // No more data in column. This must be an empty page. + // If we had exhausted the last page, HasNextInternal() must have advanced + // to the next page. So there must be available values to process. + if (batch_size == 0) { break; - } - - ReserveLevels(batch_size); + } - int16_t* def_levels = this->def_levels() + levels_written_; - int16_t* rep_levels = this->rep_levels() + levels_written_; + // For skip we will read the levels and append them to the end + // of the def_levels and rep_levels just like for read. + ReserveLevels(batch_size); - int64_t levels_read = 0; - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException( - "Number of decoded rep / def levels did not match"); - } + int16_t* def_levels = this->def_levels() + levels_written_; + int16_t* rep_levels = this->rep_levels() + levels_written_; - levels_written_ += levels_read; - - // Look at the buffered levels, delimit them based on - // (rep_level == 0), report back how many records are in there, and - // fill in how many not-null values (def_level == max_def_level_). - // DelimitRecords updates levels_position_. - int64_t remaining_records = num_records - skipped_records; - int64_t start_levels_position = levels_position_; - int64_t values_seen = 0; - skipped_records += DelimitRecords(remaining_records, &values_seen); - this->ConsumeBufferedValues(levels_position_ - start_levels_position); - ReadAndThrowAway(values_seen); + int64_t levels_read = 0; + levels_read = this->ReadDefinitionLevels(batch_size, def_levels); + if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { + throw ParquetException("Number of decoded rep / def levels did not match"); + } + + levels_written_ += levels_read; + + // Look at the buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t start_levels_position = levels_position_; + int64_t remaining_records = num_records - skipped_records; + int64_t values_seen = 0; + skipped_records += DelimitRecords(remaining_records, &values_seen); + if (ReadAndThrowAway(values_seen) != values_seen) { + throw ParquetException("Could not read and throw away requested values"); + } + // Mark those levels and values as consumed in the the underlying page. + // This must be done before we throw away levels since it updates + // levels_position_ and levels_written_. + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + // Updated levels_position_ and levels_written_. + ThrowAwayLevels(start_levels_position); } return skipped_records; @@ -1431,7 +1467,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Read 'num_values' values and throw them away. int64_t ReadAndThrowAway(int64_t num_values) { int64_t values_left = num_values; - int64_t batch_size = 1024; // ReadBatch with a smaller memory footprint + int64_t batch_size = kMinLevelBatchSize; // ReadBatch with a smaller memory footprint int64_t values_read = 0; // This will be enough scratch space to accommodate 16-bit levels or any @@ -1687,7 +1723,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, } else if (this->max_def_level_ > 0) { // No repetition levels, skip delimiting logic. Each level represents a // null or not null entry - records_read = std::min(levels_written_ - levels_position_, num_records); + records_read = std::min(levels_written_ - levels_position_, num_records); // This is advanced by DelimitRecords, which we skipped levels_position_ += records_read; diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 4a8358be452..3c69758833d 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -277,9 +277,11 @@ class RecordReader { virtual ~RecordReader() = default; /// \brief Attempt to read indicated number of records from column chunk + /// Note that for repeated fields, a record may have more than one value + /// and all of them are read. /// \return number of records read virtual int64_t ReadRecords(int64_t num_records) = 0; - + /// \brief Attempt to skip indicated number of records from column chunk /// \return number of records skipped virtual int64_t SkipRecords(int64_t num_records) = 0; @@ -369,19 +371,20 @@ class RecordReader { // Each element corresponds to one element in 'values_' and specifies if it // is null or not null. std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; - + // Buffers for repetition and definition levels. These buffers may have // more levels than is actually read. This is because we read levels ahead to // figure our record boundaries for repeated fields. 'levels_written_' shows // the total number of levels that is in the buffer. 'levels_position_' points - // to the next level that should be read. ReadRecords and SkipRecords both - // advance 'levels_written_' and 'levels_position_'. + // to the next level that should be consumed. ReadRecords and SkipRecords both + // advance 'levels_written_' and 'levels_position_'. SkipRecords then will + // remove the skipped levels by shifting the levels to the left. // For flat required fields, 'def_levels_' and 'rep_levels_' are not // populated. For non-repeated fields 'rep_levels_' is not populated. // 'def_levels_' and 'rep_levels_' must be of the same size if present. std::shared_ptr<::arrow::ResizableBuffer> def_levels_; std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; - + /// \brief Number of definition / repetition levels that have been written /// internally in the reader. This may be larger than values_written() because // for repeated fields, we need to look at the levels in advance to figure out diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index c623422fa67..5f8b3895c28 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -682,8 +682,7 @@ TEST(RecordReaderTest, SkipOptional) { level_info.rep_level = 0; NodePtr type = schema::Int32("b", Repetition::OPTIONAL); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); // Records look like {null, 20, 20, 20, null, 30, 30} std::vector> pages; @@ -703,45 +702,77 @@ TEST(RecordReaderTest, SkipOptional) { internal::RecordReader::Make(&descr, level_info); record_reader->SetPageReader(std::move(pager)); - // This also tests when we start with a Skip. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); - - ASSERT_EQ(records_skipped, 2); - ASSERT_EQ(record_reader->values_written(), 0); - // Since we started with skipping, there was nothing in the buffer to consume - // for skipping. - ASSERT_EQ(record_reader->levels_written(), 0); - ASSERT_EQ(record_reader->levels_position(), 0); - - int64_t records_read = record_reader->ReadRecords(/*num_records=*/3); - - ASSERT_EQ(records_read, 3); - // One of these values is null. values_written() includes null values. - ASSERT_EQ(record_reader->values_written(), 3); - // When we read, we buffer some levels. Since there are only a few levels - // in our whole column, all of them are read. - // We had skipped 2 of the levels above. So there is only 5 left in total to - // read, and we read 3 of them here. - ASSERT_EQ(record_reader->levels_written(), 5); - ASSERT_EQ(record_reader->levels_position(), 3); + { + // Skip {null, 20} + // This also tests when we start with a Skip. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); + + ASSERT_EQ(records_skipped, 2); + ASSERT_EQ(record_reader->values_written(), 0); + // Since we started with skipping, there was nothing in the buffer to consume + // for skipping. + ASSERT_EQ(record_reader->levels_written(), 0); + ASSERT_EQ(record_reader->levels_position(), 0); + } - const auto read_values = - reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written() - - record_reader->null_count()); + { + // Read 3 records: {20, 20, null} + int64_t records_read = record_reader->ReadRecords(/*num_records=*/3); + + ASSERT_EQ(records_read, 3); + // One of these values is null. values_written() includes null values. + ASSERT_EQ(record_reader->values_written(), 3); + // When we read, we buffer some levels. Since there are only a few levels + // in our whole column, all of them are read. + // We had skipped 2 of the levels above. So there is only 5 left in total to + // read, and we read 3 of them here. + ASSERT_EQ(record_reader->levels_written(), 5); + ASSERT_EQ(record_reader->levels_position(), 3); + + // Check that definition levels are correct. Repetition levels are not + // relevant since this is not a repeated field. + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + + // Double check that the values are untouched. + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20})); + ASSERT_TRUE(vector_equal(read_vals, {20, 20})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 0})); + } - // Skip to the end of the column. - records_skipped = record_reader->SkipRecords(/*num_records=*/5); + { + // Skip to the end of the column. Skip {30, 30}. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/5); + + ASSERT_EQ(records_skipped, 2); + + // When we skip number of values written does not change. However, we will + // throw away the levels that were skipped from the buffer, thus levels_written() + // is reduced by 2. + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->levels_written(), 3); + ASSERT_EQ(record_reader->levels_position(), 3); + + // Check the definition levels to make sure we can read it correctly and we + // threw away the levels in the buffer that were skipped. + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + + // Double check that the values are untouched. + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_EQ(records_skipped, 2); - // When we skip number of values written does not change, and we advance the - // levels position to skip reading the levels. - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->levels_written(), 5); - ASSERT_EQ(record_reader->levels_position(), 5); + ASSERT_TRUE(vector_equal(read_vals, {20, 20})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 0})); + } // We have exhausted all the records. ASSERT_EQ(record_reader->ReadRecords(/*num_records=*/3), 0); @@ -755,8 +786,7 @@ TEST(RecordReaderTest, SkipRepeated) { level_info.rep_level = 1; NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); // Records look like {null, [20, 20, 20], null, [30, 30], [40]} std::vector> pages; @@ -777,60 +807,95 @@ TEST(RecordReaderTest, SkipRepeated) { internal::RecordReader::Make(&descr, level_info); record_reader->SetPageReader(std::move(pager)); - // This should skip the first null record. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); - - ASSERT_EQ(records_skipped, 1); - ASSERT_EQ(record_reader->values_written(), 0); - ASSERT_EQ(record_reader->null_count(), 0); - // For repeated fields, we need to read the levels to find the record - // boundaries and skip. So some levels are read. - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 1); - - // Read [20, 20, 20] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + { + // This should skip the first null record. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); - ASSERT_EQ(records_read, 1); - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 4); + ASSERT_EQ(records_skipped, 1); + ASSERT_EQ(record_reader->values_written(), 0); + ASSERT_EQ(record_reader->null_count(), 0); + // For repeated fields, we need to read the levels to find the record + // boundaries and skip. So some levels are read, however, the skipped + // level should not be there after the skip. That's why levels_position() + // is 0. + ASSERT_EQ(record_reader->levels_written(), 7); + ASSERT_EQ(record_reader->levels_position(), 0); + } - const auto read_values = - reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written() - - record_reader->null_count()); + { + // Read [20, 20, 20] + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); + ASSERT_EQ(records_read, 1); + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader->levels_written(), 7); + ASSERT_EQ(record_reader->levels_position(), 3); + + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + std::vector read_reps( + record_reader->rep_levels(), + record_reader->rep_levels() + record_reader->levels_position()); + + ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); + ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1})); + } - // Skip the null record and also skip [30, 30] - records_skipped = record_reader->SkipRecords(/*num_records=*/2); + { + // Skip the null record and also skip [30, 30] + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); - ASSERT_EQ(records_skipped, 2); - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 7); + ASSERT_EQ(records_skipped, 2); + ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 0); + // We remove the skipped levels from the buffer. + ASSERT_EQ(record_reader->levels_written(), 4); + ASSERT_EQ(record_reader->levels_position(), 3); + + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + std::vector read_reps( + record_reader->rep_levels(), + record_reader->rep_levels() + record_reader->levels_position()); + + ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); + ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1})); + } { // Read [40] - records_read = record_reader->ReadRecords(/*num_records=*/1); + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); ASSERT_EQ(record_reader->values_written(), 4); ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 8); - - const auto read_values = - reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + ASSERT_EQ(record_reader->levels_written(), 4); + ASSERT_EQ(record_reader->levels_position(), 4); + + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + std::vector read_reps( + record_reader->rep_levels(), + record_reader->rep_levels() + record_reader->levels_position()); ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20, 40})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1, 1})); + ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1, 0})); } } @@ -841,8 +906,7 @@ TEST(RecordReaderTest, ReadPartialRecord) { level_info.rep_level = 1; NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); std::vector> pages; std::unique_ptr pager; @@ -889,11 +953,10 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); - const auto read_values = - reinterpret_cast(record_reader->values()); + const auto read_values = reinterpret_cast(record_reader->values()); std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); ASSERT_TRUE(vector_equal(read_vals, {10})); } @@ -904,11 +967,10 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); - const auto read_values = - reinterpret_cast(record_reader->values()); + const auto read_values = reinterpret_cast(record_reader->values()); std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20})); } @@ -919,11 +981,10 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); - const auto read_values = - reinterpret_cast(record_reader->values()); + const auto read_values = reinterpret_cast(record_reader->values()); std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20, 30})); } @@ -937,8 +998,7 @@ TEST(RecordReaderTest, SkipPartialRecord) { level_info.rep_level = 1; NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); std::vector> pages; std::unique_ptr pager; @@ -985,11 +1045,10 @@ TEST(RecordReaderTest, SkipPartialRecord) { ASSERT_EQ(records_read, 1); - const auto read_values = - reinterpret_cast(record_reader->values()); + const auto read_values = reinterpret_cast(record_reader->values()); std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + read_values, + read_values + record_reader->values_written() - record_reader->null_count()); ASSERT_TRUE(vector_equal(read_vals, {10})); ASSERT_EQ(record_reader->values_written(), 1); @@ -1006,10 +1065,8 @@ TEST(RecordReaderTest, SkipPartialRecord) { ASSERT_EQ(records_skipped, 1); ASSERT_EQ(record_reader->values_written(), 1); ASSERT_EQ(record_reader->null_count(), 0); - // For repeated fields, we need to read the levels to find the record - // boundaries and skip. So some levels are read. - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 7); + ASSERT_EQ(record_reader->levels_written(), 2); + ASSERT_EQ(record_reader->levels_position(), 1); } { @@ -1019,16 +1076,22 @@ TEST(RecordReaderTest, SkipPartialRecord) { ASSERT_EQ(records_read, 1); ASSERT_EQ(record_reader->values_written(), 2); ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 8); - ASSERT_EQ(record_reader->levels_position(), 8); - - const auto read_values = - reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, read_values + record_reader->values_written() - - record_reader->null_count()); + ASSERT_EQ(record_reader->levels_written(), 2); + ASSERT_EQ(record_reader->levels_position(), 2); + + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + std::vector read_reps( + record_reader->rep_levels(), + record_reader->rep_levels() + record_reader->levels_position()); ASSERT_TRUE(vector_equal(read_vals, {10, 30})); + ASSERT_TRUE(vector_equal(read_defs, {1, 1})); + ASSERT_TRUE(vector_equal(read_reps, {0, 0})); } } From ba87e4da3bd53983adec86956afd4815fcf805c2 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 20 Sep 2022 21:06:25 +0000 Subject: [PATCH 05/24] Use gmock ElementsAre for comparing vectors. --- cpp/src/parquet/column_reader_test.cc | 50 ++++++++++++++------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 5f8b3895c28..89305a03062 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -16,6 +16,7 @@ // under the License. #include +#include #include #include @@ -37,6 +38,7 @@ namespace parquet { using schema::NodePtr; +using testing::ElementsAre; namespace test { @@ -622,9 +624,9 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { record_reader->rep_levels(), record_reader->rep_levels() + record_reader->levels_position()); - ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); - ASSERT_TRUE(vector_equal(read_reps, {0, 0, 1})); + ASSERT_THAT(read_vals, ElementsAre(10, 20, 20)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); + ASSERT_THAT(read_reps, ElementsAre(0, 0, 1)); } // Test that we can skip required top level field. @@ -672,7 +674,7 @@ TEST(RecordReaderTest, SkipRequiredTopLevel) { std::vector read_vals(read_values, read_values + record_reader->values_written()); - ASSERT_TRUE(vector_equal(read_vals, {30, 30})); + ASSERT_THAT(read_vals, ElementsAre(30, 30)); } // Skip an optional field. Intentionally included some null values. @@ -741,8 +743,8 @@ TEST(RecordReaderTest, SkipOptional) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 0})); + ASSERT_THAT(read_vals, ElementsAre(20, 20)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 0)); } { @@ -770,8 +772,8 @@ TEST(RecordReaderTest, SkipOptional) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 0})); + ASSERT_THAT(read_vals, ElementsAre(20, 20)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 0)); } // We have exhausted all the records. @@ -842,9 +844,9 @@ TEST(RecordReaderTest, SkipRepeated) { record_reader->rep_levels(), record_reader->rep_levels() + record_reader->levels_position()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); - ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1})); + ASSERT_THAT(read_vals, ElementsAre(20, 20, 20)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); + ASSERT_THAT(read_reps, ElementsAre(0, 1, 1)); } { @@ -868,9 +870,9 @@ TEST(RecordReaderTest, SkipRepeated) { record_reader->rep_levels(), record_reader->rep_levels() + record_reader->levels_position()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1})); - ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1})); + ASSERT_THAT(read_vals, ElementsAre(20, 20, 20)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); + ASSERT_THAT(read_reps, ElementsAre(0, 1, 1)); } { @@ -893,9 +895,9 @@ TEST(RecordReaderTest, SkipRepeated) { record_reader->rep_levels(), record_reader->rep_levels() + record_reader->levels_position()); - ASSERT_TRUE(vector_equal(read_vals, {20, 20, 20, 40})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1, 1, 1})); - ASSERT_TRUE(vector_equal(read_reps, {0, 1, 1, 0})); + ASSERT_THAT(read_vals, ElementsAre(20, 20, 20, 40)); + ASSERT_THAT(read_defs, ElementsAre(1, 1, 1, 1)); + ASSERT_THAT(read_reps, ElementsAre(0, 1, 1, 0)); } } @@ -958,7 +960,7 @@ TEST(RecordReaderTest, ReadPartialRecord) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {10})); + ASSERT_THAT(read_vals, ElementsAre(10)); } { @@ -972,7 +974,7 @@ TEST(RecordReaderTest, ReadPartialRecord) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20})); + ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20)); } { @@ -986,7 +988,7 @@ TEST(RecordReaderTest, ReadPartialRecord) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {10, 20, 20, 20, 20, 20, 20, 30})); + ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20, 30)); } } @@ -1050,7 +1052,7 @@ TEST(RecordReaderTest, SkipPartialRecord) { read_values, read_values + record_reader->values_written() - record_reader->null_count()); - ASSERT_TRUE(vector_equal(read_vals, {10})); + ASSERT_THAT(read_vals, ElementsAre(10)); ASSERT_EQ(record_reader->values_written(), 1); ASSERT_EQ(record_reader->null_count(), 0); // There are 4 levels in the first page. @@ -1089,9 +1091,9 @@ TEST(RecordReaderTest, SkipPartialRecord) { record_reader->rep_levels(), record_reader->rep_levels() + record_reader->levels_position()); - ASSERT_TRUE(vector_equal(read_vals, {10, 30})); - ASSERT_TRUE(vector_equal(read_defs, {1, 1})); - ASSERT_TRUE(vector_equal(read_reps, {0, 0})); + ASSERT_THAT(read_vals, ElementsAre(10, 30)); + ASSERT_THAT(read_defs, ElementsAre(1, 1)); + ASSERT_THAT(read_reps, ElementsAre(0, 0)); } } From 921493b502fc83943edad17348383dd492e64ce3 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Thu, 22 Sep 2022 22:08:51 +0000 Subject: [PATCH 06/24] Added test for Skipping ByteArrays. Added more scenarios to the existing test cases in previous commit. --- cpp/src/parquet/column_reader.cc | 12 +- cpp/src/parquet/column_reader_test.cc | 197 +++++++++++++++++++------- 2 files changed, 150 insertions(+), 59 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ab3243926e4..467f39077c7 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1383,7 +1383,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Now that we have figured out number of values to read, we do not need // these levels anymore. Updates levels_position_ and levels_written. ThrowAwayLevels(start_levels_position); - ReadAndThrowAway(values_to_read); + ReadAndThrowAwayValues(values_to_read); // Mark the levels as read in the underlying column reader. this->ConsumeBufferedValues(skipped_records); @@ -1450,7 +1450,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t remaining_records = num_records - skipped_records; int64_t values_seen = 0; skipped_records += DelimitRecords(remaining_records, &values_seen); - if (ReadAndThrowAway(values_seen) != values_seen) { + if (ReadAndThrowAwayValues(values_seen) != values_seen) { throw ParquetException("Could not read and throw away requested values"); } // Mark those levels and values as consumed in the the underlying page. @@ -1465,7 +1465,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, } // Read 'num_values' values and throw them away. - int64_t ReadAndThrowAway(int64_t num_values) { + int64_t ReadAndThrowAwayValues(int64_t num_values) { int64_t values_left = num_values; int64_t batch_size = kMinLevelBatchSize; // ReadBatch with a smaller memory footprint int64_t values_read = 0; @@ -1496,10 +1496,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, // First consume whatever is in the buffer. skipped_records = SkipRecordsInBufferNonRepeated(num_records); - // If there are more records left, we should have exhausted all the - // buffer. - ARROW_DCHECK(!this->has_values_to_process() || - skipped_records < num_records); + ARROW_DCHECK_LE(skipped_records, num_records); + // For records that we have not buffered, we will use the column // reader's Skip to do the remaining Skip. Since the field is not // repeated number of levels to skip is the same as number of records diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 89305a03062..7ae251ad815 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -28,7 +28,9 @@ #include #include "arrow/util/macros.h" +#include "arrow/api.h" #include "arrow/util/make_unique.h" +#include "arrow/util/string_view.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" #include "parquet/schema.h" @@ -39,6 +41,8 @@ namespace parquet { using schema::NodePtr; using testing::ElementsAre; +using parquet::internal::BinaryRecordReader; +using ::arrow::util::string_view; namespace test { @@ -582,9 +586,7 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { level_info.rep_level = 1; NodePtr type = schema::Int32("b", Repetition::OPTIONAL); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); - + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); // Records look like: {[10], [20, 20], [30, 30, 30]} std::vector> pages; @@ -613,10 +615,10 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { ASSERT_EQ(record_reader->levels_written(), 6); ASSERT_EQ(record_reader->levels_position(), 3); - const auto read_values = - reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals( + read_values, + read_values + record_reader->values_written() ); std::vector read_defs( record_reader->def_levels(), record_reader->def_levels() + record_reader->levels_position()); @@ -636,8 +638,7 @@ TEST(RecordReaderTest, SkipRequiredTopLevel) { level_info.rep_level = 0; NodePtr type = schema::Int32("b", Repetition::REQUIRED); - const ColumnDescriptor descr(type, level_info.def_level, - level_info.rep_level); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); std::vector> pages; std::vector values = {10, 20, 20, 30, 30, 30}; @@ -666,14 +667,15 @@ TEST(RecordReaderTest, SkipRequiredTopLevel) { ASSERT_EQ(records_read, 2); ASSERT_EQ(record_reader->values_written(), 2); + ASSERT_EQ(record_reader->null_count(), 0); ASSERT_EQ(record_reader->levels_written(), 0); ASSERT_EQ(record_reader->levels_position(), 0); - const auto read_values = - reinterpret_cast(record_reader->values()); + const auto read_values = reinterpret_cast(record_reader->values()); std::vector read_vals(read_values, read_values + record_reader->values_written()); + // There are no null values to account for here. ASSERT_THAT(read_vals, ElementsAre(30, 30)); } @@ -686,10 +688,10 @@ TEST(RecordReaderTest, SkipOptional) { NodePtr type = schema::Int32("b", Repetition::OPTIONAL); const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); - // Records look like {null, 20, 20, 20, null, 30, 30} + // Records look like {null, 10, 20, 30, null, 40, 50, 60} std::vector> pages; - std::vector values = {20, 20, 20, 30, 30}; - std::vector def_levels = {0, 1, 1, 1, 0, 1, 1}; + std::vector values = {10, 20, 30, 40, 50, 60}; + std::vector def_levels = {0, 1, 1, 0, 1, 1, 1, 1}; std::unique_ptr pager; std::shared_ptr page = MakeDataPage( @@ -705,7 +707,7 @@ TEST(RecordReaderTest, SkipOptional) { record_reader->SetPageReader(std::move(pager)); { - // Skip {null, 20} + // Skip {null, 10} // This also tests when we start with a Skip. int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); @@ -718,62 +720,80 @@ TEST(RecordReaderTest, SkipOptional) { } { - // Read 3 records: {20, 20, null} + // Read 3 records: {20, null, 30} int64_t records_read = record_reader->ReadRecords(/*num_records=*/3); ASSERT_EQ(records_read, 3); // One of these values is null. values_written() includes null values. ASSERT_EQ(record_reader->values_written(), 3); + ASSERT_EQ(record_reader->null_count(), 1); // When we read, we buffer some levels. Since there are only a few levels // in our whole column, all of them are read. - // We had skipped 2 of the levels above. So there is only 5 left in total to + // We had skipped 2 of the levels above. So there is only 6 left in total to // read, and we read 3 of them here. - ASSERT_EQ(record_reader->levels_written(), 5); + ASSERT_EQ(record_reader->levels_written(), 6); ASSERT_EQ(record_reader->levels_position(), 3); - // Check that definition levels are correct. Repetition levels are not - // relevant since this is not a repeated field. std::vector read_defs( record_reader->def_levels(), record_reader->def_levels() + record_reader->levels_position()); - // Double check that the values are untouched. const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); - ASSERT_THAT(read_vals, ElementsAre(20, 20)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 0)); + // ReadRecords for optional fields uses ReadValuesSpaced, so there is a + // placeholder for null. + ASSERT_EQ(read_vals[0], 20); + // read_vals[1] is a space for null. + ASSERT_EQ(read_vals[2], 30); + ASSERT_THAT(read_defs, ElementsAre(1, 0, 1)); } { - // Skip to the end of the column. Skip {30, 30}. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/5); + // Skip {40, 50}. + int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); ASSERT_EQ(records_skipped, 2); - // When we skip number of values written does not change. However, we will - // throw away the levels that were skipped from the buffer, thus levels_written() - // is reduced by 2. ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->levels_written(), 3); + ASSERT_EQ(record_reader->levels_written(), 4); ASSERT_EQ(record_reader->levels_position(), 3); - // Check the definition levels to make sure we can read it correctly and we - // threw away the levels in the buffer that were skipped. std::vector read_defs( record_reader->def_levels(), record_reader->def_levels() + record_reader->levels_position()); - // Double check that the values are untouched. const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); + } + + { + // Read to the end of the column. Read {60} + // This test checks that ReadAndThrowAwayValues works, since if it + // does not we would read the wrong values. + int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + + ASSERT_EQ(records_read, 1); + ASSERT_EQ(record_reader->values_written(), 4); + ASSERT_EQ(record_reader->null_count(), 1); + ASSERT_EQ(record_reader->levels_written(), 4); + ASSERT_EQ(record_reader->levels_position(), 4); + + std::vector read_defs( + record_reader->def_levels(), + record_reader->def_levels() + record_reader->levels_position()); + + const auto read_values = reinterpret_cast(record_reader->values()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); - ASSERT_THAT(read_vals, ElementsAre(20, 20)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 0)); + ASSERT_EQ(read_vals[0], 20); + // read_vals[1] is a space for null. + ASSERT_EQ(read_vals[2], 30); + ASSERT_EQ(read_vals[3], 60); + ASSERT_THAT(read_defs, ElementsAre(1, 0, 1, 1)); } // We have exhausted all the records. @@ -956,9 +976,8 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); ASSERT_THAT(read_vals, ElementsAre(10)); } @@ -970,9 +989,8 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20)); } @@ -984,9 +1002,8 @@ TEST(RecordReaderTest, ReadPartialRecord) { ASSERT_EQ(records_read, 1); const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20, 30)); } @@ -1048,9 +1065,8 @@ TEST(RecordReaderTest, SkipPartialRecord) { ASSERT_EQ(records_read, 1); const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() - record_reader->null_count()); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); ASSERT_THAT(read_vals, ElementsAre(10)); ASSERT_EQ(record_reader->values_written(), 1); @@ -1097,5 +1113,82 @@ TEST(RecordReaderTest, SkipPartialRecord) { } } +// Test that Skip works on ByteArrays. Specifically, this is testing +// ReadAndThrowAwayValues for ByteArrays. +TEST(RecordReaderTest, SkipByteArray) { + internal::LevelInfo level_info; + level_info.def_level = 1; + level_info.rep_level = 0; + + // Must use REPEATED to excercise ReadAndThrowAwayValues for ByteArrays. It + // does not do any buffering for Optional or Required fields as it calls + // ResetValues after every read. + NodePtr type = schema::ByteArray("b", Repetition::OPTIONAL); + const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); + + std::vector> pages; + std::unique_ptr pager; + + int levels_per_page = 90; + int num_pages = 1; + + std::vector def_levels; + std::vector rep_levels; + std::vector values; + std::vector buffer; + + MakePages(&descr, num_pages, levels_per_page, def_levels, rep_levels, + values, buffer, pages, Encoding::PLAIN); + + pager.reset(new test::MockPageReader(pages)); + + std::shared_ptr record_reader = + internal::RecordReader::Make(&descr, level_info); + record_reader->SetPageReader(std::move(pager)); + + // Read one-third of the page. + ASSERT_EQ(record_reader->ReadRecords(/*num_records=*/30), 30); + + // Skip 30 records. + ASSERT_EQ(record_reader->SkipRecords(/*num_records=*/30), 30); + + // Read 60 more records. Only 30 will be read, since we read 30 and skipped 30, + // so only 30 is left. + ASSERT_EQ(record_reader->ReadRecords(/*num_records=*/60), 30); + + auto binary_reader = dynamic_cast(record_reader.get()); + ASSERT_NE(binary_reader, nullptr); + // Chunks are reset after this call. + ::arrow::ArrayVector array_vector = binary_reader->GetBuilderChunks(); + ASSERT_EQ(array_vector.size(), 1); + auto binary_array = dynamic_cast<::arrow::BinaryArray*>(array_vector[0].get()); + ASSERT_NE(binary_array, nullptr); + ASSERT_EQ(binary_array->length(), 60); + + // Our values above are not spaced, however, the RecordReader will + // read spaced for nullable values. + // Create spaced expected values. + std::vector expected_values; + size_t values_index = 0; + for (int i = 0; i < 90; ++i) { + if (def_levels[i] == 0) { + expected_values.emplace_back(); + continue; + } + expected_values.emplace_back(reinterpret_cast(values[values_index].ptr), + values[values_index].len); + ++values_index; + } + + // Check that the expected values match the actual values. + for (size_t i = 0; i < 30; ++i) { + ASSERT_EQ(expected_values[i].compare(binary_array->GetView(i)), 0); + } + // Repeat for the next range that we read. + for (size_t i = 60; i < 90; ++i) { + ASSERT_EQ(expected_values[i].compare(binary_array->GetView(i - 30)), 0); + } +} + } // namespace test } // namespace parquet From 7897740efafaa33057a0a58408ad886ac3774a78 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Thu, 22 Sep 2022 22:28:46 +0000 Subject: [PATCH 07/24] Fix minor lint issue. --- cpp/src/parquet/column_reader_test.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 7ae251ad815..0cc12450509 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -616,9 +616,8 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { ASSERT_EQ(record_reader->levels_position(), 3); const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals( - read_values, - read_values + record_reader->values_written() ); + std::vector read_vals(read_values, + read_values + record_reader->values_written()); std::vector read_defs( record_reader->def_levels(), record_reader->def_levels() + record_reader->levels_position()); From 1d0c22e336fdd0ec98aa0d4404f3d79a2c9d6c13 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Fri, 30 Sep 2022 19:54:44 +0000 Subject: [PATCH 08/24] Address comments --- cpp/src/parquet/column_reader.cc | 106 ++++++++++++++------------ cpp/src/parquet/column_reader.h | 50 ++++++------ cpp/src/parquet/column_reader_test.cc | 5 +- 3 files changed, 81 insertions(+), 80 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 467f39077c7..7b365bfd8f9 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1339,17 +1339,19 @@ class TypedRecordReader : public TypedColumnReaderImpl, // This is inefficient, though necessary to consume levels that we have // already read into the buffer and we want to Skip. void ThrowAwayLevels(int64_t start_levels_position) { - ARROW_DCHECK_LE(levels_position_, levels_written_); - ARROW_DCHECK_LE(start_levels_position, levels_position_); - int64_t gap = levels_position_ - start_levels_position; + ARROW_DCHECK_LE(levels_position_, levels_written_); + ARROW_DCHECK_LE(start_levels_position, levels_position_); - for (int64_t i = levels_position_; i < levels_written_; ++i) { - *(def_levels() + i - gap) = *(def_levels() + i); - *(rep_levels() + i - gap) = *(rep_levels() + i); - } + int64_t gap = levels_position_ - start_levels_position; + if (gap == 0) return; - levels_written_ -= gap; - levels_position_ -= gap; + std::copy(def_levels() + levels_position_, def_levels() + levels_written_, + def_levels() + levels_position_ - gap); + std::copy(rep_levels() + levels_position_, rep_levels() + levels_written_, + rep_levels() + levels_position_ - gap); + + levels_written_ -= gap; + levels_position_ -= gap; } // Skip records that we have in our buffer. This function is only for @@ -1381,8 +1383,12 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t values_to_read = validity_io.values_read - validity_io.null_count; // Now that we have figured out number of values to read, we do not need - // these levels anymore. Updates levels_position_ and levels_written. + // these levels anymore. We will remove these values from the buffer. + // This requires shifting the levels in the buffer to left. So this will + // update levels_position_ and levels_written_. ThrowAwayLevels(start_levels_position); + // For values, we do not have them in buffer, so we will read them and + // throw them away. ReadAndThrowAwayValues(values_to_read); // Mark the levels as read in the underlying column reader. @@ -1405,9 +1411,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, // means that for the last record that was counted, we have not seen all // of it's values yet. while (!at_record_start_ || skipped_records < num_records) { - // Is there more data to read in this row group? - // HasNextInternal() will advance to the next page if necessary. - if (!this->HasNextInternal()) { + // Is there more data to read in this row group? + // HasNextInternal() will advance to the next page if necessary. + if (!this->HasNextInternal()) { if (!at_record_start_) { // We ended the row group while inside a record that we haven't seen // the end of yet. So increment the record count for the last record @@ -1416,49 +1422,49 @@ class TypedRecordReader : public TypedColumnReaderImpl, at_record_start_ = true; } break; - } - - // Read some more levels. - int64_t batch_size = std::min(level_batch_size, available_values_current_page()); - // No more data in column. This must be an empty page. - // If we had exhausted the last page, HasNextInternal() must have advanced - // to the next page. So there must be available values to process. - if (batch_size == 0) { + } + + // Read some more levels. + int64_t batch_size = std::min(level_batch_size, available_values_current_page()); + // No more data in column. This must be an empty page. + // If we had exhausted the last page, HasNextInternal() must have advanced + // to the next page. So there must be available values to process. + if (batch_size == 0) { break; - } + } - // For skip we will read the levels and append them to the end - // of the def_levels and rep_levels just like for read. - ReserveLevels(batch_size); + // For skip we will read the levels and append them to the end + // of the def_levels and rep_levels just like for read. + ReserveLevels(batch_size); - int16_t* def_levels = this->def_levels() + levels_written_; - int16_t* rep_levels = this->rep_levels() + levels_written_; + int16_t* def_levels = this->def_levels() + levels_written_; + int16_t* rep_levels = this->rep_levels() + levels_written_; - int64_t levels_read = 0; - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { + int64_t levels_read = 0; + levels_read = this->ReadDefinitionLevels(batch_size, def_levels); + if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { throw ParquetException("Number of decoded rep / def levels did not match"); - } - - levels_written_ += levels_read; - - // Look at the buffered levels, delimit them based on - // (rep_level == 0), report back how many records are in there, and - // fill in how many not-null values (def_level == max_def_level_). - // DelimitRecords updates levels_position_. - int64_t start_levels_position = levels_position_; - int64_t remaining_records = num_records - skipped_records; - int64_t values_seen = 0; - skipped_records += DelimitRecords(remaining_records, &values_seen); - if (ReadAndThrowAwayValues(values_seen) != values_seen) { + } + + levels_written_ += levels_read; + + // Look at the buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t start_levels_position = levels_position_; + int64_t remaining_records = num_records - skipped_records; + int64_t values_seen = 0; + skipped_records += DelimitRecords(remaining_records, &values_seen); + if (ReadAndThrowAwayValues(values_seen) != values_seen) { throw ParquetException("Could not read and throw away requested values"); - } - // Mark those levels and values as consumed in the the underlying page. - // This must be done before we throw away levels since it updates - // levels_position_ and levels_written_. - this->ConsumeBufferedValues(levels_position_ - start_levels_position); - // Updated levels_position_ and levels_written_. - ThrowAwayLevels(start_levels_position); + } + // Mark those levels and values as consumed in the the underlying page. + // This must be done before we throw away levels since it updates + // levels_position_ and levels_written_. + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + // Updated levels_position_ and levels_written_. + ThrowAwayLevels(start_levels_position); } return skipped_records; diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 3c69758833d..34f42e37949 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -326,6 +326,7 @@ class RecordReader { uint8_t* values() const { return values_->mutable_data(); } /// \brief Number of values written including nulls (if any) + /// There is no read-ahead/buffering for values. int64_t values_written() const { return values_written_; } /// \brief Number of definition / repetition levels (from those that have @@ -334,9 +335,8 @@ class RecordReader { /// \brief Number of definition / repetition levels that have been written /// internally in the reader. This may be larger than values_written() because - // for repeated fields, we need to look at the levels in advance to figure out - // the record boundaries. Once we found the boundaries, we know exactly how - // many values to read. So we do not have buffering for the values. + /// for repeated fields we need to look at the levels in advance to figure out + /// the record boundaries. int64_t levels_written() const { return levels_written_; } /// \brief Number of nulls in the leaf that we have read so far. @@ -349,48 +349,46 @@ class RecordReader { bool read_dictionary() const { return read_dictionary_; } protected: - // Indicates if we can have nullable values. + /// \brief Indicates if we can have nullable values. bool nullable_values_; bool at_record_start_; int64_t records_read_; - // Stores values. These values are populated based on each ReadRecords call. - // No extra values are buffered for the next call. SkipRecords will not - // add any value to this buffer. + /// \brief Stores values. These values are populated based on each ReadRecords + /// call. No extra values are buffered for the next call. SkipRecords will not + /// add any value to this buffer. std::shared_ptr<::arrow::ResizableBuffer> values_; - // In the case of false (BYTE_ARRAY), don't allocate the values buffer - // (when we directly read into builder classes). + /// \brief False for BYTE_ARRAY, in which case we don't allocate the values + /// buffer and we directly read into builder classes. bool uses_values_; - // Values that we have read into 'values_' + 'null_count_'. + /// \brief Values that we have read into 'values_' + 'null_count_'. int64_t values_written_; int64_t values_capacity_; int64_t null_count_; - // Each element corresponds to one element in 'values_' and specifies if it - // is null or not null. + /// \brief Each element corresponds to one element in 'values_' and specifies if it + /// is null or not null. std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; - // Buffers for repetition and definition levels. These buffers may have - // more levels than is actually read. This is because we read levels ahead to - // figure our record boundaries for repeated fields. 'levels_written_' shows - // the total number of levels that is in the buffer. 'levels_position_' points - // to the next level that should be consumed. ReadRecords and SkipRecords both - // advance 'levels_written_' and 'levels_position_'. SkipRecords then will - // remove the skipped levels by shifting the levels to the left. - // For flat required fields, 'def_levels_' and 'rep_levels_' are not - // populated. For non-repeated fields 'rep_levels_' is not populated. - // 'def_levels_' and 'rep_levels_' must be of the same size if present. + /// \brief Buffer for definition levels. May contain more levels than + /// is actually read. This is because we read levels ahead to + /// figure out record boundaries for repeated fields. + /// For flat required fields, 'def_levels_' and 'rep_levels_' are not + /// populated. For non-repeated fields 'rep_levels_' is not populated. + /// 'def_levels_' and 'rep_levels_' must be of the same size if present. std::shared_ptr<::arrow::ResizableBuffer> def_levels_; + /// \brief Buffer for repetition levels. Only populated for repeated + /// fields. std::shared_ptr<::arrow::ResizableBuffer> rep_levels_; /// \brief Number of definition / repetition levels that have been written - /// internally in the reader. This may be larger than values_written() because - // for repeated fields, we need to look at the levels in advance to figure out - // the record boundaries. Once we found the boundaries, we know exactly how - // many values to read. So we do not have buffering for the values. + /// internally in the reader. This may be larger than values_written() since + /// for repeated fields we need to look at the levels in advance to figure out + /// the record boundaries. int64_t levels_written_; + /// \brief Position of the next level that should be consumed. int64_t levels_position_; int64_t levels_capacity_; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 15d43dc7536..c7a9e0c3e1f 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -29,8 +29,6 @@ #include "arrow/api.h" #include "arrow/util/macros.h" -#include "arrow/util/make_unique.h" -#include "arrow/util/string_view.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" #include "parquet/schema.h" @@ -42,7 +40,6 @@ namespace parquet { using schema::NodePtr; using testing::ElementsAre; using parquet::internal::BinaryRecordReader; -using ::arrow::util::string_view; namespace test { @@ -1167,7 +1164,7 @@ TEST(RecordReaderTest, SkipByteArray) { // Our values above are not spaced, however, the RecordReader will // read spaced for nullable values. // Create spaced expected values. - std::vector expected_values; + std::vector expected_values; size_t values_index = 0; for (int i = 0; i < 90; ++i) { if (def_levels[i] == 0) { From e91ccf599a5329c722fab34f1f377a0b1694b01f Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Mon, 3 Oct 2022 19:08:46 +0000 Subject: [PATCH 09/24] Fix ClangFormat. --- cpp/src/parquet/column_reader.cc | 13 ++++++------- cpp/src/parquet/column_reader.h | 2 +- cpp/src/parquet/column_reader_test.cc | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 7b365bfd8f9..b72121d8c2c 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1149,7 +1149,8 @@ int64_t TypedColumnReaderImpl::Skip(int64_t num_values_to_skip) { } else { // We need to read this Page // Jump to the right offset in the Page - int64_t batch_size = kMinLevelBatchSize; // ReadBatch with a smaller memory footprint + int64_t batch_size = + kMinLevelBatchSize; // ReadBatch with a smaller memory footprint int64_t values_read = 0; // This will be enough scratch space to accommodate 16-bit levels or any @@ -1225,8 +1226,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, public: using T = typename DType::c_type; using BASE = TypedColumnReaderImpl; - TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, - MemoryPool* pool) + TypedRecordReader(const ColumnDescriptor* descr, LevelInfo leaf_info, MemoryPool* pool) // Pager must be set using SetPageReader. : BASE(descr, /* pager = */ nullptr, pool) { leaf_info_ = leaf_info; @@ -1377,8 +1377,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, validity_io.values_read_upper_bound = skipped_records; validity_io.valid_bits = valid_bits->mutable_data(); validity_io.valid_bits_offset = 0; - DefLevelsToBitmap(def_levels() + start_levels_position, - skipped_records, + DefLevelsToBitmap(def_levels() + start_levels_position, skipped_records, this->leaf_info_, &validity_io); int64_t values_to_read = validity_io.values_read - validity_io.null_count; @@ -1483,8 +1482,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, this->pool_, batch_size * std::max(sizeof(int16_t), value_size)); do { batch_size = std::min(batch_size, values_left); - values_read = this->ReadValues( - batch_size, reinterpret_cast(scratch->mutable_data())); + values_read = + this->ReadValues(batch_size, reinterpret_cast(scratch->mutable_data())); values_left -= values_read; } while (values_read > 0 && values_left > 0); return num_values - values_left; diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 34f42e37949..e82fb6776f0 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -349,7 +349,7 @@ class RecordReader { bool read_dictionary() const { return read_dictionary_; } protected: - /// \brief Indicates if we can have nullable values. + /// \brief Indicates if we can have nullable values. bool nullable_values_; bool at_record_start_; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index c7a9e0c3e1f..b5731d632ea 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include #include +#include #include #include @@ -37,9 +37,9 @@ namespace parquet { +using parquet::internal::BinaryRecordReader; using schema::NodePtr; using testing::ElementsAre; -using parquet::internal::BinaryRecordReader; namespace test { From 66e2058089af59ddaa3fe9208497c5e4cdec0692 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 18:16:26 +0000 Subject: [PATCH 10/24] Fix a bug in ThrowAwayLevels, only deal with repetition levels for repeated fields. --- cpp/src/parquet/column_reader.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index b72121d8c2c..d1df4e46c12 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1341,14 +1341,18 @@ class TypedRecordReader : public TypedColumnReaderImpl, void ThrowAwayLevels(int64_t start_levels_position) { ARROW_DCHECK_LE(levels_position_, levels_written_); ARROW_DCHECK_LE(start_levels_position, levels_position_); + ARROW_DCHECK_GT(this->max_def_level_, 0); int64_t gap = levels_position_ - start_levels_position; if (gap == 0) return; std::copy(def_levels() + levels_position_, def_levels() + levels_written_, def_levels() + levels_position_ - gap); - std::copy(rep_levels() + levels_position_, rep_levels() + levels_written_, - rep_levels() + levels_position_ - gap); + + if (this->max_rep_level_ > 0) { + std::copy(rep_levels() + levels_position_, rep_levels() + levels_written_, + rep_levels() + levels_position_ - gap); + } levels_written_ -= gap; levels_position_ -= gap; @@ -1358,7 +1362,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, // non-repeated fields. int64_t SkipRecordsInBufferNonRepeated(int64_t num_records) { ARROW_DCHECK_EQ(this->max_rep_level_, 0); - if (!this->has_values_to_process()) return 0; + if (!this->has_values_to_process() || num_records == 0) return 0; int64_t remaining_records = levels_written_ - levels_position_; int64_t skipped_records = std::min(num_records, remaining_records); @@ -1655,6 +1659,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, } } + +// XXXXXXXXXXX I SHOULD DO SOMETHING LIKE THIS WHEN SHIFTING THE LEVELS. void Reset() override { ResetValues(); From 613f0b03e1c7477a672d35439f86bb7da5f576aa Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 18:19:34 +0000 Subject: [PATCH 11/24] Remove unnecessary comment --- cpp/src/parquet/column_reader.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index d1df4e46c12..4edac7905e2 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1659,8 +1659,6 @@ class TypedRecordReader : public TypedColumnReaderImpl, } } - -// XXXXXXXXXXX I SHOULD DO SOMETHING LIKE THIS WHEN SHIFTING THE LEVELS. void Reset() override { ResetValues(); From 59758cea86abee7b8e55e8b14e9cf559197758f8 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 20:17:07 +0000 Subject: [PATCH 12/24] Fix a warning. --- cpp/src/parquet/column_reader.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 4edac7905e2..f10b9b3fc0c 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1485,7 +1485,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, std::shared_ptr scratch = AllocateBuffer( this->pool_, batch_size * std::max(sizeof(int16_t), value_size)); do { - batch_size = std::min(batch_size, values_left); + batch_size = std::min(batch_size, values_left); values_read = this->ReadValues(batch_size, reinterpret_cast(scratch->mutable_data())); values_left -= values_read; From 57b249edec9d84c2e925175f25be8df883966871 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 21:24:35 +0000 Subject: [PATCH 13/24] Fix build warnings. --- cpp/src/parquet/column_reader_test.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index b5731d632ea..d387baf4e4d 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -593,7 +593,7 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/def_levels.size(), Encoding::PLAIN, + &descr, values, /*num_values=*/static_cast(def_levels.size()), Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, level_info.rep_level); @@ -641,8 +641,8 @@ TEST(RecordReaderTest, SkipRequiredTopLevel) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, - /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/{}, level_info.def_level, /*rep_levels=*/{}, level_info.rep_level); pages.push_back(std::move(page)); @@ -691,8 +691,8 @@ TEST(RecordReaderTest, SkipOptional) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, - /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, def_levels, level_info.def_level, /*rep_levels=*/{}, level_info.rep_level); pages.push_back(std::move(page)); @@ -814,8 +814,8 @@ TEST(RecordReaderTest, SkipRepeated) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/values.size(), Encoding::PLAIN, - /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, level_info.rep_level); pages.push_back(std::move(page)); From 8f03b3980ed7deade106e78a9f997d8b2395ffd9 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 21:26:49 +0000 Subject: [PATCH 14/24] Fix ClangFormat. --- cpp/src/parquet/column_reader_test.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index d387baf4e4d..eb0fd22bb62 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -641,8 +641,8 @@ TEST(RecordReaderTest, SkipRequiredTopLevel) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), - Encoding::PLAIN, /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/{}, level_info.def_level, /*rep_levels=*/{}, level_info.rep_level); pages.push_back(std::move(page)); @@ -691,8 +691,8 @@ TEST(RecordReaderTest, SkipOptional) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), - Encoding::PLAIN, /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + /*indices=*/{}, /*indices_size=*/0, def_levels, level_info.def_level, /*rep_levels=*/{}, level_info.rep_level); pages.push_back(std::move(page)); @@ -814,8 +814,8 @@ TEST(RecordReaderTest, SkipRepeated) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), - Encoding::PLAIN, /*indices=*/{}, + &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + /*indices=*/{}, /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, level_info.rep_level); pages.push_back(std::move(page)); From 3357881cebe3421c4ab459d7eb516b420dc5114a Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 4 Oct 2022 22:31:54 +0000 Subject: [PATCH 15/24] Add PARQUET_EXPORT for RecordReader. --- cpp/src/parquet/column_reader.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index e82fb6776f0..5d6adfe7e84 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -267,7 +267,7 @@ namespace internal { /// /// \note API EXPERIMENTAL /// \since 1.3.0 -class RecordReader { +class PARQUET_EXPORT RecordReader { public: static std::shared_ptr Make( const ColumnDescriptor* descr, LevelInfo leaf_info, From ce7c8555a9abba7297b2d2e3f37251eca585f835 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Wed, 5 Oct 2022 23:04:54 +0000 Subject: [PATCH 16/24] Address comments. Refactor/simplify the SkipRecords tests. --- cpp/src/parquet/column_reader.cc | 3 +- cpp/src/parquet/column_reader.h | 8 +- cpp/src/parquet/column_reader_test.cc | 539 ++++++++++---------------- 3 files changed, 220 insertions(+), 330 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index f10b9b3fc0c..2fef5b698e7 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -61,11 +61,12 @@ namespace bit_util = arrow::bit_util; namespace parquet { +namespace { + // The minimum number of repetition/definition levels to decode at a time, for // better vectorized performance when doing many smaller record reads constexpr int64_t kMinLevelBatchSize = 1024; -namespace { inline bool HasSpacedValues(const ColumnDescriptor* descr) { if (descr->max_repetition_level() > 0) { // repeated+flat case diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 5d6adfe7e84..b06266de38d 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -164,7 +164,7 @@ class TypedColumnReader : public ColumnReader { // may be less than the number of repetition and definition levels. With // nested data this is almost certainly true. // - // Set def_levels or rep_levels to nullptr if you want to reading them. + // Set def_levels or rep_levels to nullptr if you want to skip reading them. // This is only safe if you know through some other source that there are no // undefined values. // @@ -282,7 +282,9 @@ class PARQUET_EXPORT RecordReader { /// \return number of records read virtual int64_t ReadRecords(int64_t num_records) = 0; - /// \brief Attempt to skip indicated number of records from column chunk + /// \brief Attempt to skip indicated number of records from column chunk. + /// Note that for repeated fields, a record may have more than one value + /// and all of them are skipped. /// \return number of records skipped virtual int64_t SkipRecords(int64_t num_records) = 0; @@ -368,7 +370,7 @@ class PARQUET_EXPORT RecordReader { int64_t values_capacity_; int64_t null_count_; - /// \brief Each element corresponds to one element in 'values_' and specifies if it + /// \brief Each bit corresponds to one element in 'values_' and specifies if it /// is null or not null. std::shared_ptr<::arrow::ResizableBuffer> valid_bits_; diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index eb0fd22bb62..8e4ff55ab3f 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -37,6 +37,7 @@ namespace parquet { +using parquet::Repetition; using parquet::internal::BinaryRecordReader; using schema::NodePtr; using testing::ElementsAre; @@ -576,14 +577,68 @@ TEST_F(TestPrimitiveReader, TestNonDictionaryEncodedPagesWithExposeEncoding) { pages_.clear(); } -// Tests reading a repeated field using the RecordReader. -TEST(RecordReaderTest, BasicReadRepeatedField) { - internal::LevelInfo level_info; - level_info.def_level = 1; - level_info.rep_level = 1; +class RecordReaderTest : public ::testing::Test { + public: + const int32_t kNullValue = -1; - NodePtr type = schema::Int32("b", Repetition::OPTIONAL); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); + void Init(int32_t max_def_level, int32_t max_rep_level, Repetition::type repetition) { + level_info_.def_level = max_def_level; + level_info_.rep_level = max_rep_level; + repetition_type_ = repetition; + + NodePtr type = schema::Int32("b", repetition); + descr_ = std::make_unique(type, level_info_.def_level, + level_info_.rep_level); + + record_reader_ = internal::RecordReader::Make(descr_.get(), level_info_); + } + + void CheckReadValues(std::vector expected_values, + std::vector expected_defs, + std::vector expected_reps) { + const auto read_values = reinterpret_cast(record_reader_->values()); + std::vector read_vals(read_values, + read_values + record_reader_->values_written()); + ASSERT_EQ(read_vals.size(), expected_values.size()); + for (size_t i = 0; i < expected_values.size(); ++i) { + if (expected_values[i] != kNullValue) { + ASSERT_EQ(expected_values[i], read_values[i]); + } + } + + if (repetition_type_ != Repetition::REQUIRED) { + std::vector read_defs( + record_reader_->def_levels(), + record_reader_->def_levels() + record_reader_->levels_position()); + ASSERT_TRUE(vector_equal(expected_defs, read_defs)); + } + + if (repetition_type_ == Repetition::REPEATED) { + std::vector read_reps( + record_reader_->rep_levels(), + record_reader_->rep_levels() + record_reader_->levels_position()); + ASSERT_TRUE(vector_equal(expected_reps, read_reps)); + } + } + + void CheckState(int64_t values_written, int64_t null_count, int64_t levels_written, + int64_t levels_position) { + ASSERT_EQ(record_reader_->values_written(), values_written); + ASSERT_EQ(record_reader_->null_count(), null_count); + ASSERT_EQ(record_reader_->levels_written(), levels_written); + ASSERT_EQ(record_reader_->levels_position(), levels_position); + } + + protected: + std::shared_ptr record_reader_; + std::unique_ptr descr_; + internal::LevelInfo level_info_; + Repetition::type repetition_type_; +}; + +// Tests reading a repeated field using the RecordReader. +TEST_F(RecordReaderTest, BasicReadRepeatedField) { + Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); // Records look like: {[10], [20, 20], [30, 30, 30]} std::vector> pages; @@ -593,96 +648,56 @@ TEST(RecordReaderTest, BasicReadRepeatedField) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(def_levels.size()), Encoding::PLAIN, + descr_.get(), values, /*num_values=*/static_cast(def_levels.size()), + Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, - level_info.rep_level); + /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, + level_info_.rep_level); pages.push_back(std::move(page)); pager.reset(new test::MockPageReader(pages)); + record_reader_->SetPageReader(std::move(pager)); - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); - - int64_t records_read = record_reader->ReadRecords(/*num_records=*/2); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2); ASSERT_EQ(records_read, 2); - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 6); - ASSERT_EQ(record_reader->levels_position(), 3); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - std::vector read_reps( - record_reader->rep_levels(), - record_reader->rep_levels() + record_reader->levels_position()); - - ASSERT_THAT(read_vals, ElementsAre(10, 20, 20)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); - ASSERT_THAT(read_reps, ElementsAre(0, 0, 1)); + CheckState(/*values_written=*/3, /*null_count=*/0, /*levels_written=*/6, + /*levels_position=*/3); + CheckReadValues(/*expected_values=*/{10, 20, 20}, /*expected_defs=*/{1, 1, 1}, + /*expected_reps=*/{0, 0, 1}); } // Test that we can skip required top level field. -TEST(RecordReaderTest, SkipRequiredTopLevel) { - internal::LevelInfo level_info; - level_info.def_level = 0; - level_info.rep_level = 0; - - NodePtr type = schema::Int32("b", Repetition::REQUIRED); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); +TEST_F(RecordReaderTest, SkipRequiredTopLevel) { + Init(/*max_def_level=*/0, /*max_rep_level=*/0, Repetition::REQUIRED); std::vector> pages; std::vector values = {10, 20, 20, 30, 30, 30}; - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + descr_.get(), values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{}, level_info.def_level, - /*rep_levels=*/{}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{}, level_info_.def_level, + /*rep_levels=*/{}, level_info_.rep_level); pages.push_back(std::move(page)); pager.reset(new test::MockPageReader(pages)); + record_reader_->SetPageReader(std::move(pager)); - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); - - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/3); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/3); ASSERT_EQ(records_skipped, 3); - ASSERT_EQ(record_reader->values_written(), 0); - ASSERT_EQ(record_reader->levels_written(), 0); - ASSERT_EQ(record_reader->levels_position(), 0); - - int64_t records_read = record_reader->ReadRecords(/*num_records=*/2); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0, + /*levels_position=*/0); + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2); ASSERT_EQ(records_read, 2); - ASSERT_EQ(record_reader->values_written(), 2); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 0); - ASSERT_EQ(record_reader->levels_position(), 0); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - // There are no null values to account for here. - ASSERT_THAT(read_vals, ElementsAre(30, 30)); + CheckState(/*values_written=*/2, /*null_count=*/0, /*levels_written=*/0, + /*levels_position=*/0); + CheckReadValues(/*expected_values=*/{30, 30}, /*expected_defs=*/{}, + /*expected_reps=*/{}); } // Skip an optional field. Intentionally included some null values. -TEST(RecordReaderTest, SkipOptional) { - internal::LevelInfo level_info; - level_info.def_level = 1; - level_info.rep_level = 0; - - NodePtr type = schema::Int32("b", Repetition::OPTIONAL); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); +TEST_F(RecordReaderTest, SkipOptional) { + Init(/*max_def_level=*/1, /*max_rep_level=*/0, Repetition::OPTIONAL); // Records look like {null, 10, 20, 30, null, 40, 50, 60} std::vector> pages; @@ -691,120 +706,73 @@ TEST(RecordReaderTest, SkipOptional) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + descr_.get(), values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, def_levels, level_info.def_level, - /*rep_levels=*/{}, level_info.rep_level); + /*indices_size=*/0, def_levels, level_info_.def_level, + /*rep_levels=*/{}, level_info_.rep_level); pages.push_back(std::move(page)); pager.reset(new test::MockPageReader(pages)); - - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); + record_reader_->SetPageReader(std::move(pager)); { // Skip {null, 10} // This also tests when we start with a Skip. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/2); ASSERT_EQ(records_skipped, 2); - ASSERT_EQ(record_reader->values_written(), 0); - // Since we started with skipping, there was nothing in the buffer to consume - // for skipping. - ASSERT_EQ(record_reader->levels_written(), 0); - ASSERT_EQ(record_reader->levels_position(), 0); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0, + /*levels_position=*/0); } { // Read 3 records: {20, null, 30} - int64_t records_read = record_reader->ReadRecords(/*num_records=*/3); + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/3); ASSERT_EQ(records_read, 3); - // One of these values is null. values_written() includes null values. - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 1); - // When we read, we buffer some levels. Since there are only a few levels - // in our whole column, all of them are read. + // values_written() includes null values. // We had skipped 2 of the levels above. So there is only 6 left in total to // read, and we read 3 of them here. - ASSERT_EQ(record_reader->levels_written(), 6); - ASSERT_EQ(record_reader->levels_position(), 3); - - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); + CheckState(/*values_written=*/3, /*null_count=*/1, /*levels_written=*/6, + /*levels_position=*/3); // ReadRecords for optional fields uses ReadValuesSpaced, so there is a // placeholder for null. - ASSERT_EQ(read_vals[0], 20); - // read_vals[1] is a space for null. - ASSERT_EQ(read_vals[2], 30); - ASSERT_THAT(read_defs, ElementsAre(1, 0, 1)); + CheckReadValues(/*expected_values=*/{20, kNullValue, 30}, /*expected_defs=*/{1, 0, 1}, + /*expected_reps=*/{}); } { // Skip {40, 50}. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/2); ASSERT_EQ(records_skipped, 2); - - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->levels_written(), 4); - ASSERT_EQ(record_reader->levels_position(), 3); - - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); + CheckState(/*values_written=*/3, /*null_count=*/1, /*levels_written=*/4, + /*levels_position=*/3); + CheckReadValues(/*expected_values=*/{20, kNullValue, 30}, /*expected_defs=*/{1, 0, 1}, + /*expected_reps=*/{}); } { // Read to the end of the column. Read {60} // This test checks that ReadAndThrowAwayValues works, since if it // does not we would read the wrong values. - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - ASSERT_EQ(record_reader->values_written(), 4); - ASSERT_EQ(record_reader->null_count(), 1); - ASSERT_EQ(record_reader->levels_written(), 4); - ASSERT_EQ(record_reader->levels_position(), 4); - - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - ASSERT_EQ(read_vals[0], 20); - // read_vals[1] is a space for null. - ASSERT_EQ(read_vals[2], 30); - ASSERT_EQ(read_vals[3], 60); - ASSERT_THAT(read_defs, ElementsAre(1, 0, 1, 1)); + CheckState(/*values_written=*/4, /*null_count=*/1, /*levels_written=*/4, + /*levels_position=*/4); + CheckReadValues(/*expected_values=*/{20, kNullValue, 30, 60}, + /*expected_defs=*/{1, 0, 1, 1}, + /*expected_reps=*/{}); } // We have exhausted all the records. - ASSERT_EQ(record_reader->ReadRecords(/*num_records=*/3), 0); - ASSERT_EQ(record_reader->SkipRecords(/*num_records=*/3), 0); + ASSERT_EQ(record_reader_->ReadRecords(/*num_records=*/3), 0); + ASSERT_EQ(record_reader_->SkipRecords(/*num_records=*/3), 0); } // Test skipping for repeated fields. -TEST(RecordReaderTest, SkipRepeated) { - internal::LevelInfo level_info; - level_info.def_level = 1; - level_info.rep_level = 1; - - NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); +TEST_F(RecordReaderTest, SkipRepeated) { + Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); // Records look like {null, [20, 20, 20], null, [30, 30], [40]} std::vector> pages; @@ -814,117 +782,70 @@ TEST(RecordReaderTest, SkipRepeated) { std::unique_ptr pager; std::shared_ptr page = MakeDataPage( - &descr, values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, + descr_.get(), values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, def_levels, level_info.def_level, rep_levels, - level_info.rep_level); + /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, + level_info_.rep_level); pages.push_back(std::move(page)); pager.reset(new test::MockPageReader(pages)); - - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); + record_reader_->SetPageReader(std::move(pager)); { // This should skip the first null record. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/1); ASSERT_EQ(records_skipped, 1); - ASSERT_EQ(record_reader->values_written(), 0); - ASSERT_EQ(record_reader->null_count(), 0); + ASSERT_EQ(record_reader_->values_written(), 0); + ASSERT_EQ(record_reader_->null_count(), 0); // For repeated fields, we need to read the levels to find the record // boundaries and skip. So some levels are read, however, the skipped // level should not be there after the skip. That's why levels_position() // is 0. - ASSERT_EQ(record_reader->levels_written(), 7); - ASSERT_EQ(record_reader->levels_position(), 0); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/7, + /*levels_position=*/0); + CheckReadValues(/*expected_values=*/{}, + /*expected_defs=*/{}, + /*expected_reps=*/{}); } { // Read [20, 20, 20] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 7); - ASSERT_EQ(record_reader->levels_position(), 3); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - std::vector read_reps( - record_reader->rep_levels(), - record_reader->rep_levels() + record_reader->levels_position()); - - ASSERT_THAT(read_vals, ElementsAre(20, 20, 20)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); - ASSERT_THAT(read_reps, ElementsAre(0, 1, 1)); + CheckState(/*values_written=*/3, /*null_count=*/0, /*levels_written=*/7, + /*levels_position=*/3); + CheckReadValues(/*expected_values=*/{20, 20, 20}, + /*expected_defs=*/{1, 1, 1}, + /*expected_reps=*/{0, 1, 1}); } { // Skip the null record and also skip [30, 30] - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/2); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/2); ASSERT_EQ(records_skipped, 2); - ASSERT_EQ(record_reader->values_written(), 3); - ASSERT_EQ(record_reader->null_count(), 0); // We remove the skipped levels from the buffer. - ASSERT_EQ(record_reader->levels_written(), 4); - ASSERT_EQ(record_reader->levels_position(), 3); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - std::vector read_reps( - record_reader->rep_levels(), - record_reader->rep_levels() + record_reader->levels_position()); - - ASSERT_THAT(read_vals, ElementsAre(20, 20, 20)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 1)); - ASSERT_THAT(read_reps, ElementsAre(0, 1, 1)); + CheckState(/*values_written=*/3, /*null_count=*/0, /*levels_written=*/4, + /*levels_position=*/3); + CheckReadValues(/*expected_values=*/{20, 20, 20}, + /*expected_defs=*/{1, 1, 1}, + /*expected_reps=*/{0, 1, 1}); } { // Read [40] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - ASSERT_EQ(record_reader->values_written(), 4); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 4); - ASSERT_EQ(record_reader->levels_position(), 4); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - std::vector read_reps( - record_reader->rep_levels(), - record_reader->rep_levels() + record_reader->levels_position()); - - ASSERT_THAT(read_vals, ElementsAre(20, 20, 20, 40)); - ASSERT_THAT(read_defs, ElementsAre(1, 1, 1, 1)); - ASSERT_THAT(read_reps, ElementsAre(0, 1, 1, 0)); + CheckState(/*values_written=*/4, /*null_count=*/0, /*levels_written=*/4, + /*levels_position=*/4); + CheckReadValues(/*expected_values=*/{20, 20, 20, 40}, + /*expected_defs=*/{1, 1, 1, 1}, + /*expected_reps=*/{0, 1, 1, 0}); } } // Test reading when one record spans multiple pages for a repeated field. -TEST(RecordReaderTest, ReadPartialRecord) { - internal::LevelInfo level_info; - level_info.def_level = 1; - level_info.rep_level = 1; - - NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); +TEST_F(RecordReaderTest, ReadPartialRecord) { + Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); std::vector> pages; std::unique_ptr pager; @@ -932,88 +853,74 @@ TEST(RecordReaderTest, ReadPartialRecord) { // Page 1: {[10], [20, 20, 20 ... } continues to next page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, + descr_.get(), /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info.def_level, - /*rep_levels=*/{0, 0, 1, 1}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info_.def_level, + /*rep_levels=*/{0, 0, 1, 1}, level_info_.rep_level); pages.push_back(std::move(page)); } // Page 2: {... 20, 20, ...} continues from previous page and to next page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, + descr_.get(), /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, - /*rep_levels=*/{1, 1}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info_.def_level, + /*rep_levels=*/{1, 1}, level_info_.rep_level); pages.push_back(std::move(page)); } - // Page 3: { ... 20, [30]} continues from previous page. + // Page 3: { ... 20], [30]} continues from previous page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, + descr_.get(), /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, - /*rep_levels=*/{1, 0}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info_.def_level, + /*rep_levels=*/{1, 0}, level_info_.rep_level); pages.push_back(std::move(page)); } pager.reset(new test::MockPageReader(pages)); - - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); + record_reader_->SetPageReader(std::move(pager)); { // Read [10] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - ASSERT_THAT(read_vals, ElementsAre(10)); + CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/4, + /*levels_position=*/1); + CheckReadValues(/*expected_values=*/{10}, + /*expected_defs=*/{1}, + /*expected_reps=*/{0}); } { // Read [20, 20, 20, 20, 20, 20] that spans multiple pages. - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20)); + CheckState(/*values_written=*/7, /*null_count=*/0, /*levels_written=*/8, + /*levels_position=*/7); + CheckReadValues(/*expected_values=*/{10, 20, 20, 20, 20, 20, 20}, + /*expected_defs=*/{1, 1, 1, 1, 1, 1, 1}, + /*expected_reps=*/{0, 0, 1, 1, 1, 1, 1}); } { // Read [30] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - ASSERT_THAT(read_vals, ElementsAre(10, 20, 20, 20, 20, 20, 20, 30)); + CheckState(/*values_written=*/8, /*null_count=*/0, /*levels_written=*/8, + /*levels_position=*/8); + CheckReadValues(/*expected_values=*/{10, 20, 20, 20, 20, 20, 20, 30}, + /*expected_defs=*/{1, 1, 1, 1, 1, 1, 1, 1}, + /*expected_reps=*/{0, 0, 1, 1, 1, 1, 1, 0}); } } // Test skipping for repeated fields for the case when one record spans multiple // pages. -TEST(RecordReaderTest, SkipPartialRecord) { - internal::LevelInfo level_info; - level_info.def_level = 1; - level_info.rep_level = 1; - - NodePtr type = schema::Int32("b", Repetition::REPEATED); - const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); +TEST_F(RecordReaderTest, SkipPartialRecord) { + Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); std::vector> pages; std::unique_ptr pager; @@ -1021,97 +928,75 @@ TEST(RecordReaderTest, SkipPartialRecord) { // Page 1: {[10], [20, 20, 20 ... } continues to next page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, + descr_.get(), /*values=*/{10, 20, 20, 20}, /*num_values=*/4, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info.def_level, - /*rep_levels=*/{0, 0, 1, 1}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1, 1, 1}, level_info_.def_level, + /*rep_levels=*/{0, 0, 1, 1}, level_info_.rep_level); pages.push_back(std::move(page)); } // Page 2: {... 20, 20, ...} continues from previous page and to next page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, + descr_.get(), /*values=*/{20, 20}, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, - /*rep_levels=*/{1, 1}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info_.def_level, + /*rep_levels=*/{1, 1}, level_info_.rep_level); pages.push_back(std::move(page)); } // Page 3: { ... 20, [30]} continues from previous page. { std::shared_ptr page = MakeDataPage( - &descr, /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, + descr_.get(), /*values=*/{20, 30}, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, - /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info.def_level, - /*rep_levels=*/{1, 0}, level_info.rep_level); + /*indices_size=*/0, /*def_levels=*/{1, 1}, level_info_.def_level, + /*rep_levels=*/{1, 0}, level_info_.rep_level); pages.push_back(std::move(page)); } pager.reset(new test::MockPageReader(pages)); - - std::shared_ptr record_reader = - internal::RecordReader::Make(&descr, level_info); - record_reader->SetPageReader(std::move(pager)); + record_reader_->SetPageReader(std::move(pager)); { // Read [10] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); - + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - - ASSERT_THAT(read_vals, ElementsAre(10)); - ASSERT_EQ(record_reader->values_written(), 1); - ASSERT_EQ(record_reader->null_count(), 0); // There are 4 levels in the first page. - ASSERT_EQ(record_reader->levels_written(), 4); - ASSERT_EQ(record_reader->levels_position(), 1); + CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/4, + /*levels_position=*/1); + CheckReadValues(/*expected_values=*/{10}, + /*expected_defs=*/{1}, + /*expected_reps=*/{0}); } { // Skip the record that goes across pages. - int64_t records_skipped = record_reader->SkipRecords(/*num_records=*/1); - + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/1); ASSERT_EQ(records_skipped, 1); - ASSERT_EQ(record_reader->values_written(), 1); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 2); - ASSERT_EQ(record_reader->levels_position(), 1); + CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/2, + /*levels_position=*/1); + CheckReadValues(/*expected_values=*/{10}, + /*expected_defs=*/{1}, + /*expected_reps=*/{0}); } { // Read [30] - int64_t records_read = record_reader->ReadRecords(/*num_records=*/1); + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - ASSERT_EQ(record_reader->values_written(), 2); - ASSERT_EQ(record_reader->null_count(), 0); - ASSERT_EQ(record_reader->levels_written(), 2); - ASSERT_EQ(record_reader->levels_position(), 2); - - const auto read_values = reinterpret_cast(record_reader->values()); - std::vector read_vals(read_values, - read_values + record_reader->values_written()); - std::vector read_defs( - record_reader->def_levels(), - record_reader->def_levels() + record_reader->levels_position()); - std::vector read_reps( - record_reader->rep_levels(), - record_reader->rep_levels() + record_reader->levels_position()); - - ASSERT_THAT(read_vals, ElementsAre(10, 30)); - ASSERT_THAT(read_defs, ElementsAre(1, 1)); - ASSERT_THAT(read_reps, ElementsAre(0, 0)); + CheckState(/*values_written=*/2, /*null_count=*/0, /*levels_written=*/2, + /*levels_position=*/2); + CheckReadValues(/*expected_values=*/{10, 30}, + /*expected_defs=*/{1, 1}, + /*expected_reps=*/{0, 0}); } } // Test that Skip works on ByteArrays. Specifically, this is testing // ReadAndThrowAwayValues for ByteArrays. -TEST(RecordReaderTest, SkipByteArray) { +TEST(RecordReaderByteArrayTest, SkipByteArray) { internal::LevelInfo level_info; level_info.def_level = 1; level_info.rep_level = 0; @@ -1179,10 +1064,12 @@ TEST(RecordReaderTest, SkipByteArray) { // Check that the expected values match the actual values. for (size_t i = 0; i < 30; ++i) { ASSERT_EQ(expected_values[i].compare(binary_array->GetView(i)), 0); + ASSERT_EQ(def_levels[i] == 0, binary_array->IsNull(i)); } // Repeat for the next range that we read. for (size_t i = 60; i < 90; ++i) { ASSERT_EQ(expected_values[i].compare(binary_array->GetView(i - 30)), 0); + ASSERT_EQ(def_levels[i] == 0, binary_array->IsNull(i - 30)); } } From 16797472b08f6f9b935fef33f67770ccaf55ad05 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Wed, 5 Oct 2022 23:56:54 +0000 Subject: [PATCH 17/24] Address a couple more comments. --- cpp/src/parquet/column_reader.cc | 2 +- cpp/src/parquet/column_reader_test.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 2fef5b698e7..ab14acdcebc 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1500,9 +1500,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, if (this->max_rep_level_ == 0 && this->max_def_level_ == 0) { return this->Skip(num_records); } - // Non-repeated optional field. int64_t skipped_records = 0; if (this->max_rep_level_ == 0) { + // Non-repeated optional field. // First consume whatever is in the buffer. skipped_records = SkipRecordsInBufferNonRepeated(num_records); diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 8e4ff55ab3f..369f6c07d05 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -27,7 +27,7 @@ #include #include -#include "arrow/api.h" +#include "arrow/array/array_binary.h" #include "arrow/util/macros.h" #include "parquet/column_page.h" #include "parquet/column_reader.h" From 2e2726214a6e4df2220ac00bda08821724a831d5 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Mon, 17 Oct 2022 16:32:13 +0000 Subject: [PATCH 18/24] SkipRecords, add a test that we first consume all the levels before loading more levels. This was a performance bug where we kept reading more levels at each SkipRecords call. --- cpp/src/parquet/column_reader.cc | 113 ++++++++++++++------------ cpp/src/parquet/column_reader_test.cc | 65 ++++++++++++++- 2 files changed, 122 insertions(+), 56 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index ab14acdcebc..dd76173c66a 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -1334,11 +1335,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, } // Throw away levels from start_levels_position to levels_position_. - // Will update levels_position_ and levels_written_ accordingly and move - // the levels to left to fill in the gap. It will not shrink the size - // of the buffer or overwrite the positions after levels_written_. - // This is inefficient, though necessary to consume levels that we have - // already read into the buffer and we want to Skip. + // Will update levels_position_, levels_written_, and levels_capacity_ + // accordingly and move the levels to left to fill in the gap. + // It will shrink the size of the buffer. void ThrowAwayLevels(int64_t start_levels_position) { ARROW_DCHECK_LE(levels_position_, levels_written_); ARROW_DCHECK_LE(start_levels_position, levels_position_); @@ -1347,16 +1346,24 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t gap = levels_position_ - start_levels_position; if (gap == 0) return; - std::copy(def_levels() + levels_position_, def_levels() + levels_written_, - def_levels() + levels_position_ - gap); + int64_t levels_remaining = levels_written_ - gap; + + int16_t* def_data = def_levels(); + std::copy(def_data + levels_position_, def_data + levels_written_, + def_data + levels_position_ - gap); + PARQUET_THROW_NOT_OK(def_levels_->Resize(levels_remaining * sizeof(int16_t), false)); if (this->max_rep_level_ > 0) { - std::copy(rep_levels() + levels_position_, rep_levels() + levels_written_, - rep_levels() + levels_position_ - gap); + int16_t* rep_data = rep_levels(); + std::copy(rep_data + levels_position_, rep_data + levels_written_, + rep_data + levels_position_ - gap); + PARQUET_THROW_NOT_OK( + rep_levels_->Resize(levels_remaining * sizeof(int16_t), false)); } levels_written_ -= gap; levels_position_ -= gap; + levels_capacity_ -= gap; } // Skip records that we have in our buffer. This function is only for @@ -1401,16 +1408,50 @@ class TypedRecordReader : public TypedColumnReaderImpl, return skipped_records; } - // Skip records for repeated fields. Returns number of skipped records. + // Attempts to skip num_records from the buffer. Will throw away levels + // and corresponding values for the records it skipped and consumes them from the + // underlying decoder. Will advance levels_position_ and update + // at_record_start_. + // Returns how many records were skipped. + int64_t DelimitAndSkipRecordsInBuffer(int64_t num_records) { + if (num_records == 0) return 0; + // Look at the buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t start_levels_position = levels_position_; + int64_t values_seen = 0; + int64_t skipped_records = DelimitRecords(num_records, &values_seen); + if (ReadAndThrowAwayValues(values_seen) != values_seen) { + throw ParquetException("Could not read and throw away requested values"); + } + // Mark those levels and values as consumed in the the underlying page. + // This must be done before we throw away levels since it updates + // levels_position_ and levels_written_. + this->ConsumeBufferedValues(levels_position_ - start_levels_position); + // Updated levels_position_ and levels_written_. + ThrowAwayLevels(start_levels_position); + return skipped_records; + } + + // Skip records for repeated fields. For repeated fields, we are technically + // reading and throwing away the levels and values since we do not know the record + // boundaries in advance. Keep filling the buffer and skipping until we reach the + // desired number of records or we run out of values in the column chunk. + // Returns number of skipped records. int64_t SkipRecordsRepeated(int64_t num_records) { ARROW_DCHECK_GT(this->max_rep_level_, 0); - - // For repeated fields, we are technically reading and throwing away the - // levels and values since we do not know the record boundaries in advance. - // Keep filling the buffer and skipping until we reach the desired number - // of records or we run out of values in the column chunk. int64_t skipped_records = 0; - int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records); + + // First consume what is in the buffer. + if (levels_position_ < levels_written_) { + // This updates at_record_start_. + skipped_records = DelimitAndSkipRecordsInBuffer(num_records); + } + + int64_t level_batch_size = + std::max(kMinLevelBatchSize, num_records - skipped_records); + // If 'at_record_start_' is false, but (skip_records == num_records), it // means that for the last record that was counted, we have not seen all // of it's values yet. @@ -1451,24 +1492,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, } levels_written_ += levels_read; - - // Look at the buffered levels, delimit them based on - // (rep_level == 0), report back how many records are in there, and - // fill in how many not-null values (def_level == max_def_level_). - // DelimitRecords updates levels_position_. - int64_t start_levels_position = levels_position_; int64_t remaining_records = num_records - skipped_records; - int64_t values_seen = 0; - skipped_records += DelimitRecords(remaining_records, &values_seen); - if (ReadAndThrowAwayValues(values_seen) != values_seen) { - throw ParquetException("Could not read and throw away requested values"); - } - // Mark those levels and values as consumed in the the underlying page. - // This must be done before we throw away levels since it updates - // levels_position_ and levels_written_. - this->ConsumeBufferedValues(levels_position_ - start_levels_position); - // Updated levels_position_ and levels_written_. - ThrowAwayLevels(start_levels_position); + // This updates at_record_start_. + skipped_records += DelimitAndSkipRecordsInBuffer(remaining_records); } return skipped_records; @@ -1664,25 +1690,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, ResetValues(); if (levels_written_ > 0) { - const int64_t levels_remaining = levels_written_ - levels_position_; - // Shift remaining levels to beginning of buffer and trim to only the number - // of decoded levels remaining - int16_t* def_data = def_levels(); - int16_t* rep_data = rep_levels(); - - std::copy(def_data + levels_position_, def_data + levels_written_, def_data); - PARQUET_THROW_NOT_OK( - def_levels_->Resize(levels_remaining * sizeof(int16_t), false)); - - if (this->max_rep_level_ > 0) { - std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data); - PARQUET_THROW_NOT_OK( - rep_levels_->Resize(levels_remaining * sizeof(int16_t), false)); - } - - levels_written_ -= levels_position_; - levels_position_ = 0; - levels_capacity_ = levels_remaining; + // Throw away levels from 0 to levels_position_. + ThrowAwayLevels(0); } // Call Finish on the binary builders to reset them diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 369f6c07d05..17d72e3a7aa 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -663,6 +663,9 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) { /*levels_position=*/3); CheckReadValues(/*expected_values=*/{10, 20, 20}, /*expected_defs=*/{1, 1, 1}, /*expected_reps=*/{0, 0, 1}); + record_reader_->Reset(); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/3, + /*levels_position=*/0); } // Test that we can skip required top level field. @@ -693,6 +696,9 @@ TEST_F(RecordReaderTest, SkipRequiredTopLevel) { /*levels_position=*/0); CheckReadValues(/*expected_values=*/{30, 30}, /*expected_defs=*/{}, /*expected_reps=*/{}); + record_reader_->Reset(); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0, + /*levels_position=*/0); } // Skip an optional field. Intentionally included some null values. @@ -749,6 +755,10 @@ TEST_F(RecordReaderTest, SkipOptional) { /*levels_position=*/3); CheckReadValues(/*expected_values=*/{20, kNullValue, 30}, /*expected_defs=*/{1, 0, 1}, /*expected_reps=*/{}); + // Reset after a Skip. + record_reader_->Reset(); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1, + /*levels_position=*/0); } { @@ -758,10 +768,10 @@ TEST_F(RecordReaderTest, SkipOptional) { int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1); ASSERT_EQ(records_read, 1); - CheckState(/*values_written=*/4, /*null_count=*/1, /*levels_written=*/4, - /*levels_position=*/4); - CheckReadValues(/*expected_values=*/{20, kNullValue, 30, 60}, - /*expected_defs=*/{1, 0, 1, 1}, + CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/1, + /*levels_position=*/1); + CheckReadValues(/*expected_values=*/{60}, + /*expected_defs=*/{1}, /*expected_reps=*/{}); } @@ -843,6 +853,53 @@ TEST_F(RecordReaderTest, SkipRepeated) { } } +// Tests that for repeated fields, we first consume what is in the buffer +// before reading more levels. +TEST_F(RecordReaderTest, SkipRepeatedConsumeBufferFirst) { + Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); + + std::vector> pages; + std::vector values(2048, 10); + std::vector def_levels(2048, 1); + std::vector rep_levels(2048, 0); + + std::unique_ptr pager; + std::shared_ptr page = MakeDataPage( + descr_.get(), values, /*num_values=*/static_cast(values.size()), + Encoding::PLAIN, + /*indices=*/{}, + /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, + level_info_.rep_level); + pages.push_back(std::move(page)); + pager.reset(new test::MockPageReader(pages)); + record_reader_->SetPageReader(std::move(pager)); + { + // Read 1000 records. We will read 1024 levels because that is the minimum + // number of levels to read. + int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1000); + ASSERT_EQ(records_read, 1000); + CheckState(/*values_written=*/1000, /*null_count=*/0, /*levels_written=*/1024, + /*levels_position=*/1000); + std::vector expected_values(1000, 10); + std::vector expected_def_levels(1000, 1); + std::vector expected_rep_levels(1000, 0); + CheckReadValues(expected_values, expected_def_levels, expected_rep_levels); + // Reset removes the already consumed values and levels. + record_reader_->Reset(); + } + + { // Skip 12 records. Since we already have 24 in the buffer, we should not be + // reading any more levels into the buffer, we will just consume 12 of it. + int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/12); + ASSERT_EQ(records_skipped, 12); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/12, + /*levels_position=*/0); + // Everthing is empty because we reset the reader before this skip. + CheckReadValues(/*expected_values=*/{}, /*expected_def_levels=*/{}, + /*expected_rep_levels=*/{}); + } +} + // Test reading when one record spans multiple pages for a repeated field. TEST_F(RecordReaderTest, ReadPartialRecord) { Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); From 7355db5899a2b167717035aa626a7f1b8a0c96d5 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 25 Oct 2022 19:47:58 +0000 Subject: [PATCH 19/24] Add parameter comment shrink_to_fit for buffer Resize calls. --- cpp/src/parquet/column_reader.cc | 40 +++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index dd76173c66a..2192d6e1c2b 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -403,7 +403,8 @@ std::shared_ptr SerializedPageReader::NextPage() { // Decrypt it if we need to if (crypto_ctx_.data_decryptor != nullptr) { PARQUET_THROW_NOT_OK(decryption_buffer_->Resize( - compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(), false)); + compressed_len - crypto_ctx_.data_decryptor->CiphertextSizeDelta(), + /*shrink_to_fit=*/false)); compressed_len = crypto_ctx_.data_decryptor->Decrypt( page_buffer->data(), compressed_len, decryption_buffer_->mutable_data()); @@ -505,7 +506,8 @@ std::shared_ptr SerializedPageReader::DecompressIfNeeded( // Grow the uncompressed buffer if we need to. if (uncompressed_len > static_cast(decompression_buffer_->size())) { - PARQUET_THROW_NOT_OK(decompression_buffer_->Resize(uncompressed_len, false)); + PARQUET_THROW_NOT_OK( + decompression_buffer_->Resize(uncompressed_len, /*shrink_to_fit=*/false)); } if (levels_byte_len > 0) { @@ -1351,14 +1353,15 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_data = def_levels(); std::copy(def_data + levels_position_, def_data + levels_written_, def_data + levels_position_ - gap); - PARQUET_THROW_NOT_OK(def_levels_->Resize(levels_remaining * sizeof(int16_t), false)); + PARQUET_THROW_NOT_OK( + def_levels_->Resize(levels_remaining * sizeof(int16_t), /*shrink_to_fit=*/false)); if (this->max_rep_level_ > 0) { int16_t* rep_data = rep_levels(); std::copy(rep_data + levels_position_, rep_data + levels_written_, rep_data + levels_position_ - gap); - PARQUET_THROW_NOT_OK( - rep_levels_->Resize(levels_remaining * sizeof(int16_t), false)); + PARQUET_THROW_NOT_OK(rep_levels_->Resize(levels_remaining * sizeof(int16_t), + /*shrink_to_fit=*/false)); } levels_written_ -= gap; @@ -1383,8 +1386,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, // First we need to figure out how many present/not-null values there are. std::shared_ptr<::arrow::ResizableBuffer> valid_bits; valid_bits = AllocateBuffer(this->pool_); - PARQUET_THROW_NOT_OK( - valid_bits->Resize(bit_util::BytesForBits(skipped_records), true)); + PARQUET_THROW_NOT_OK(valid_bits->Resize(bit_util::BytesForBits(skipped_records), + /*shrink_to_fit=*/true)); ValidityBitmapInputOutput validity_io; validity_io.values_read_upper_bound = skipped_records; validity_io.valid_bits = valid_bits->mutable_data(); @@ -1552,7 +1555,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, std::shared_ptr ReleaseValues() override { if (uses_values_) { auto result = values_; - PARQUET_THROW_NOT_OK(result->Resize(bytes_for_values(values_written_), true)); + PARQUET_THROW_NOT_OK( + result->Resize(bytes_for_values(values_written_), /*shrink_to_fit=*/true)); values_ = AllocateBuffer(this->pool_); values_capacity_ = 0; return result; @@ -1564,7 +1568,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, std::shared_ptr ReleaseIsValid() override { if (leaf_info_.HasNullableValues()) { auto result = valid_bits_; - PARQUET_THROW_NOT_OK(result->Resize(bit_util::BytesForBits(values_written_), true)); + PARQUET_THROW_NOT_OK(result->Resize(bit_util::BytesForBits(values_written_), + /*shrink_to_fit=*/true)); valid_bits_ = AllocateBuffer(this->pool_); return result; } else { @@ -1652,9 +1657,11 @@ class TypedRecordReader : public TypedColumnReaderImpl, if (MultiplyWithOverflow(new_levels_capacity, kItemSize, &capacity_in_bytes)) { throw ParquetException("Allocation size too large (corrupt file?)"); } - PARQUET_THROW_NOT_OK(def_levels_->Resize(capacity_in_bytes, false)); + PARQUET_THROW_NOT_OK( + def_levels_->Resize(capacity_in_bytes, /*shrink_to_fit=*/false)); if (this->max_rep_level_ > 0) { - PARQUET_THROW_NOT_OK(rep_levels_->Resize(capacity_in_bytes, false)); + PARQUET_THROW_NOT_OK( + rep_levels_->Resize(capacity_in_bytes, /*shrink_to_fit=*/false)); } levels_capacity_ = new_levels_capacity; } @@ -1668,8 +1675,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, // XXX(wesm): A hack to avoid memory allocation when reading directly // into builder classes if (uses_values_) { - PARQUET_THROW_NOT_OK( - values_->Resize(bytes_for_values(new_values_capacity), false)); + PARQUET_THROW_NOT_OK(values_->Resize(bytes_for_values(new_values_capacity), + /*shrink_to_fit=*/false)); } values_capacity_ = new_values_capacity; } @@ -1677,7 +1684,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t valid_bytes_new = bit_util::BytesForBits(values_capacity_); if (valid_bits_->size() < valid_bytes_new) { int64_t valid_bytes_old = bit_util::BytesForBits(values_written_); - PARQUET_THROW_NOT_OK(valid_bits_->Resize(valid_bytes_new, false)); + PARQUET_THROW_NOT_OK( + valid_bits_->Resize(valid_bytes_new, /*shrink_to_fit=*/false)); // Avoid valgrind warnings memset(valid_bits_->mutable_data() + valid_bytes_old, 0, @@ -1810,9 +1818,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, if (values_written_ > 0) { // Resize to 0, but do not shrink to fit if (uses_values_) { - PARQUET_THROW_NOT_OK(values_->Resize(0, false)); + PARQUET_THROW_NOT_OK(values_->Resize(0, /*shrink_to_fit=*/false)); } - PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, false)); + PARQUET_THROW_NOT_OK(valid_bits_->Resize(0, /*shrink_to_fit=*/false)); values_written_ = 0; values_capacity_ = 0; null_count_ = 0; From 0223c39e48e559b881d4309cd075014175f85332 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Tue, 25 Oct 2022 20:52:37 +0000 Subject: [PATCH 20/24] Address comments. --- cpp/src/parquet/column_reader.cc | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 2192d6e1c2b..551401c8187 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1339,29 +1339,31 @@ class TypedRecordReader : public TypedColumnReaderImpl, // Throw away levels from start_levels_position to levels_position_. // Will update levels_position_, levels_written_, and levels_capacity_ // accordingly and move the levels to left to fill in the gap. - // It will shrink the size of the buffer. + // It will resize the buffer without releasing the memory allocation. void ThrowAwayLevels(int64_t start_levels_position) { ARROW_DCHECK_LE(levels_position_, levels_written_); ARROW_DCHECK_LE(start_levels_position, levels_position_); ARROW_DCHECK_GT(this->max_def_level_, 0); + ARROW_DCHECK_NE(def_levels_, nullptr); int64_t gap = levels_position_ - start_levels_position; if (gap == 0) return; int64_t levels_remaining = levels_written_ - gap; + int64_t destination = levels_position_ - gap; - int16_t* def_data = def_levels(); - std::copy(def_data + levels_position_, def_data + levels_written_, - def_data + levels_position_ - gap); - PARQUET_THROW_NOT_OK( - def_levels_->Resize(levels_remaining * sizeof(int16_t), /*shrink_to_fit=*/false)); + auto left_shift = [=](::arrow::ResizableBuffer* buffer) { + int16_t* data = reinterpret_cast(buffer->mutable_data()); + std::copy(data + levels_position_, data + levels_written_, data + destination); + PARQUET_THROW_NOT_OK(buffer->Resize(levels_remaining * sizeof(int16_t), + /*shrink_to_fit=*/false)); + }; + + left_shift(def_levels_.get()); if (this->max_rep_level_ > 0) { - int16_t* rep_data = rep_levels(); - std::copy(rep_data + levels_position_, rep_data + levels_written_, - rep_data + levels_position_ - gap); - PARQUET_THROW_NOT_OK(rep_levels_->Resize(levels_remaining * sizeof(int16_t), - /*shrink_to_fit=*/false)); + ARROW_DCHECK_NE(rep_levels_, nullptr); + left_shift(rep_levels_.get()); } levels_written_ -= gap; @@ -1426,7 +1428,9 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t values_seen = 0; int64_t skipped_records = DelimitRecords(num_records, &values_seen); if (ReadAndThrowAwayValues(values_seen) != values_seen) { - throw ParquetException("Could not read and throw away requested values"); + std::stringstream ss; + ss << "Could not read and throw away " << values_seen << " values"; + throw ParquetException(ss.str()); } // Mark those levels and values as consumed in the the underlying page. // This must be done before we throw away levels since it updates From 3dd5fa357837c4c4f1dd2a7cb67a90e899baca58 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Wed, 26 Oct 2022 20:21:56 +0000 Subject: [PATCH 21/24] Use std::make_unique and collapse declaration and definition to make code more concise. --- cpp/src/parquet/column_reader_test.cc | 30 +++++++++------------------ 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 17d72e3a7aa..9a2cf9cf28a 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -79,9 +79,8 @@ static inline bool vector_equal_with_def_levels(const std::vector& left, class TestPrimitiveReader : public ::testing::Test { public: void InitReader(const ColumnDescriptor* d) { - std::unique_ptr pager_; - pager_.reset(new test::MockPageReader(pages_)); - reader_ = ColumnReader::Make(d, std::move(pager_)); + auto pager = std::make_unique(pages_); + reader_ = ColumnReader::Make(d, std::move(pager)); } void CheckResults() { @@ -646,7 +645,6 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) { std::vector def_levels = {1, 1, 1, 1, 1, 1}; std::vector rep_levels = {0, 0, 1, 0, 1, 1}; - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(def_levels.size()), Encoding::PLAIN, @@ -654,7 +652,7 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) { /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, level_info_.rep_level); pages.push_back(std::move(page)); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2); @@ -674,7 +672,6 @@ TEST_F(RecordReaderTest, SkipRequiredTopLevel) { std::vector> pages; std::vector values = {10, 20, 20, 30, 30, 30}; - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, @@ -682,7 +679,7 @@ TEST_F(RecordReaderTest, SkipRequiredTopLevel) { /*indices_size=*/0, /*def_levels=*/{}, level_info_.def_level, /*rep_levels=*/{}, level_info_.rep_level); pages.push_back(std::move(page)); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); int64_t records_skipped = record_reader_->SkipRecords(/*num_records=*/3); @@ -710,7 +707,6 @@ TEST_F(RecordReaderTest, SkipOptional) { std::vector values = {10, 20, 30, 40, 50, 60}; std::vector def_levels = {0, 1, 1, 0, 1, 1, 1, 1}; - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, @@ -718,7 +714,7 @@ TEST_F(RecordReaderTest, SkipOptional) { /*indices_size=*/0, def_levels, level_info_.def_level, /*rep_levels=*/{}, level_info_.rep_level); pages.push_back(std::move(page)); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); { @@ -790,7 +786,6 @@ TEST_F(RecordReaderTest, SkipRepeated) { std::vector def_levels = {0, 1, 1, 1, 0, 1, 1, 1}; std::vector rep_levels = {0, 0, 1, 1, 0, 0, 1, 0}; - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, @@ -798,7 +793,7 @@ TEST_F(RecordReaderTest, SkipRepeated) { /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, level_info_.rep_level); pages.push_back(std::move(page)); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); { @@ -863,7 +858,6 @@ TEST_F(RecordReaderTest, SkipRepeatedConsumeBufferFirst) { std::vector def_levels(2048, 1); std::vector rep_levels(2048, 0); - std::unique_ptr pager; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(values.size()), Encoding::PLAIN, @@ -871,7 +865,7 @@ TEST_F(RecordReaderTest, SkipRepeatedConsumeBufferFirst) { /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels, level_info_.rep_level); pages.push_back(std::move(page)); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); { // Read 1000 records. We will read 1024 levels because that is the minimum @@ -905,7 +899,6 @@ TEST_F(RecordReaderTest, ReadPartialRecord) { Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); std::vector> pages; - std::unique_ptr pager; // Page 1: {[10], [20, 20, 20 ... } continues to next page. { @@ -937,7 +930,7 @@ TEST_F(RecordReaderTest, ReadPartialRecord) { pages.push_back(std::move(page)); } - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); { @@ -980,7 +973,6 @@ TEST_F(RecordReaderTest, SkipPartialRecord) { Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); std::vector> pages; - std::unique_ptr pager; // Page 1: {[10], [20, 20, 20 ... } continues to next page. { @@ -1012,7 +1004,7 @@ TEST_F(RecordReaderTest, SkipPartialRecord) { pages.push_back(std::move(page)); } - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); { @@ -1065,8 +1057,6 @@ TEST(RecordReaderByteArrayTest, SkipByteArray) { const ColumnDescriptor descr(type, level_info.def_level, level_info.rep_level); std::vector> pages; - std::unique_ptr pager; - int levels_per_page = 90; int num_pages = 1; @@ -1078,7 +1068,7 @@ TEST(RecordReaderByteArrayTest, SkipByteArray) { MakePages(&descr, num_pages, levels_per_page, def_levels, rep_levels, values, buffer, pages, Encoding::PLAIN); - pager.reset(new test::MockPageReader(pages)); + auto pager = std::make_unique(pages); std::shared_ptr record_reader = internal::RecordReader::Make(&descr, level_info); From 9c4d66ffeb282aa8061661e2a10433abd821871a Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Thu, 27 Oct 2022 18:16:57 +0000 Subject: [PATCH 22/24] Address comments. --- cpp/src/parquet/column_reader.cc | 21 +++++++++++---------- cpp/src/parquet/column_reader_test.cc | 4 ++-- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index 551401c8187..df5eb99cafe 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1350,11 +1350,11 @@ class TypedRecordReader : public TypedColumnReaderImpl, if (gap == 0) return; int64_t levels_remaining = levels_written_ - gap; - int64_t destination = levels_position_ - gap; auto left_shift = [=](::arrow::ResizableBuffer* buffer) { int16_t* data = reinterpret_cast(buffer->mutable_data()); - std::copy(data + levels_position_, data + levels_written_, data + destination); + std::copy(data + levels_position_, data + levels_written_, + data + start_levels_position); PARQUET_THROW_NOT_OK(buffer->Resize(levels_remaining * sizeof(int16_t), /*shrink_to_fit=*/false)); }; @@ -1427,11 +1427,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t start_levels_position = levels_position_; int64_t values_seen = 0; int64_t skipped_records = DelimitRecords(num_records, &values_seen); - if (ReadAndThrowAwayValues(values_seen) != values_seen) { - std::stringstream ss; - ss << "Could not read and throw away " << values_seen << " values"; - throw ParquetException(ss.str()); - } + ReadAndThrowAwayValues(values_seen); // Mark those levels and values as consumed in the the underlying page. // This must be done before we throw away levels since it updates // levels_position_ and levels_written_. @@ -1459,7 +1455,7 @@ class TypedRecordReader : public TypedColumnReaderImpl, int64_t level_batch_size = std::max(kMinLevelBatchSize, num_records - skipped_records); - // If 'at_record_start_' is false, but (skip_records == num_records), it + // If 'at_record_start_' is false, but (skipped_records == num_records), it // means that for the last record that was counted, we have not seen all // of it's values yet. while (!at_record_start_ || skipped_records < num_records) { @@ -1508,7 +1504,8 @@ class TypedRecordReader : public TypedColumnReaderImpl, } // Read 'num_values' values and throw them away. - int64_t ReadAndThrowAwayValues(int64_t num_values) { + // Throws an error if it could not read 'num_values'. + void ReadAndThrowAwayValues(int64_t num_values) { int64_t values_left = num_values; int64_t batch_size = kMinLevelBatchSize; // ReadBatch with a smaller memory footprint int64_t values_read = 0; @@ -1524,7 +1521,11 @@ class TypedRecordReader : public TypedColumnReaderImpl, this->ReadValues(batch_size, reinterpret_cast(scratch->mutable_data())); values_left -= values_read; } while (values_read > 0 && values_left > 0); - return num_values - values_left; + if (values_left > 0) { + std::stringstream ss; + ss << "Could not read and throw away " << num_values << " values"; + throw ParquetException(ss.str()); + } } int64_t SkipRecords(int64_t num_records) override { diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 9a2cf9cf28a..14ffb4569de 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -1043,14 +1043,14 @@ TEST_F(RecordReaderTest, SkipPartialRecord) { } } -// Test that Skip works on ByteArrays. Specifically, this is testing +// Test that SkipRecords works on ByteArrays. Specifically, this is testing // ReadAndThrowAwayValues for ByteArrays. TEST(RecordReaderByteArrayTest, SkipByteArray) { internal::LevelInfo level_info; level_info.def_level = 1; level_info.rep_level = 0; - // Must use REPEATED to excercise ReadAndThrowAwayValues for ByteArrays. It + // Must use REPEATED to exercise ReadAndThrowAwayValues for ByteArrays. It // does not do any buffering for Optional or Required fields as it calls // ResetValues after every read. NodePtr type = schema::ByteArray("b", Repetition::OPTIONAL); From df873c73656d922a2a966701ce2a73dfc278a8f2 Mon Sep 17 00:00:00 2001 From: Fatemah Panahi Date: Thu, 27 Oct 2022 20:23:58 +0000 Subject: [PATCH 23/24] Address some more comments. --- cpp/src/parquet/column_reader_test.cc | 39 +++++++++++++++++++++------ 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 14ffb4569de..9b4c9f5ae97 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -639,11 +639,11 @@ class RecordReaderTest : public ::testing::Test { TEST_F(RecordReaderTest, BasicReadRepeatedField) { Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED); - // Records look like: {[10], [20, 20], [30, 30, 30]} + // Records look like: {[10], null, [20, 20], null, [30, 30, 30], null} std::vector> pages; std::vector values = {10, 20, 20, 30, 30, 30}; - std::vector def_levels = {1, 1, 1, 1, 1, 1}; - std::vector rep_levels = {0, 0, 1, 0, 1, 1}; + std::vector def_levels = {1, 0, 1, 1, 0, 1, 1, 1, 0}; + std::vector rep_levels = {0, 0, 0, 1, 0, 0, 1, 1, 0}; std::shared_ptr page = MakeDataPage( descr_.get(), values, /*num_values=*/static_cast(def_levels.size()), @@ -655,14 +655,37 @@ TEST_F(RecordReaderTest, BasicReadRepeatedField) { auto pager = std::make_unique(pages); record_reader_->SetPageReader(std::move(pager)); + // Read [10], null int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2); ASSERT_EQ(records_read, 2); - CheckState(/*values_written=*/3, /*null_count=*/0, /*levels_written=*/6, - /*levels_position=*/3); - CheckReadValues(/*expected_values=*/{10, 20, 20}, /*expected_defs=*/{1, 1, 1}, - /*expected_reps=*/{0, 0, 1}); + CheckState(/*values_written=*/2, /*null_count=*/1, /*levels_written=*/9, + /*levels_position=*/2); + CheckReadValues(/*expected_values=*/{10, kNullValue}, /*expected_defs=*/{1, 0}, + /*expected_reps=*/{0, 0}); record_reader_->Reset(); - CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/3, + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/7, + /*levels_position=*/0); + // Read [20, 20], null, [30, 30, 30] + records_read = record_reader_->ReadRecords(/*num_records=*/3); + ASSERT_EQ(records_read, 3); + CheckState(/*values_written=*/6, /*null_count=*/1, /*levels_written=*/7, + /*levels_position=*/6); + CheckReadValues(/*expected_values=*/{20, 20, kNullValue, 30, 30, 30}, + /*expected_defs=*/{1, 1, 0, 1, 1, 1}, + /*expected_reps=*/{0, 1, 0, 0, 1, 1}); + record_reader_->Reset(); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1, + /*levels_position=*/0); + // Read the last null value and read past the end. + records_read = record_reader_->ReadRecords(/*num_records=*/3); + ASSERT_EQ(records_read, 1); + CheckState(/*values_written=*/1, /*null_count=*/1, /*levels_written=*/1, + /*levels_position=*/1); + CheckReadValues(/*expected_values=*/{kNullValue}, + /*expected_defs=*/{0}, + /*expected_reps=*/{0}); + record_reader_->Reset(); + CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0, /*levels_position=*/0); } From 312abbc8ecdeacb063c10006233dd6feea560dde Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 31 Oct 2022 16:10:52 +0100 Subject: [PATCH 24/24] Fix lint --- cpp/src/parquet/column_reader_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index 83628fc5f07..6916ebe8914 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -836,7 +836,7 @@ TEST_F(RecordReaderTest, SkipOptional) { // Reset after a Skip. record_reader_->Reset(); CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1, - /*levels_position=*/0); + /*levels_position=*/0); } {