diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index b761de69d33..b9472d72aeb 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2459,7 +2459,9 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder kMaxDeltaBitWidth) { - throw ParquetException("delta bit width larger than integer bit width"); + throw ParquetException("delta bit width " + std::to_string(bit_width_data[i]) + + " larger than integer bit width " + + std::to_string(kMaxDeltaBitWidth)); } } mini_block_idx_ = 0; @@ -2479,7 +2481,20 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder { public: using c_type = typename Type::c_type; static constexpr int TYPE = Type::type_num; - static constexpr size_t ROUND_TRIP_TIMES = 3; + static constexpr size_t kNumRoundTrips = 3; + const std::vector kReadBatchSizes = {1, 11}; void InitBoundData(int nvalues, int repeats, c_type half_range) { num_values_ = nvalues * repeats; @@ -1328,16 +1329,25 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { auto encoder = MakeTypedEncoder(Encoding::DELTA_BINARY_PACKED, false, descr_.get()); auto decoder = MakeTypedDecoder(Encoding::DELTA_BINARY_PACKED, descr_.get()); - - for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) { + auto read_batch_sizes = kReadBatchSizes; + read_batch_sizes.push_back(num_values_); + // Encode a number of times to exercise the flush logic + for (size_t i = 0; i < kNumRoundTrips; ++i) { encoder->Put(draws_, num_values_); encode_buffer_ = encoder->FlushValues(); - - decoder->SetData(num_values_, encode_buffer_->data(), - static_cast(encode_buffer_->size())); - int values_decoded = decoder->Decode(decode_buf_, num_values_); - ASSERT_EQ(num_values_, values_decoded); - ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); + // Exercise different batch sizes + for (const int read_batch_size : read_batch_sizes) { + decoder->SetData(num_values_, encode_buffer_->data(), + static_cast(encode_buffer_->size())); + + int values_decoded = 0; + while (values_decoded < num_values_) { + values_decoded += + decoder->Decode(decode_buf_ + values_decoded, read_batch_size); + } + ASSERT_EQ(num_values_, values_decoded); + ASSERT_NO_FATAL_FAILURE(VerifyResults(decode_buf_, draws_, num_values_)); + } } } @@ -1353,7 +1363,7 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { } } - for (size_t i = 0; i < ROUND_TRIP_TIMES; ++i) { + for (size_t i = 0; i < kNumRoundTrips; ++i) { encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); encode_buffer_ = encoder->FlushValues(); decoder->SetData(num_values_ - null_count, encode_buffer_->data(),