Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,15 @@ namespace internal {

// Constructs a ChunkedBinaryBuilder from a maximum chunk value length and a
// memory pool: max_chunk_value_length is stored in max_chunk_value_length_
// and builder_ is pointed at a newly allocated BinaryBuilder(pool).
//
// NOTE(review): two initializer lists appear back to back below, which looks
// like the removed and the added side of a diff shown together -- confirm
// against the real file. The first form has an empty body; the second form
// additionally runs the DCHECK_LE below.
ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
MemoryPool* pool)
: max_chunk_value_length_(max_chunk_value_length),
builder_(new BinaryBuilder(pool)) {}
: max_chunk_value_length_(max_chunk_value_length), builder_(new BinaryBuilder(pool)) {
// Check that the requested per-chunk value length does not exceed
// kBinaryMemoryLimit.
DCHECK_LE(max_chunk_value_length, kBinaryMemoryLimit);
}

// Constructs a ChunkedBinaryBuilder that additionally takes max_chunk_length
// and stores it in max_chunk_length_.
//
// NOTE(review): two alternative initializer lists appear below, which looks
// like the removed and the added side of a diff shown together -- confirm
// against the real file. The first form initializes all three members
// directly; the second form delegates to the
// (max_chunk_value_length, pool) constructor and then assigns
// max_chunk_length_ in the body.
ChunkedBinaryBuilder::ChunkedBinaryBuilder(int32_t max_chunk_value_length,
int32_t max_chunk_length, MemoryPool* pool)
: max_chunk_value_length_(max_chunk_value_length),
max_chunk_length_(max_chunk_length),
builder_(new BinaryBuilder(pool)) {}
: ChunkedBinaryBuilder(max_chunk_value_length, pool) {
// Assigned in the body because a delegating constructor cannot also list
// member initializers.
max_chunk_length_ = max_chunk_length;
}

Status ChunkedBinaryBuilder::Finish(ArrayVector* out) {
if (builder_->length() > 0 || chunks_.size() == 0) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/buffer_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ class ARROW_EXPORT BufferBuilder {
return Resize(GrowByFactor(capacity_, min_capacity), false);
}

/// \brief Return a capacity expanded by an unspecified growth factor
/// \brief Return a capacity expanded by the desired growth factor
static int64_t GrowByFactor(int64_t current_capacity, int64_t new_capacity) {
// Doubling capacity except for large Reserve requests. 2x growth strategy
// (versus 1.5x) seems to have slightly better performance when using
Expand Down
25 changes: 14 additions & 11 deletions cpp/src/parquet/column_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1209,37 +1209,40 @@ class ByteArrayChunkedRecordReader : public TypedRecordReader<ByteArrayType>,
virtual public BinaryRecordReader {
public:
// Constructs the reader: forwards descr and pool to
// TypedRecordReader<ByteArrayType> and checks that the column's physical type
// is Type::BYTE_ARRAY.
//
// NOTE(review): lines from two variants of this constructor appear mixed
// together below (looks like the removed and added sides of a diff) --
// confirm against the real file. One variant initializes builder_ to nullptr
// and then resets it to a ChunkedBinaryBuilder created with kBinaryChunksize
// and this->pool_; the other resets accumulator_.builder to a new
// ::arrow::BinaryBuilder(pool).
ByteArrayChunkedRecordReader(const ColumnDescriptor* descr, ::arrow::MemoryPool* pool)
: TypedRecordReader<ByteArrayType>(descr, pool), builder_(nullptr) {
// ARROW-4688(wesm): Using 2^31 - 1 chunks for now
constexpr int32_t kBinaryChunksize = 2147483647;
: TypedRecordReader<ByteArrayType>(descr, pool) {
DCHECK_EQ(descr_->physical_type(), Type::BYTE_ARRAY);
builder_.reset(
new ::arrow::internal::ChunkedBinaryBuilder(kBinaryChunksize, this->pool_));
accumulator_.builder.reset(new ::arrow::BinaryBuilder(pool));
}

// Returns the arrays built so far as an ::arrow::ArrayVector.
//
// NOTE(review): lines from two variants of the body appear mixed together
// below (looks like the removed and added sides of a diff) -- confirm against
// the real file.
//  - One variant calls builder_->Finish(&chunks) and returns chunks.
//  - The other copies accumulator_.chunks into result, finishes
//    accumulator_.builder into one more chunk when result is empty or the
//    builder has a non-zero length, clears accumulator_.chunks, and returns
//    result.
// A non-OK status from either Finish call is passed to PARQUET_THROW_NOT_OK.
::arrow::ArrayVector GetBuilderChunks() override {
::arrow::ArrayVector chunks;
PARQUET_THROW_NOT_OK(builder_->Finish(&chunks));
return chunks;
::arrow::ArrayVector result = accumulator_.chunks;
// Finish the in-progress builder if it holds values, or if there would
// otherwise be no chunk at all to return.
if (result.size() == 0 || accumulator_.builder->length() > 0) {
std::shared_ptr<::arrow::Array> last_chunk;
PARQUET_THROW_NOT_OK(accumulator_.builder->Finish(&last_chunk));
result.push_back(last_chunk);
}
// Drop the accumulated chunks now that they have been copied into result.
accumulator_.chunks = {};
return result;
}

// Decodes values_to_read values through
// current_decoder_->DecodeArrowNonNull, checks that the decoder reported
// exactly values_to_read values, then calls ResetValues().
//
// The count is narrowed from int64_t to int before being passed to the
// decoder.
//
// NOTE(review): two alternative argument lines (builder_.get() and
// &accumulator_) appear below, which looks like the removed and added sides
// of a diff shown together -- confirm against the real file.
void ReadValuesDense(int64_t values_to_read) override {
int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull(
static_cast<int>(values_to_read), builder_.get());
static_cast<int>(values_to_read), &accumulator_);
DCHECK_EQ(num_decoded, values_to_read);
ResetValues();
}

// Decodes values through current_decoder_->DecodeArrow, passing the value
// count, the null count, the mutable valid_bits_ data and values_written_.
// Checks that the decoder reported values_to_read - null_count values, then
// calls ResetValues().
//
// Both counts are narrowed from int64_t to int before being passed to the
// decoder.
//
// NOTE(review): two alternative final-argument lines (builder_.get() and
// &accumulator_) appear below, which looks like the removed and added sides
// of a diff shown together -- confirm against the real file.
void ReadValuesSpaced(int64_t values_to_read, int64_t null_count) override {
int64_t num_decoded = this->current_decoder_->DecodeArrow(
static_cast<int>(values_to_read), static_cast<int>(null_count),
valid_bits_->mutable_data(), values_written_, builder_.get());
valid_bits_->mutable_data(), values_written_, &accumulator_);
DCHECK_EQ(num_decoded, values_to_read - null_count);
ResetValues();
}

private:
// NOTE(review): both builder_ and accumulator_ are declared here; this looks
// like the removed and added sides of a diff shown together -- confirm
// against the real file.
// Uniquely owned ChunkedBinaryBuilder.
std::unique_ptr<::arrow::internal::ChunkedBinaryBuilder> builder_;
// Helper data structure for accumulating builder chunks
ArrowBinaryAccumulator accumulator_;
};

class ByteArrayDictionaryRecordReader : public TypedRecordReader<ByteArrayType>,
Expand Down
Loading