From 3af2cf86fb3aef6372ea24ccbd6faa157bf7de7b Mon Sep 17 00:00:00 2001
From: Tobias Zagorni
Date: Wed, 4 May 2022 21:13:19 +0200
Subject: [PATCH 1/2] avoid slicing batches if it would create only one slice

---
 cpp/src/arrow/compute/exec.cc | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index f8a522a2735..ad22ccdcd90 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -308,16 +308,24 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
   // Now, fill the batch
   batch->values.resize(args_.size());
   batch->length = iteration_size;
-  for (size_t i = 0; i < args_.size(); ++i) {
-    if (args_[i].is_scalar()) {
-      batch->values[i] = args_[i].scalar();
-    } else if (args_[i].is_array()) {
-      batch->values[i] = args_[i].array()->Slice(position_, iteration_size);
-    } else {
-      const ChunkedArray& carr = *args_[i].chunked_array();
-      const auto& chunk = carr.chunk(chunk_indexes_[i]);
-      batch->values[i] = chunk->data()->Slice(chunk_positions_[i], iteration_size);
-      chunk_positions_[i] += iteration_size;
+
+  if (iteration_size == length_) {
+    ARROW_DCHECK_EQ(position_, 0);
+    for (size_t i = 0; i < args_.size(); ++i) {
+      batch->values[i] = std::move(args_[i]);
+    }
+  } else {
+    for (size_t i = 0; i < args_.size(); ++i) {
+      if (args_[i].is_scalar()) {
+        batch->values[i] = args_[i].scalar();
+      } else if (args_[i].is_array()) {
+        batch->values[i] = args_[i].array()->Slice(position_, iteration_size);
+      } else {
+        const ChunkedArray& carr = *args_[i].chunked_array();
+        const auto& chunk = carr.chunk(chunk_indexes_[i]);
+        batch->values[i] = chunk->data()->Slice(chunk_positions_[i], iteration_size);
+        chunk_positions_[i] += iteration_size;
+      }
     }
   }
   position_ += iteration_size;

From 7edcdb996fa30d2eaa5ba1a86fd23d4e0645d17e Mon Sep 17 00:00:00 2001
From: Tobias Zagorni
Date: Fri, 13 May 2022 14:53:17 +0200
Subject: [PATCH 2/2] add special handling for chunked arrays

---
 cpp/src/arrow/compute/exec.cc | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc
index ad22ccdcd90..3973d89c369 100644
--- a/cpp/src/arrow/compute/exec.cc
+++ b/cpp/src/arrow/compute/exec.cc
@@ -312,7 +312,13 @@ bool ExecBatchIterator::Next(ExecBatch* batch) {
   if (iteration_size == length_) {
     ARROW_DCHECK_EQ(position_, 0);
     for (size_t i = 0; i < args_.size(); ++i) {
-      batch->values[i] = std::move(args_[i]);
+      if (args_[i].kind() == Datum::CHUNKED_ARRAY) {
+        const ChunkedArray& carr = *args_[i].chunked_array();
+        batch->values[i] = Datum(carr.chunk(chunk_indexes_[i])->data());
+        chunk_positions_[i] += iteration_size;
+      } else {
+        batch->values[i] = std::move(args_[i]);
+      }
     }
   } else {
     for (size_t i = 0; i < args_.size(); ++i) {