From 781f7ec3e88f42f6ff510149447312750f53fb9a Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 8 Feb 2024 20:38:08 +0100 Subject: [PATCH 1/3] Increase size of Acero TempStack Certain Acero execution plans can cause an overflow of the TempVectorStack initialized by the QueryContext, and increasing the size of the stack fixes the problem. I don't know exactly what causes the overflow, so I haven't written a test for it. Fixes #39582. --- cpp/src/arrow/acero/query_context.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/acero/query_context.cc b/cpp/src/arrow/acero/query_context.cc index 9f838508fcd..a27397d1207 100644 --- a/cpp/src/arrow/acero/query_context.cc +++ b/cpp/src/arrow/acero/query_context.cc @@ -53,7 +53,7 @@ size_t QueryContext::max_concurrency() const { return thread_indexer_.Capacity() Result QueryContext::GetTempStack(size_t thread_index) { if (!tld_[thread_index].is_init) { RETURN_NOT_OK(tld_[thread_index].stack.Init( - memory_pool(), 8 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); + memory_pool(), 32 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); tld_[thread_index].is_init = true; } return &tld_[thread_index].stack; From 33e353d3f887a668fab7750b8a81ad95c776638b Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 22 Feb 2024 16:06:27 +0100 Subject: [PATCH 2/3] Throw error on overflow in TempVectorStack::alloc --- cpp/src/arrow/compute/util.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index c55143af0cd..df88da84097 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -32,17 +32,19 @@ using internal::CpuInfo; namespace util { void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - int64_t old_top = top_; - top_ += PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); + int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); // Stack overflow check - ARROW_DCHECK(top_ <= buffer_size_); - *data = buffer_->mutable_data() + old_top + sizeof(uint64_t); + if (new_top > buffer_size_) { + throw std::runtime_error("TempVectorStack::alloc overflow"); + } + *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); // We set 8 bytes before the beginning of the allocated range and // 8 bytes after the end to check for stack overflow (which would // result in those known bytes being corrupted). - reinterpret_cast(buffer_->mutable_data() + old_top)[0] = kGuard1; - reinterpret_cast(buffer_->mutable_data() + top_)[-1] = kGuard2; + reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; + reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; *id = num_vectors_++; + top_ = new_top; } void TempVectorStack::release(int id, uint32_t num_bytes) { From ec3fd3bc2400fb32893306e14744af7af0b73b08 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 26 Feb 2024 16:56:16 +0100 Subject: [PATCH 3/3] Turn exception into a controlled abort with error message --- cpp/src/arrow/compute/util.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index df88da84097..2058ba9f307 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -33,10 +33,9 @@ namespace util { void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { int64_t new_top = top_ + PaddedAllocationSize(num_bytes) + 2 * sizeof(uint64_t); - // Stack overflow check - if (new_top > buffer_size_) { - throw std::runtime_error("TempVectorStack::alloc overflow"); - } + // Stack overflow check (see GH-39582). + // XXX cannot return a regular Status because most consumers do not either. + ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow"; *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); // We set 8 bytes before the beginning of the allocated range and // 8 bytes after the end to check for stack overflow (which would