From e88dede43ac1d5d5162c4e76096b79fe4f9028b8 Mon Sep 17 00:00:00 2001 From: Weston Pace Date: Wed, 12 Apr 2023 14:55:15 -0700 Subject: [PATCH] We now detect if the join would result in more than 4GiB of key data, which is not supported by the RowArray. We return an invalid status. We cannot support a join that large without spilling. --- cpp/src/arrow/acero/swiss_join.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index ed1608e67d1..3f11b89af39 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -473,6 +473,12 @@ Status RowArrayMerge::PrepareForMerge(RowArray* target, (*first_target_row_id)[sources.size()] = num_rows; } + if (num_bytes > std::numeric_limits<uint32_t>::max()) { + return Status::Invalid( + "There are more than 2^32 bytes of key data. Acero cannot " + "process a join of this magnitude"); + } + // Allocate target memory // target->rows_.Clean();