From a9900d6adbaa81f3b96bd9f04715a01fd0270c34 Mon Sep 17 00:00:00 2001 From: Tobias Mayer Date: Thu, 13 Nov 2025 09:35:29 +0100 Subject: [PATCH] Make StructArray::field() thread-safe Before this change it was possible for two threads calling `field()` with the same index at the same time to cause a race on the stored entry in `boxed_fields_`. I.e. if a second thread goes into the path that calls `MakeArray` before the first thread stored its own new array, the second thread would also write to the same shared_ptr and invalidate the shared_ptr from the first thread, thereby also invalidating the returned reference. --- cpp/src/arrow/array/array_nested.cc | 29 +++++++++++++++++------------ cpp/src/arrow/array/array_nested.h | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index b821451419a..b7f16860791 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -1076,20 +1076,25 @@ const ArrayVector& StructArray::fields() const { return boxed_fields_; } -const std::shared_ptr& StructArray::field(int i) const { +std::shared_ptr StructArray::field(int i) const { std::shared_ptr result = std::atomic_load(&boxed_fields_[i]); - if (!result) { - std::shared_ptr field_data; - if (data_->offset != 0 || data_->child_data[i]->length != data_->length) { - field_data = data_->child_data[i]->Slice(data_->offset, data_->length); - } else { - field_data = data_->child_data[i]; - } - result = MakeArray(field_data); - std::atomic_store(&boxed_fields_[i], std::move(result)); - return boxed_fields_[i]; + if (result) { + return result; } - return boxed_fields_[i]; + std::shared_ptr field_data; + if (data_->offset != 0 || data_->child_data[i]->length != data_->length) { + field_data = data_->child_data[i]->Slice(data_->offset, data_->length); + } else { + field_data = data_->child_data[i]; + } + result = MakeArray(field_data); + // Check if some other thread inserted the array in the meantime and return + // that in that case. + std::shared_ptr expected = nullptr; + if (!std::atomic_compare_exchange_strong(&boxed_fields_[i], &expected, result)) { + result = std::move(expected); + } + return result; } std::shared_ptr StructArray::GetFieldByName(const std::string& name) const { diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index f122f9378b5..2591fdaf414 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -692,7 +692,7 @@ class ARROW_EXPORT StructArray : public Array { // Return a shared pointer in case the requestor desires to share ownership // with this array. The returned array has its offset, length and null // count adjusted. - const std::shared_ptr& field(int pos) const; + std::shared_ptr field(int pos) const; const ArrayVector& fields() const;