From 6fa77b0085fa2733b9f87a796b1c25fc79f08538 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 6 Feb 2025 10:44:31 +0100 Subject: [PATCH] GH-45885: [C++] Require C++20 --- .env | 2 +- .github/workflows/cpp.yml | 12 +-- .github/workflows/cpp_extra.yml | 1 - .github/workflows/cpp_windows.yml | 2 +- .github/workflows/r.yml | 3 +- .github/workflows/ruby.yml | 2 +- c_glib/meson.build | 2 +- ci/docker/almalinux-10-verify-rc.dockerfile | 24 ++++++ ci/docker/debian-experimental-cpp.dockerfile | 2 + ci/scripts/cpp_build.sh | 4 +- ci/scripts/install_sccache.sh | 6 +- ci/scripts/r_windows_build.sh | 2 +- cpp/CMakeLists.txt | 6 +- cpp/cmake_modules/GandivaAddBitcode.cmake | 2 +- cpp/cmake_modules/SetupCxxFlags.cmake | 8 +- cpp/examples/minimal_build/CMakeLists.txt | 4 +- cpp/examples/minimal_build/run_static.sh | 2 +- .../parquet/parquet_arrow/CMakeLists.txt | 4 +- cpp/examples/tutorial_examples/CMakeLists.txt | 2 +- cpp/meson.build | 2 +- cpp/src/arrow/CMakeLists.txt | 8 +- cpp/src/arrow/array/array_nested.cc | 80 ++++++++++++++----- cpp/src/arrow/array/array_nested.h | 18 ++++- .../compute/kernels/hash_aggregate_numeric.cc | 44 +++++++--- cpp/src/arrow/filesystem/gcsfs.cc | 3 +- cpp/src/arrow/filesystem/gcsfs_internal.cc | 5 +- cpp/src/arrow/filesystem/gcsfs_internal.h | 9 ++- cpp/src/arrow/filesystem/s3fs.cc | 24 +++--- .../arrow/flight/server_tracing_middleware.cc | 1 - .../flight/transport/grpc/customize_grpc.h | 20 +++++ .../flight/transport/grpc/grpc_client.cc | 2 + .../flight/transport/grpc/grpc_server.cc | 2 + cpp/src/arrow/record_batch.cc | 23 +++--- cpp/src/arrow/util/range.h | 5 +- cpp/src/arrow/util/string.h | 6 +- cpp/src/arrow/util/vector.h | 9 +-- cpp/src/gandiva/engine.cc | 2 +- cpp/src/gandiva/precompiled/decimal_ops.cc | 3 +- cpp/thirdparty/versions.txt | 4 +- dev/release/setup-rhel-rebuilds.sh | 15 +--- dev/release/verify-apt.sh | 2 +- dev/release/verify-yum.sh | 2 +- dev/tasks/homebrew-formulae/apache-arrow.rb | 2 +- .../apache-arrow/yum/almalinux-10/Dockerfile | 1 - .../apache-arrow/yum/almalinux-8/Dockerfile | 6 +- .../apache-arrow/yum/almalinux-9/Dockerfile | 1 - .../apache-arrow/yum/arrow.spec.in | 21 +++++ dev/tasks/linux-packages/yum/build.sh | 11 +++ dev/tasks/r/github.packages.yml | 2 + dev/tasks/tasks.yml | 17 +--- dev/tasks/verify-rc/github.macos.yml | 15 +--- docs/source/cpp/conventions.rst | 2 +- docs/source/developers/cpp/building.rst | 4 +- docs/source/python/integration/extending.rst | 2 +- matlab/CMakeLists.txt | 2 +- .../cmake/BuildMatlabArrowInterface.cmake | 2 +- python/CMakeLists.txt | 2 +- python/pyarrow/tests/test_cython.py | 4 +- 58 files changed, 295 insertions(+), 178 deletions(-) create mode 100644 ci/docker/almalinux-10-verify-rc.dockerfile diff --git a/.env b/.env index 6a62cad5fcd..dad867f8f66 100644 --- a/.env +++ b/.env @@ -57,7 +57,7 @@ FEDORA=42 UBUNTU=22.04 # Default versions for various dependencies -CLANG_TOOLS=14 +CLANG_TOOLS=18 CMAKE=3.26.0 CUDA=11.7.1 DASK=latest diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 6586480a81e..45a9c3ba774 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -81,24 +81,24 @@ jobs: matrix: include: - arch: amd64 - clang-tools: 14 + clang-tools: 18 image: conda-cpp - llvm: 14 + llvm: 18 runs-on: ubuntu-latest simd-level: AVX2 title: AMD64 Conda C++ AVX2 ubuntu: 22.04 - arch: amd64 - clang-tools: 14 + clang-tools: 18 image: ubuntu-cpp-sanitizer - llvm: 14 + llvm: 18 runs-on: ubuntu-latest title: AMD64 Ubuntu 24.04 C++ ASAN UBSAN ubuntu: 24.04 - arch: arm64v8 - clang-tools: 14 + clang-tools: 18 image: ubuntu-cpp - llvm: 14 + llvm: 18 runs-on: ubuntu-24.04-arm title: ARM64 Ubuntu 22.04 C++ ubuntu: 22.04 diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index b56b74c9a29..ca5a3adb4b7 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -347,7 +347,6 @@ jobs: ARROW_DEPENDENCY_SOURCE: VCPKG ARROW_FLIGHT_SQL_ODBC: ON ARROW_SIMD_LEVEL: AVX2 - CMAKE_CXX_STANDARD: "17" CMAKE_GENERATOR: Ninja CMAKE_INSTALL_PREFIX: /usr VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' diff --git a/.github/workflows/cpp_windows.yml b/.github/workflows/cpp_windows.yml index ae74e4d695e..394cd8851c3 100644 --- a/.github/workflows/cpp_windows.yml +++ b/.github/workflows/cpp_windows.yml @@ -63,7 +63,7 @@ jobs: ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON BOOST_SOURCE: BUNDLED - CMAKE_CXX_STANDARD: "17" + CMAKE_CXX_STANDARD: "20" CMAKE_GENERATOR: Ninja CMAKE_INSTALL_PREFIX: /usr CMAKE_UNITY_BUILD: ON diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 25875c7d833..38bccb1f4de 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -229,12 +229,13 @@ jobs: # static library. The R is not used here but R 4.1 was the last R to use # Rtools40. r-version: "4.1" - rtools-version: 40 + rtools-version: ${{ matrix.config.rtools }} Ncpus: 2 - name: Build Arrow C++ shell: bash env: MINGW_ARCH: ${{ matrix.config.arch }} + RTOOLS_VERSION: ${{ matrix.config.rtools }} run: ci/scripts/r_windows_build.sh - name: Rename libarrow.zip # So that they're unique when multiple are downloaded in the next step diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 0a9140bfa2d..3545f1c16a5 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -349,7 +349,7 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - CMAKE_CXX_STANDARD: "17" + CMAKE_CXX_STANDARD: "20" CMAKE_GENERATOR: Ninja CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist" VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' diff --git a/c_glib/meson.build b/c_glib/meson.build index 46fc56e7dbd..e51024f6fab 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -21,7 +21,7 @@ project( 'arrow-glib', 'c', 'cpp', - default_options: ['c_std=c99', 'cpp_std=c++17'], + default_options: ['c_std=c99', 'cpp_std=c++20'], license: 'Apache-2.0', # Debian: # https://packages.debian.org/search?keywords=meson diff --git a/ci/docker/almalinux-10-verify-rc.dockerfile b/ci/docker/almalinux-10-verify-rc.dockerfile new file mode 100644 index 00000000000..efd77a86d10 --- /dev/null +++ b/ci/docker/almalinux-10-verify-rc.dockerfile @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +ARG arch=amd64 +FROM ${arch}/almalinux:10 + +COPY dev/release/setup-rhel-rebuilds.sh / +RUN /setup-rhel-rebuilds.sh && \ + rm /setup-rhel-rebuilds.sh && \ + dnf -y clean all diff --git a/ci/docker/debian-experimental-cpp.dockerfile b/ci/docker/debian-experimental-cpp.dockerfile index 743f5ddd3be..d37b58e2307 100644 --- a/ci/docker/debian-experimental-cpp.dockerfile +++ b/ci/docker/debian-experimental-cpp.dockerfile @@ -59,6 +59,7 @@ RUN if [ -n "${gcc}" ]; then \ libldap-dev \ liblz4-dev \ libnghttp2-dev \ + libopentelemetry-proto-dev \ libprotobuf-dev \ libprotoc-dev \ libpsl-dev \ @@ -88,6 +89,7 @@ RUN if [ -n "${gcc}" ]; then \ rapidjson-dev \ rsync \ tzdata \ + tzdata-legacy \ zlib1g-dev && \ apt-get install -y -q --no-install-recommends -t experimental \ clang${llvm_package_suffix} \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 2f02f8c1496..904b5cccb42 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -168,7 +168,7 @@ elif [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ -DCMAKE_C_FLAGS="${CFLAGS:-}" \ -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ - -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-20}" \ -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ @@ -260,7 +260,7 @@ else -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ -DCMAKE_C_FLAGS="${CFLAGS:-}" \ -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ - -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-20}" \ -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ diff --git a/ci/scripts/install_sccache.sh b/ci/scripts/install_sccache.sh index c571625a3b3..3899c482af9 100755 --- a/ci/scripts/install_sccache.sh +++ b/ci/scripts/install_sccache.sh @@ -19,15 +19,17 @@ set -e +DEFAULT_VERSION=0.12.0 + if [ "$#" -lt 2 ] || [ "$#" -gt 3 ]; then echo "Usage: $0 " - echo "Will default to version=0.3.0 " + echo "Will default to version=${DEFAULT_VERSION}" exit 1 fi BUILD=$1 PREFIX=$2 -VERSION=${3:-0.3.0} +VERSION=${3:-${DEFAULT_VERSION}} ARCH=$(uname -m) if [ "${ARCH}" != x86_64 ] && [ "${ARCH}" != aarch64 ]; then diff --git a/ci/scripts/r_windows_build.sh b/ci/scripts/r_windows_build.sh index e3b68c941c0..ef9c58f6afc 100755 --- a/ci/scripts/r_windows_build.sh +++ b/ci/scripts/r_windows_build.sh @@ -44,7 +44,7 @@ mv mingw* build cd build # This may vary by system/CI provider -MSYS_LIB_DIR="/c/rtools40" +MSYS_LIB_DIR="/c/rtools${RTOOLS_VERSION}" # Untar the builds we made ls *.xz | xargs -n 1 tar -xJf diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c9f026f926a..5b260c0eb68 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -528,10 +528,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARROW_CXXFLAGS}") # C++ specific flags. set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CXX_COMMON_FLAGS} ${ARROW_CXXFLAGS}") -# Remove --std=c++17 to avoid errors from C compilers -string(REPLACE "-std=c++17" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) +# Remove -std=c++20 to avoid errors from C compilers +string(REPLACE "-std=c++20" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) -# Add C++-only flags, like -std=c++17 +# Add C++-only flags, like -std=c++20 set(CMAKE_CXX_FLAGS "${CXX_ONLY_FLAGS} ${CMAKE_CXX_FLAGS}") include(ThirdpartyToolchain) diff --git a/cpp/cmake_modules/GandivaAddBitcode.cmake b/cpp/cmake_modules/GandivaAddBitcode.cmake index 6b5e5b3e60c..b22581b4a11 100644 --- a/cpp/cmake_modules/GandivaAddBitcode.cmake +++ b/cpp/cmake_modules/GandivaAddBitcode.cmake @@ -17,7 +17,7 @@ # Create bitcode for the given source file. function(gandiva_add_bitcode SOURCE) - set(CLANG_OPTIONS -std=c++17) + set(CLANG_OPTIONS -std=c++20) if(MSVC) # "19.20" means that it's compatible with Visual Studio 16 2019. # We can update this to "19.30" when we dropped support for Visual diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 3c172aebdf8..c92daf32690 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -141,12 +141,12 @@ endif() # This ensures that things like c++17 get passed correctly if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) -elseif(${CMAKE_CXX_STANDARD} VERSION_LESS 17) - message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 17") + set(CMAKE_CXX_STANDARD 20) +elseif(${CMAKE_CXX_STANDARD} VERSION_LESS 20) + message(FATAL_ERROR "Cannot set a CMAKE_CXX_STANDARD smaller than 20") endif() -# We require a C++17 compliant compiler +# We require a C++20 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) # ARROW-6848: Do not use GNU (or other CXX) extensions diff --git a/cpp/examples/minimal_build/CMakeLists.txt b/cpp/examples/minimal_build/CMakeLists.txt index 626b987b093..d0a0a1e0a22 100644 --- a/cpp/examples/minimal_build/CMakeLists.txt +++ b/cpp/examples/minimal_build/CMakeLists.txt @@ -29,10 +29,10 @@ cmake_dependent_option(ARROW_LINK_SHARED OFF) if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD 20) endif() -# We require a C++17 compliant compiler +# We require a C++20 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) if(NOT CMAKE_BUILD_TYPE) diff --git a/cpp/examples/minimal_build/run_static.sh b/cpp/examples/minimal_build/run_static.sh index 189f59a007b..26019227a7b 100755 --- a/cpp/examples/minimal_build/run_static.sh +++ b/cpp/examples/minimal_build/run_static.sh @@ -86,7 +86,7 @@ echo rm -rf $EXAMPLE_BUILD_DIR mkdir -p $EXAMPLE_BUILD_DIR -${CXX:-c++} -std=c++17 \ +${CXX:-c++} -std=c++20 \ -o $EXAMPLE_BUILD_DIR/arrow-example \ $EXAMPLE_DIR/example.cc \ $(PKG_CONFIG_PATH=$ARROW_BUILD_DIR/lib/pkgconfig \ diff --git a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt index 189d17914d6..6c3dac8f2ce 100644 --- a/cpp/examples/parquet/parquet_arrow/CMakeLists.txt +++ b/cpp/examples/parquet/parquet_arrow/CMakeLists.txt @@ -28,10 +28,10 @@ option(PARQUET_LINK_SHARED "Link to the Parquet shared library" ON) # This ensures that things like -std=gnu++... get passed correctly if(NOT DEFINED CMAKE_CXX_STANDARD) - set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_STANDARD 20) endif() -# We require a C++17 compliant compiler +# We require a C++20 compliant compiler set(CMAKE_CXX_STANDARD_REQUIRED ON) # Look for installed packages the system diff --git a/cpp/examples/tutorial_examples/CMakeLists.txt b/cpp/examples/tutorial_examples/CMakeLists.txt index 1466bce48af..d236b7e0a9c 100644 --- a/cpp/examples/tutorial_examples/CMakeLists.txt +++ b/cpp/examples/tutorial_examples/CMakeLists.txt @@ -21,7 +21,7 @@ project(ArrowTutorialExamples) find_package(ArrowDataset) -set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wall -Wextra") set(CMAKE_BUILD_TYPE Release) diff --git a/cpp/meson.build b/cpp/meson.build index f3e0181b68c..16bb844d089 100644 --- a/cpp/meson.build +++ b/cpp/meson.build @@ -22,7 +22,7 @@ project( version: '23.0.0-SNAPSHOT', license: 'Apache-2.0', meson_version: '>=1.3.0', - default_options: ['c_std=c11', 'warning_level=2', 'cpp_std=c++17'], + default_options: ['c_std=c11', 'warning_level=2', 'cpp_std=c++20'], ) project_args = [ diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index deb6db3cdd3..a46db60321a 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -202,7 +202,7 @@ function(arrow_add_object_library PREFIX) add_library(${prefix}_shared OBJECT ${SOURCES}) set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) - target_compile_features(${prefix}_shared PRIVATE cxx_std_17) + target_compile_features(${prefix}_shared PRIVATE cxx_std_20) set(${PREFIX}_TARGET_SHARED ${prefix}_shared PARENT_SCOPE) @@ -212,7 +212,7 @@ function(arrow_add_object_library PREFIX) add_library(${prefix}_static OBJECT ${SOURCES}) set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) - target_compile_features(${prefix}_static PRIVATE cxx_std_17) + target_compile_features(${prefix}_static PRIVATE cxx_std_20) set(${PREFIX}_TARGET_STATIC ${prefix}_static PARENT_SCOPE) @@ -224,7 +224,7 @@ function(arrow_add_object_library PREFIX) else() add_library(${prefix} OBJECT ${SOURCES}) set_target_properties(${prefix} PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_features(${prefix} PRIVATE cxx_std_17) + target_compile_features(${prefix} PRIVATE cxx_std_20) set(${PREFIX}_TARGET_SHARED ${prefix} PARENT_SCOPE) @@ -1181,7 +1181,7 @@ endif() foreach(LIB_TARGET ${ARROW_LIBRARIES}) target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_EXPORTING) # C++17 is required to compile against Arrow C++ headers and libraries - target_compile_features(${LIB_TARGET} PUBLIC cxx_std_17) + target_compile_features(${LIB_TARGET} PUBLIC cxx_std_20) endforeach() if(ARROW_WITH_BACKTRACE) diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index b7f16860791..c5a26a475c9 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -1004,10 +1004,16 @@ Result> FixedSizeListArray::Flatten( // ---------------------------------------------------------------------- // Struct +struct StructArray::Impl { + mutable ArrayVector boxed_fields_; +}; + +StructArray::~StructArray() = default; StructArray::StructArray(const std::shared_ptr& data) { ARROW_CHECK_EQ(data->type->id(), Type::STRUCT); SetData(data); - boxed_fields_.resize(data->child_data.size()); + impl_ = std::make_unique(); + impl_->boxed_fields_.resize(data_->child_data.size()); } StructArray::StructArray(const std::shared_ptr& type, int64_t length, @@ -1016,10 +1022,12 @@ StructArray::StructArray(const std::shared_ptr& type, int64_t length, int64_t offset) { ARROW_CHECK_EQ(type->id(), Type::STRUCT); SetData(ArrayData::Make(type, length, {std::move(null_bitmap)}, null_count, offset)); + data_->child_data.reserve(children.size()); for (const auto& child : children) { data_->child_data.push_back(child->data()); } - boxed_fields_.resize(children.size()); + impl_ = std::make_unique(); + impl_->boxed_fields_.resize(data_->child_data.size()); } Result> StructArray::Make( @@ -1073,11 +1081,16 @@ const ArrayVector& StructArray::fields() const { for (int i = 0; i < num_fields(); ++i) { (void)field(i); } - return boxed_fields_; + return impl_->boxed_fields_; } std::shared_ptr StructArray::field(int i) const { - std::shared_ptr result = std::atomic_load(&boxed_fields_[i]); + // Atomic ops on std::shared_ptr are deprecated in C++20. They should be + // replaced with std::atomic> but not all C++ standard + // libraries implement it yet. :-/ + ARROW_SUPPRESS_DEPRECATION_WARNING + std::shared_ptr result = std::atomic_load(&impl_->boxed_fields_[i]); + ARROW_UNSUPPRESS_DEPRECATION_WARNING if (result) { return result; } @@ -1091,7 +1104,11 @@ std::shared_ptr StructArray::field(int i) const { // Check if some other thread inserted the array in the meantime and return // that in that case. std::shared_ptr expected = nullptr; - if (!std::atomic_compare_exchange_strong(&boxed_fields_[i], &expected, result)) { + ARROW_SUPPRESS_DEPRECATION_WARNING + const bool update_successful = + std::atomic_compare_exchange_strong(&impl_->boxed_fields_[i], &expected, result); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + if (!update_successful) { result = std::move(expected); } return result; @@ -1183,6 +1200,13 @@ Result> StructArray::GetFlattenedField(int index, // ---------------------------------------------------------------------- // UnionArray +struct UnionArray::Impl { + mutable ArrayVector boxed_fields_; +}; + +UnionArray::UnionArray() = default; +UnionArray::~UnionArray() = default; + void UnionArray::SetData(std::shared_ptr data) { this->Array::SetData(std::move(data)); @@ -1190,7 +1214,8 @@ void UnionArray::SetData(std::shared_ptr data) { ARROW_CHECK_GE(data_->buffers.size(), 2); raw_type_codes_ = data->GetValuesSafe(1); - boxed_fields_.resize(data_->child_data.size()); + impl_ = std::make_unique(); + impl_->boxed_fields_.resize(data_->child_data.size()); } void SparseUnionArray::SetData(std::shared_ptr data) { @@ -1214,6 +1239,8 @@ void DenseUnionArray::SetData(const std::shared_ptr& data) { raw_value_offsets_ = data->GetValuesSafe(2); } +SparseUnionArray::~SparseUnionArray() = default; + SparseUnionArray::SparseUnionArray(std::shared_ptr data) { SetData(std::move(data)); } @@ -1267,6 +1294,8 @@ Result> SparseUnionArray::GetFlattenedField( return MakeArray(child_data); } +DenseUnionArray::~DenseUnionArray() = default; + DenseUnionArray::DenseUnionArray(const std::shared_ptr& data) { SetData(data); } @@ -1359,23 +1388,34 @@ Result> SparseUnionArray::Make( } std::shared_ptr UnionArray::field(int i) const { - if (i < 0 || - static_cast(i) >= boxed_fields_.size()) { + if (i < 0 || static_cast(i) >= impl_->boxed_fields_.size()) { return nullptr; } - std::shared_ptr result = std::atomic_load(&boxed_fields_[i]); - if (!result) { - std::shared_ptr child_data = data_->child_data[i]->Copy(); - if (mode() == UnionMode::SPARSE) { - // Sparse union: need to adjust child if union is sliced - // (for dense unions, the need to lookup through the offsets - // makes this unnecessary) - if (data_->offset != 0 || child_data->length > data_->length) { - child_data = child_data->Slice(data_->offset, data_->length); - } + ARROW_SUPPRESS_DEPRECATION_WARNING + std::shared_ptr result = std::atomic_load(&impl_->boxed_fields_[i]); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + if (result) { + return result; + } + std::shared_ptr child_data = data_->child_data[i]->Copy(); + if (mode() == UnionMode::SPARSE) { + // Sparse union: need to adjust child if union is sliced + // (for dense unions, the need to lookup through the offsets + // makes this unnecessary) + if (data_->offset != 0 || child_data->length > data_->length) { + child_data = child_data->Slice(data_->offset, data_->length); } - result = MakeArray(child_data); - std::atomic_store(&boxed_fields_[i], result); + } + result = MakeArray(child_data); + // Check if some other thread inserted the array in the meantime and return + // that in that case. + std::shared_ptr expected = nullptr; + ARROW_SUPPRESS_DEPRECATION_WARNING + const bool update_successful = + std::atomic_compare_exchange_strong(&impl_->boxed_fields_[i], &expected, result); + ARROW_UNSUPPRESS_DEPRECATION_WARNING + if (!update_successful) { + result = std::move(expected); } return result; } diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index 2591fdaf414..bf84f802b1a 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -661,6 +661,8 @@ class ARROW_EXPORT StructArray : public Array { public: using TypeClass = StructType; + ~StructArray() override; + explicit StructArray(const std::shared_ptr& data); StructArray(const std::shared_ptr& type, int64_t length, @@ -720,8 +722,8 @@ class ARROW_EXPORT StructArray : public Array { private: // For caching boxed child data - // XXX This is not handled in a thread-safe manner. - mutable ArrayVector boxed_fields_; + struct ARROW_NO_EXPORT Impl; + std::unique_ptr impl_; }; // ---------------------------------------------------------------------- @@ -732,6 +734,8 @@ class ARROW_EXPORT UnionArray : public Array { public: using type_code_t = int8_t; + ~UnionArray() override; + /// Note that this buffer does not account for any slice offset const std::shared_ptr& type_codes() const { return data_->buffers[1]; } @@ -754,13 +758,17 @@ class ARROW_EXPORT UnionArray : public Array { std::shared_ptr field(int pos) const; protected: + UnionArray(); + void SetData(std::shared_ptr data); const type_code_t* raw_type_codes_; const UnionType* union_type_; + private: // For caching boxed child data - mutable std::vector> boxed_fields_; + struct ARROW_NO_EXPORT Impl; + std::unique_ptr impl_; }; /// Concrete Array class for sparse union data @@ -768,6 +776,8 @@ class ARROW_EXPORT SparseUnionArray : public UnionArray { public: using TypeClass = SparseUnionType; + ~SparseUnionArray() override; + explicit SparseUnionArray(std::shared_ptr data); SparseUnionArray(std::shared_ptr type, int64_t length, ArrayVector children, @@ -821,6 +831,8 @@ class ARROW_EXPORT DenseUnionArray : public UnionArray { public: using TypeClass = DenseUnionType; + ~DenseUnionArray() override; + explicit DenseUnionArray(const std::shared_ptr& data); DenseUnionArray(std::shared_ptr type, int64_t length, ArrayVector children, diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc index acd485f530c..aa231430aa0 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate_numeric.cc @@ -866,21 +866,41 @@ using GroupedKurtosisImpl = ConcreteGroupedStatisticImpl; template