From 1a42ed0c2dcedf26d3623597b7d3c38d0beb6363 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 5 Apr 2022 16:30:03 +0900 Subject: [PATCH 1/9] ARROW-16102: [C++] Add support for building with system gRPC and bundled GCS --- .github/workflows/cpp.yml | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 24 ++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 7401fc489c9..cec5dd15646 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -268,6 +268,7 @@ jobs: ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON + ARROW_GCS: ON ARROW_HDFS: OFF ARROW_HOME: /mingw${{ matrix.mingw-n-bits }} ARROW_JEMALLOC: OFF diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index e315346378c..37964ef6c24 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2593,8 +2593,9 @@ endmacro() # ---------------------------------------------------------------------- # Dependencies for Arrow Flight RPC -macro(build_absl_once) - if(NOT TARGET absl_ep) +macro(resolve_dependency_absl) + # Choose one of built absl::* targets + if(NOT TARGET absl::algorithm) message(STATUS "Building Abseil-cpp from source") set(ABSL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/absl_ep-install") set(ABSL_INCLUDE_DIR "${ABSL_PREFIX}/include") @@ -3451,6 +3452,7 @@ macro(build_absl_once) # Work around https://gitlab.kitware.com/cmake/cmake/issues/15052 file(MAKE_DIRECTORY ${ABSL_INCLUDE_DIR}) + set(ABSL_VENDORED TRUE) endif() endmacro() @@ -3464,8 +3466,8 @@ macro(build_grpc) get_target_property(c-ares_INCLUDE_DIR c-ares::cares INTERFACE_INCLUDE_DIRECTORIES) include_directories(SYSTEM ${c-ares_INCLUDE_DIR}) - # First need to build Abseil - build_absl_once() + # First need Abseil + resolve_dependency_absl() message(STATUS "Building gRPC from source") @@ -3498,7 +3500,9 @@ macro(build_grpc) 
add_custom_target(grpc_dependencies) - add_dependencies(grpc_dependencies absl_ep) + if(ABSL_VENDORED) + add_dependencies(grpc_dependencies absl_ep) + endif() if(CARES_VENDORED) add_dependencies(grpc_dependencies cares_ep) endif() @@ -3819,7 +3823,7 @@ macro(build_google_cloud_cpp_storage) message(STATUS "Only building the google-cloud-cpp::storage component") # List of dependencies taken from https://github.com/googleapis/google-cloud-cpp/blob/master/doc/packaging.md - build_absl_once() + resolve_dependency_absl() build_crc32c_once() # Curl is required on all platforms, but building it internally might also trip over S3's copy. @@ -3830,7 +3834,9 @@ macro(build_google_cloud_cpp_storage) # Build google-cloud-cpp, with only storage_client # Inject vendored packages via CMAKE_PREFIX_PATH - list(APPEND GOOGLE_CLOUD_CPP_PREFIX_PATH_LIST ${ABSL_PREFIX}) + if(ABSL_VENDORED) + list(APPEND GOOGLE_CLOUD_CPP_PREFIX_PATH_LIST ${ABSL_PREFIX}) + endif() list(APPEND GOOGLE_CLOUD_CPP_PREFIX_PATH_LIST ${CRC32C_PREFIX}) list(APPEND GOOGLE_CLOUD_CPP_PREFIX_PATH_LIST ${NLOHMANN_JSON_PREFIX}) @@ -3862,7 +3868,9 @@ macro(build_google_cloud_cpp_storage) add_custom_target(google_cloud_cpp_dependencies) - add_dependencies(google_cloud_cpp_dependencies absl_ep) + if(ABSL_VENDORED) + add_dependencies(google_cloud_cpp_dependencies absl_ep) + endif() add_dependencies(google_cloud_cpp_dependencies crc32c_ep) add_dependencies(google_cloud_cpp_dependencies nlohmann_json::nlohmann_json) From dd052b7b14b9a049fbf7b306c52aff9448d50804 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 5 Apr 2022 17:14:38 +0900 Subject: [PATCH 2/9] Use google-cloud-cpp 1.39.0 It's for https://github.com/googleapis/google-cloud-cpp/issues/8536 . 
--- cpp/thirdparty/versions.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 2d54da83504..4e90f4c9682 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -49,8 +49,8 @@ ARROW_GFLAGS_BUILD_VERSION=v2.2.2 ARROW_GFLAGS_BUILD_SHA256_CHECKSUM=34af2f15cf7367513b352bdcd2493ab14ce43692d2dcd9dfc499492966c64dcf ARROW_GLOG_BUILD_VERSION=v0.5.0 ARROW_GLOG_BUILD_SHA256_CHECKSUM=eede71f28371bf39aa69b45de23b329d37214016e2055269b3b5e7cfd40b59f5 -ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v1.35.0 -ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=e4e9eac1e7999eff195db270bc2a719004660b3730ebb5d2f444f2d2057e49b2 +ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION=v1.39.0 +ARROW_GOOGLE_CLOUD_CPP_BUILD_SHA256_CHECKSUM=73e4e840018b24bec2beb49e036a3c2d8c471d4dc4a18b9026ccc4d8ab8e78cc ARROW_GRPC_BUILD_VERSION=v1.35.0 ARROW_GRPC_BUILD_SHA256_CHECKSUM=27dd2fc5c9809ddcde8eb6fa1fa278a3486566dfc28335fca13eb8df8bd3b958 ARROW_GTEST_BUILD_VERSION=1.11.0 @@ -119,7 +119,7 @@ DEPENDENCIES=( "ARROW_GBENCHMARK_URL gbenchmark-${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz https://github.com/google/benchmark/archive/${ARROW_GBENCHMARK_BUILD_VERSION}.tar.gz" "ARROW_GFLAGS_URL gflags-${ARROW_GFLAGS_BUILD_VERSION}.tar.gz https://github.com/gflags/gflags/archive/${ARROW_GFLAGS_BUILD_VERSION}.tar.gz" "ARROW_GLOG_URL glog-${ARROW_GLOG_BUILD_VERSION}.tar.gz https://github.com/google/glog/archive/${ARROW_GLOG_BUILD_VERSION}.tar.gz" - "ARROW_GOOGLE_CLOUD_CPP_URL google-cloud-cpp-${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz https://github.com/googleapis/google-cloud-cpp/archive/refs/tags/${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz" + "ARROW_GOOGLE_CLOUD_CPP_URL google-cloud-cpp-${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz https://github.com/googleapis/google-cloud-cpp/archive/${ARROW_GOOGLE_CLOUD_CPP_BUILD_VERSION}.tar.gz" "ARROW_GRPC_URL grpc-${ARROW_GRPC_BUILD_VERSION}.tar.gz 
https://github.com/grpc/grpc/archive/${ARROW_GRPC_BUILD_VERSION}.tar.gz" "ARROW_GTEST_URL gtest-${ARROW_GTEST_BUILD_VERSION}.tar.gz https://github.com/google/googletest/archive/release-${ARROW_GTEST_BUILD_VERSION}.tar.gz" "ARROW_JEMALLOC_URL jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/${ARROW_JEMALLOC_BUILD_VERSION}/jemalloc-${ARROW_JEMALLOC_BUILD_VERSION}.tar.bz2" From 5e93d4353329e4b29cfbd428d6056e9f9887cd30 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 5 Apr 2022 17:36:55 +0900 Subject: [PATCH 3/9] Add OpenSSL::Crypto dependency to google-cloud-cpp::common --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 37964ef6c24..3a0353bc7db 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -3911,7 +3911,8 @@ macro(build_google_cloud_cpp_storage) absl::memory absl::optional absl::time - Threads::Threads) + Threads::Threads + OpenSSL::Crypto) add_library(google-cloud-cpp::storage STATIC IMPORTED) set_target_properties(google-cloud-cpp::storage From 611e39a6542586d21c14e07c03176f3732c0e325 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 7 Apr 2022 14:47:03 +0900 Subject: [PATCH 4/9] Enable GCS except non ucrt MinGW --- .github/workflows/cpp.yml | 5 ++++- .github/workflows/ruby.yml | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index cec5dd15646..37b952bdbae 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -126,6 +126,7 @@ jobs: ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON + ARROW_GCS: ON ARROW_HDFS: ON ARROW_HOME: /usr/local ARROW_JEMALLOC: ON @@ -268,7 +269,9 @@ jobs: ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON - ARROW_GCS: ON + # google-cloud-cpp uses _dupenv_s() 
but it can't be used with msvcrt. + # We need to use ucrt to use _dupenv_s(). + # ARROW_GCS: ON ARROW_HDFS: OFF ARROW_HOME: /mingw${{ matrix.mingw-n-bits }} ARROW_JEMALLOC: OFF diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 0770f2f32a9..54292eafc84 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -90,6 +90,7 @@ jobs: ulimit -c unlimited archery docker run \ -e ARROW_FLIGHT=ON \ + -e ARROW_GCS=ON \ -e Protobuf_SOURCE=BUNDLED \ -e gRPC_SOURCE=BUNDLED \ ubuntu-ruby @@ -110,6 +111,7 @@ jobs: ARROW_BUILD_TESTS: OFF ARROW_FLIGHT: ON ARROW_GANDIVA: ON + ARROW_GCS: ON ARROW_GLIB_GTK_DOC: true ARROW_GLIB_WERROR: true ARROW_HOME: /usr/local @@ -188,6 +190,7 @@ jobs: ARROW_BUILD_TYPE: release ARROW_FLIGHT: ON ARROW_GANDIVA: ON + ARROW_GCS: ON ARROW_HDFS: OFF ARROW_HOME: /ucrt${{ matrix.mingw-n-bits }} ARROW_JEMALLOC: OFF From 01f6a2c2a877c4639292bf1456e0032e69298489 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 7 Apr 2022 16:53:39 +0900 Subject: [PATCH 5/9] Use C++ 17 for system Abseil --- .github/workflows/cpp.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 37b952bdbae..b5d968dd0bb 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -142,6 +142,8 @@ jobs: ARROW_WITH_SNAPPY: ON ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON + # System Abseil installed by Homebrew uses C++ 17 + CMAKE_CXX_STANDARD: 17 steps: - name: Checkout Arrow uses: actions/checkout@v2 From 3688a5cf6478dce0a6dd157167b508a2e57cf40d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 8 Apr 2022 09:29:59 +0900 Subject: [PATCH 6/9] Use the latest gcs testbench It's for https://github.com/googleapis/storage-testbench/pull/301 . 
--- ci/scripts/install_gcs_testbench.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 16e3c4042cf..52d4b409116 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -31,7 +31,7 @@ fi version=$1 if [[ "${version}" -eq "default" ]]; then - version="v0.7.0" + version="v0.16.0" fi pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" From 737084ca878029f85ad9e2a66177d8683f2638f6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 8 Apr 2022 10:22:33 +0900 Subject: [PATCH 7/9] Install GCS Testbench on macOS --- .github/workflows/cpp.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index b5d968dd0bb..29ae84c0768 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -156,6 +156,9 @@ jobs: rm -f /usr/local/bin/2to3 brew update --preinstall brew bundle --file=cpp/Brewfile + - name: Install Google Cloud Storage Testbench + shell: bash + run: ci/scripts/install_gcs_testbench.sh - name: Setup ccache run: | ci/scripts/ccache_setup.sh From 34fada343c95f18b69b11d6c28b06ecb6b1f33c8 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 8 Apr 2022 11:13:52 +0900 Subject: [PATCH 8/9] Add missing argument --- .github/workflows/cpp.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 29ae84c0768..00f9e335f51 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -158,7 +158,7 @@ jobs: brew bundle --file=cpp/Brewfile - name: Install Google Cloud Storage Testbench shell: bash - run: ci/scripts/install_gcs_testbench.sh + run: ci/scripts/install_gcs_testbench.sh default - name: Setup ccache run: | ci/scripts/ccache_setup.sh From 3e279ab2928f9498909e106d3d5a89d266d3f82c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 8 Apr 2022 
15:03:35 +0900 Subject: [PATCH 9/9] Use python3 --- ci/scripts/install_gcs_testbench.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/scripts/install_gcs_testbench.sh b/ci/scripts/install_gcs_testbench.sh index 52d4b409116..0282e0fda50 100755 --- a/ci/scripts/install_gcs_testbench.sh +++ b/ci/scripts/install_gcs_testbench.sh @@ -34,4 +34,5 @@ if [[ "${version}" -eq "default" ]]; then version="v0.16.0" fi -pip install "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz" +${PYTHON:-python3} -m pip install \ + "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"