From 22cf96cc183da65f4f082e2a9af65b9837a773d9 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 23 Jun 2024 08:53:33 +0900 Subject: [PATCH 01/24] GH-42149: [C++] Use FetchContent for bundled ORC This also has a workaround for https://issues.apache.org/jira/browse/ORC-1732 . --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 184 ++++++++++---------- cpp/thirdparty/versions.txt | 4 +- 2 files changed, 96 insertions(+), 92 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 7dab0a362ff..009ccacd6e1 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4490,116 +4490,120 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") # ---------------------------------------------------------------------- # Apache ORC -macro(build_orc) +function(build_orc) + if(CMAKE_VERSION VERSION_LESS 3.22) + # We can't disable installing Apache ORC by + # "set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY + # EXCLUDE_FROM_ALL TRUE)" with CMake 3.16. + # + # At least CMake 3.22 on Ubuntu 22.04 works. So we use 3.22 + # here. We may be able to use more earlier version here. + message(FATAL_ERROR "Building Apache ORC requires at least CMake 3.22. " + "(At least we can't use CMake 3.16)") + endif() message(STATUS "Building Apache ORC from source") + fetchcontent_declare(orc + ${FC_DECLARE_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + prepare_fetchcontent() - set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") - set(ORC_HOME "${ORC_PREFIX}") - set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") - set(ORC_STATIC_LIB - "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}") + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) + set(LZ4_HOME + ${LZ4_ROOT} + CACHE BOOL "" FORCE) + set(LZ4_LIBRARY $) + set(LZ4_STATIC_LIB $) - get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} + get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) - - get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} + get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) + set(PROTOBUF_HOME + ${Protobuf_ROOT} + CACHE BOOL "" FORCE) + target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} + INTERFACE "${PROTOBUF_INCLUDE_DIR}") + set(PROTOBUF_EXECUTABLE $) + set(PROTOBUF_LIBRARY $) + set(PROTOC_LIBRARY $) + + get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) + set(SNAPPY_HOME + ${Snappy_ROOT} + CACHE BOOL "" FORCE) + set(SNAPPY_LIBRARY + $ + CACHE STRING "" FORCE) + + get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) + set(ZLIB_HOME + ${ZLIB_ROOT} + CACHE BOOL "" FORCE) + + get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) - - get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) + get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) + set(ZSTD_HOME + ${ZSTD_ROOT} + CACHE BOOL "" FORCE) + set(ZSTD_LIBRARY $) + set(ZSTD_STATIC_LIBRARY $) + + set(BUILD_CPP_TESTS + OFF + CACHE BOOL "" FORCE) + set(BUILD_JAVA + OFF + CACHE BOOL "" FORCE) + set(BUILD_LIBHDFSPP + OFF + CACHE BOOL "" FORCE) + set(BUILD_TOOLS + OFF + CACHE BOOL "" FORCE) + set(INSTALL_VENDORED_LIBS + OFF + CACHE BOOL "" FORCE) + set(STOP_BUILD_ON_WARNING + OFF + CACHE BOOL "" FORCE) - get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + # TODO: This should be fixed in upstream. + list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) - set(ORC_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" - -DSTOP_BUILD_ON_WARNING=OFF - -DBUILD_LIBHDFSPP=OFF - -DBUILD_JAVA=OFF - -DBUILD_TOOLS=OFF - -DBUILD_CPP_TESTS=OFF - -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" - "-DPROTOBUF_EXECUTABLE=$" - "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" - "-DPROTOBUF_INCLUDE_DIR=$" - "-DPROTOBUF_LIBRARY=$" - "-DPROTOC_LIBRARY=$" - "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" - "-DSNAPPY_LIBRARY=$" - "-DLZ4_LIBRARY=$" - "-DLZ4_STATIC_LIB=$" - "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" - "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" - "-DZSTD_HOME=${ORC_ZSTD_ROOT}" - "-DZSTD_INCLUDE_DIR=$" - "-DZSTD_LIBRARY=$") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") + fetchcontent_makeavailable(orc) + if(CMAKE_VERSION VERSION_LESS 3.28) + set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) endif() - # Work around CMake bug - file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) + set(ORC_VENDORED + TRUE + PARENT_SCOPE) - externalproject_add(orc_ep - ${EP_COMMON_OPTIONS} - URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS ${ORC_STATIC_LIB} - CMAKE_ARGS ${ORC_CMAKE_ARGS} - DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} - ${ARROW_PROTOBUF_PROTOC} - ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET} - LZ4::lz4 - ZLIB::ZLIB) - - set(ORC_VENDORED 1) - - add_library(orc::orc STATIC IMPORTED) - set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") - target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") - target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} - ${Snappy_TARGET}) - # Protobuf generated files may use ABSL_DCHECK*() and - # absl::log_internal_check_op is needed for them. - if(TARGET absl::log_internal_check_op) - target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) - endif() - if(NOT MSVC) - if(NOT APPLE AND ARROW_ENABLE_THREADING) - target_link_libraries(orc::orc INTERFACE Threads::Threads) - endif() - target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) - endif() - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9") - target_link_libraries(orc::orc INTERFACE stdc++fs) - endif() - elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "8") - target_link_libraries(orc::orc INTERFACE c++fs) - endif() - endif() + target_include_directories(orc INTERFACE "${orc_BINARY_DIR}/c++/include" + "${orc_SOURCE_DIR}/c++/include") - add_dependencies(orc::orc orc_ep) + add_library(orc::orc ALIAS orc) - list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) -endmacro() + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) + set(ARROW_BUNDLED_STATIC_LIBS + ${ARROW_BUNDLED_STATIC_LIBS} + PARENT_SCOPE) +endfunction() if(ARROW_ORC) resolve_dependency(orc HAVE_ALT TRUE) - target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) if(ORC_VENDORED) set(ARROW_ORC_VERSION ${ARROW_ORC_BUILD_VERSION}) else() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) set(ARROW_ORC_VERSION ${orcAlt_VERSION}) + message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") + message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() - message(STATUS "Found ORC static library: ${ORC_STATIC_LIB}") - message(STATUS "Found ORC headers: ${ORC_INCLUDE_DIR}") endif() # ---------------------------------------------------------------------- diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 986ac056b61..ab988badec1 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.0 -ARROW_ORC_BUILD_SHA256_CHECKSUM=9107730919c29eb39efaff1b9e36166634d1d4d9477e5fee76bfd6a8fec317df +ARROW_ORC_BUILD_VERSION=2.0.1 +ARROW_ORC_BUILD_SHA256_CHECKSUM=1ffac0228aa83f04a1b1cf2788a3af5953e82587ae3a77c41900e99f2557132d ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require From ccd9a4c31ed4845dab72b68dd2c69f3dcf73f97f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 3 Jul 2024 17:47:10 +0900 Subject: [PATCH 02/24] Disable ORC on Ubuntu 20.04 because CMake is old --- ci/docker/ubuntu-20.04-cpp.dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index d78c7a99cf4..2c0126b74c0 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -144,7 +144,8 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # ARROW-17051: this build uses static Protobuf, so we must also use -# static Arrow to run Flight/Flight SQL tests +# static Arrow to run Flight/Flight SQL tests. +# We can't use bundled ORC because CMake (3.16.3) on Ubuntu 20.04 is old. ENV absl_SOURCE=BUNDLED \ ARROW_ACERO=ON \ ARROW_AZURE=OFF \ @@ -158,7 +159,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_ORC=ON \ + ARROW_ORC=OFF \ ARROW_PARQUET=ON \ ARROW_S3=ON \ ARROW_SUBSTRAIT=ON \ From 744709fb98fb533b629aaa2939b192cf4ba0dea8 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 3 Jul 2024 17:47:33 +0900 Subject: [PATCH 03/24] Ensure using bundled zlib --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 009ccacd6e1..4a5a5d374a4 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4513,8 +4513,12 @@ function(build_orc) set(LZ4_HOME ${LZ4_ROOT} CACHE BOOL "" FORCE) - set(LZ4_LIBRARY $) - set(LZ4_STATIC_LIB $) + set(LZ4_LIBRARY + LZ4::lz4 + CACHE STRING "" FORCE) + set(LZ4_STATIC_LIB + LZ4::lz4 + CACHE STRING "" FORCE) get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} INTERFACE_INCLUDE_DIRECTORIES) @@ -4534,7 +4538,7 @@ function(build_orc) ${Snappy_ROOT} CACHE BOOL "" FORCE) set(SNAPPY_LIBRARY - $ + ${Snappy_TARGET} CACHE STRING "" FORCE) get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) @@ -4542,6 +4546,15 @@ function(build_orc) set(ZLIB_HOME ${ZLIB_ROOT} CACHE BOOL "" FORCE) + set(ZLIB_LIBRARY + ZLIB::ZLIB + CACHE STRING "" FORCE) + get_target_property(ZLIB_TYPE ZLIB::ZLIB TYPE) + if(ZLIB_TYPE STREQUAL "STATIC_LIBRARY") + set(ZLIB_STATIC_LIB + ZLIB::ZLIB + CACHE STRING "" FORCE) + endif() get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) From 7f3a3ce205a2eb37bc28018f50e0a3c572f10c8b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 3 Jul 2024 20:48:54 +0900 Subject: [PATCH 04/24] Disable unity build --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 4a5a5d374a4..b83b5185304 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4508,6 +4508,8 @@ function(build_orc) URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() + set(CMAKE_UNITY_BUILD FALSE) + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) set(LZ4_HOME From 4adcb203480ca4095769b41e21087a8de5441847 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:29:31 +0900 Subject: [PATCH 05/24] Don't prefer static library --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b83b5185304..b6d216679e6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4510,6 +4510,7 @@ function(build_orc) set(CMAKE_UNITY_BUILD FALSE) + set(ORC_PREFER_STATIC_LZ4 OFF CACHE BOOL "" FORCE) get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) set(LZ4_HOME @@ -4518,10 +4519,8 @@ function(build_orc) set(LZ4_LIBRARY LZ4::lz4 CACHE STRING "" FORCE) - set(LZ4_STATIC_LIB - LZ4::lz4 - CACHE STRING "" FORCE) + set(ORC_PREFER_STATIC_PROTOBUF OFF CACHE BOOL "" FORCE) get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) @@ -4534,6 +4533,7 @@ function(build_orc) set(PROTOBUF_LIBRARY $) set(PROTOC_LIBRARY $) + set(ORC_PREFER_STATIC_SNAPPY OFF CACHE BOOL "" FORCE) get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) set(SNAPPY_HOME @@ -4543,6 +4543,7 @@ function(build_orc) ${Snappy_TARGET} CACHE STRING "" FORCE) + set(ORC_PREFER_STATIC_ZLIB OFF CACHE BOOL "" FORCE) get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) set(ZLIB_HOME @@ -4551,21 +4552,15 @@ function(build_orc) set(ZLIB_LIBRARY ZLIB::ZLIB CACHE STRING "" FORCE) - get_target_property(ZLIB_TYPE ZLIB::ZLIB TYPE) - if(ZLIB_TYPE STREQUAL "STATIC_LIBRARY") - set(ZLIB_STATIC_LIB - ZLIB::ZLIB - CACHE STRING "" FORCE) - endif() + set(ORC_PREFER_STATIC_ZSTD OFF CACHE BOOL "" FORCE) get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) set(ZSTD_HOME ${ZSTD_ROOT} CACHE BOOL "" FORCE) - set(ZSTD_LIBRARY $) - set(ZSTD_STATIC_LIBRARY $) + set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) set(BUILD_CPP_TESTS OFF From e003fcc0fc00218ee67cc5cb12846294cee31216 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:29:43 +0900 Subject: [PATCH 06/24] Update comment --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b6d216679e6..563f539e538 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4581,7 +4581,7 @@ function(build_orc) OFF CACHE BOOL "" FORCE) - # TODO: This should be fixed in upstream. + # We can remove this with ORC 2.0.2 or later. list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) fetchcontent_makeavailable(orc) From 977cce30ce76bd104ddf83a6b1ead867ee1d1e5d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:31:24 +0900 Subject: [PATCH 07/24] Don't use ORC with CentOS 7 --- dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 8d47407c035..cf90effb859 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -72,6 +72,7 @@ # %%define use_glog (%%{_rhel} <= 8) %define use_glog 0 %define use_mimalloc (%{_rhel} >= 8) +%define use_orc (%{_rhel} >= 8) # TODO: Enable this. This works on local but is fragile on GitHub Actions and # Travis CI. # %%define use_s3 (%%{_rhel} >= 8) @@ -179,7 +180,9 @@ cd cpp %if %{use_mimalloc} -DARROW_MIMALLOC=ON \ %endif +%if %{use_orc} -DARROW_ORC=ON \ +%endif -DARROW_PACKAGE_KIND=rpm \ -DARROW_PARQUET=ON \ %if %{use_s3} From e884e735883c75b4d8b1bbf180cc8a3c9a42a884 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:44:08 +0900 Subject: [PATCH 08/24] Disable ORC with old CMake --- dev/tasks/linux-packages/apache-arrow/debian/rules | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 83bcad98a7a..80ec9ee3efc 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -19,6 +19,17 @@ override_dh_auto_configure: else \ ARROW_CUDA=OFF; \ fi; \ + cmake_version=$$( \ + cmake --version | \ + grep \ + --extended-regexp \ + --only-matching \ + '[0-9]+\.[0-9]+'); \ + if dpkg --compare-versions $${cmake_version} ge 3.22; \ + ARROW_ORC=ON; \ + else \ + ARROW_ORC=OFF; \ + fi; \ dh_auto_configure \ --sourcedirectory=cpp \ --builddirectory=cpp_build \ From 53ee1d0f67be695bba43570f70682967ad568281 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:59:45 +0900 Subject: [PATCH 09/24] Disable ORC on Ubuntu 20.04 --- ci/docker/ubuntu-20.04-cpp-minimal.dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index e17c0306f11..fb3c9f2de83 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -75,6 +75,7 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin +# We can't use bundled ORC because CMake (3.16.3) on Ubuntu 20.04 is old. ENV ARROW_ACERO=ON \ ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ @@ -85,7 +86,7 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_ORC=ON \ + ARROW_ORC=OFF \ ARROW_PARQUET=ON \ ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ From 0393208799d8845dcb35d9eaf43033e3aa608585 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 11:59:52 +0900 Subject: [PATCH 10/24] Disable ORC on Ubuntu 20.04 --- dev/release/verify-release-candidate.sh | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index fcaaa423a4c..e53bb3fcaf6 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -687,6 +687,19 @@ test_and_install_cpp() { ARROW_CMAKE_OPTIONS="${ARROW_CMAKE_OPTIONS:-} -G ${CMAKE_GENERATOR}" fi + if [ -z "${ARROW_ORC}" ]; then + cmake_version=$(cmake --version | \ + grep \ + --extended-regexp \ + --only-matching \ + '[0-9]+\.[0-9]+') + if expr ${cmake_version} '>=' 3.22; then + ARROW_ORC=ON + else + ARROW_ORC=OFF + fi + fi + local ARROW_BUILD_INTEGRATION=OFF local ARROW_BUILD_TESTS=OFF if [ ${TEST_INTEGRATION_CPP} -gt 0 ]; then @@ -714,7 +727,7 @@ test_and_install_cpp() { -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=ON \ -DARROW_JSON=ON \ - -DARROW_ORC=ON \ + -DARROW_ORC=${ARROW_ORC} \ -DARROW_PARQUET=ON \ -DARROW_SUBSTRAIT=ON \ -DARROW_S3=${ARROW_S3} \ @@ -766,7 +779,6 @@ test_python() { export PYARROW_PARALLEL=$NPROC export PYARROW_WITH_DATASET=1 export PYARROW_WITH_HDFS=1 - export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PARQUET_ENCRYPTION=1 if [ "${ARROW_CUDA}" = "ON" ]; then @@ -781,6 +793,9 @@ test_python() { if [ "${ARROW_GCS}" = "ON" ]; then export PYARROW_WITH_GCS=1 fi + if [ "${ARROW_ORC}" = "ON" ]; then + export PYARROW_WITH_ORC=1 + fi if [ "${ARROW_S3}" = "ON" ]; then export PYARROW_WITH_S3=1 fi @@ -798,7 +813,6 @@ import pyarrow.csv import pyarrow.dataset import pyarrow.fs import pyarrow.json -import pyarrow.orc import pyarrow.parquet " if [ "${ARROW_CUDA}" == "ON" ]; then @@ -813,6 +827,9 @@ import pyarrow.parquet if [ "${ARROW_GCS}" == "ON" ]; then python -c "import pyarrow._gcsfs" fi + if [ "${ARROW_ORC}" == "ON" ]; then + python -c "import pyarrow.orc" + fi if [ "${ARROW_S3}" == "ON" ]; then python -c "import pyarrow._s3fs" fi @@ -1316,6 +1333,7 @@ fi : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GCS:=OFF} +: ${ARROW_ORC:=} : ${ARROW_S3:=OFF} TEST_SUCCESS=no From 04a4125ff47a8387c78af76a283ef56497cd1158 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 13:55:49 +0900 Subject: [PATCH 11/24] Fix style --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 563f539e538..ff384cb585d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4510,7 +4510,9 @@ function(build_orc) set(CMAKE_UNITY_BUILD FALSE) - set(ORC_PREFER_STATIC_LZ4 OFF CACHE BOOL "" FORCE) + set(ORC_PREFER_STATIC_LZ4 + OFF + CACHE BOOL "" FORCE) get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) set(LZ4_HOME @@ -4520,7 +4522,9 @@ function(build_orc) LZ4::lz4 CACHE STRING "" FORCE) - set(ORC_PREFER_STATIC_PROTOBUF OFF CACHE BOOL "" FORCE) + set(ORC_PREFER_STATIC_PROTOBUF + OFF + CACHE BOOL "" FORCE) get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) @@ -4533,7 +4537,9 @@ function(build_orc) set(PROTOBUF_LIBRARY $) set(PROTOC_LIBRARY $) - set(ORC_PREFER_STATIC_SNAPPY OFF CACHE BOOL "" FORCE) + set(ORC_PREFER_STATIC_SNAPPY + OFF + CACHE BOOL "" FORCE) get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) set(SNAPPY_HOME @@ -4543,7 +4549,9 @@ function(build_orc) ${Snappy_TARGET} CACHE STRING "" FORCE) - set(ORC_PREFER_STATIC_ZLIB OFF CACHE BOOL "" FORCE) + set(ORC_PREFER_STATIC_ZLIB + OFF + CACHE BOOL "" FORCE) get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) set(ZLIB_HOME @@ -4553,7 +4561,9 @@ function(build_orc) ZLIB::ZLIB CACHE STRING "" FORCE) - set(ORC_PREFER_STATIC_ZSTD OFF CACHE BOOL "" FORCE) + set(ORC_PREFER_STATIC_ZSTD + OFF + CACHE BOOL "" FORCE) get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) From 41512bd99c32b0be53c21dba9cb89dc0e002393c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 13:56:26 +0900 Subject: [PATCH 12/24] Add missing then --- .../linux-packages/apache-arrow/debian/rules | 98 +++++++++---------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 80ec9ee3efc..02879c4e48f 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -14,55 +14,55 @@ BUILD_TYPE=release dh $@ --with gir override_dh_auto_configure: - if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \ - ARROW_CUDA=ON; \ - else \ - ARROW_CUDA=OFF; \ - fi; \ - cmake_version=$$( \ - cmake --version | \ - grep \ - --extended-regexp \ - --only-matching \ - '[0-9]+\.[0-9]+'); \ - if dpkg --compare-versions $${cmake_version} ge 3.22; \ - ARROW_ORC=ON; \ - else \ - ARROW_ORC=OFF; \ - fi; \ - dh_auto_configure \ - --sourcedirectory=cpp \ - --builddirectory=cpp_build \ - --buildsystem=cmake+ninja \ - -- \ - -DARROW_BUILD_UTILITIES=ON \ - -DARROW_COMPUTE=ON \ - -DARROW_CSV=ON \ - -DARROW_CUDA=$${ARROW_CUDA} \ - -DARROW_DATASET=ON \ - -DARROW_FILESYSTEM=ON \ - -DARROW_FLIGHT=ON \ - -DARROW_FLIGHT_SQL=ON \ - -DARROW_GANDIVA=ON \ - -DARROW_GCS=ON \ - -DARROW_HDFS=ON \ - -DARROW_JSON=ON \ - -DARROW_MIMALLOC=ON \ - -DARROW_ORC=ON \ - -DARROW_PACKAGE_KIND=deb \ - -DARROW_PARQUET=ON \ - -DARROW_S3=ON \ - -DARROW_USE_CCACHE=OFF \ - -DARROW_USE_MOLD=ON \ - -DARROW_WITH_BROTLI=ON \ - -DARROW_WITH_BZ2=ON \ - -DARROW_WITH_LZ4=ON \ - -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON \ - -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ - -DCUDAToolkit_ROOT=/usr \ - -DPARQUET_BUILD_EXECUTABLES=ON \ + if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \ + ARROW_CUDA=ON; \ + else \ + ARROW_CUDA=OFF; \ + fi; \ + cmake_version=$$( \ + cmake --version | \ + grep \ + --extended-regexp \ + --only-matching \ + '[0-9]+\.[0-9]+'); \ + if dpkg --compare-versions $${cmake_version} ge 3.22; then \ + ARROW_ORC=ON; \ + else \ + ARROW_ORC=OFF; \ + fi; \ + dh_auto_configure \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build \ + --buildsystem=cmake+ninja \ + -- \ + -DARROW_BUILD_UTILITIES=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ + -DARROW_CUDA=$${ARROW_CUDA} \ + -DARROW_DATASET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_FLIGHT=ON \ + -DARROW_FLIGHT_SQL=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_GCS=ON \ + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ + -DARROW_MIMALLOC=ON \ + -DARROW_ORC=ON \ + -DARROW_PACKAGE_KIND=deb \ + -DARROW_PARQUET=ON \ + -DARROW_S3=ON \ + -DARROW_USE_CCACHE=OFF \ + -DARROW_USE_MOLD=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ + -DCUDAToolkit_ROOT=/usr \ + -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON override_dh_auto_build: From f037124939556c47b06148fd17afa85174df0015 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 13:59:52 +0900 Subject: [PATCH 13/24] Adjust file list --- dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index cf90effb859..b15772a8153 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -72,7 +72,7 @@ # %%define use_glog (%%{_rhel} <= 8) %define use_glog 0 %define use_mimalloc (%{_rhel} >= 8) -%define use_orc (%{_rhel} >= 8) +%define use_orc (%{_rhel} >= 8 || %{is_amazon_linux}) # TODO: Enable this. This works on local but is fragile on GitHub Actions and # Travis CI. # %%define use_s3 (%%{_rhel} >= 8) @@ -328,7 +328,9 @@ Libraries and header files for Apache Arrow C++. %{_libdir}/pkgconfig/arrow-csv.pc %{_libdir}/pkgconfig/arrow-filesystem.pc %{_libdir}/pkgconfig/arrow-json.pc +%if %{use_orc} %{_libdir}/pkgconfig/arrow-orc.pc +%endif %{_libdir}/pkgconfig/arrow.pc %package -n %{name}%{so_version}-acero-libs @@ -596,7 +598,9 @@ Libraries and header files for Apache Arrow GLib. %{_libdir}/libarrow-glib.a %{_libdir}/libarrow-glib.so %{_libdir}/pkgconfig/arrow-glib.pc +%if %{use_orc} %{_libdir}/pkgconfig/arrow-orc-glib.pc +%endif %package glib-doc Summary: Documentation for Apache Arrow GLib From c51893d15e9e9f5f8c92ec16167a3f2be9138bfe Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 14:40:33 +0900 Subject: [PATCH 14/24] Set ZLIB::ZLIB's include directory for Emscripten --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ff384cb585d..2b6e8250c87 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2532,6 +2532,7 @@ macro(build_zlib) set_property(TARGET ZLIB::ZLIB PROPERTY IMPORTED_LOCATION "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a") + target_include_directories(ZLIB::ZLIB INTERFACE "${EMSCRIPTEN_SYSROOT}/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") @@ -4517,7 +4518,7 @@ function(build_orc) get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) set(LZ4_HOME ${LZ4_ROOT} - CACHE BOOL "" FORCE) + CACHE STRING "" FORCE) set(LZ4_LIBRARY LZ4::lz4 CACHE STRING "" FORCE) @@ -4530,7 +4531,7 @@ function(build_orc) get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) set(PROTOBUF_HOME ${Protobuf_ROOT} - CACHE BOOL "" FORCE) + CACHE STRING "" FORCE) target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} INTERFACE "${PROTOBUF_INCLUDE_DIR}") set(PROTOBUF_EXECUTABLE $) @@ -4544,7 +4545,7 @@ function(build_orc) get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) set(SNAPPY_HOME ${Snappy_ROOT} - CACHE BOOL "" FORCE) + CACHE STRING "" FORCE) set(SNAPPY_LIBRARY ${Snappy_TARGET} CACHE STRING "" FORCE) @@ -4556,7 +4557,7 @@ function(build_orc) get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) set(ZLIB_HOME ${ZLIB_ROOT} - CACHE BOOL "" FORCE) + CACHE STRING "" FORCE) set(ZLIB_LIBRARY ZLIB::ZLIB CACHE STRING "" FORCE) @@ -4569,7 +4570,7 @@ function(build_orc) get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) set(ZSTD_HOME ${ZSTD_ROOT} - CACHE BOOL "" FORCE) + CACHE STRING "" FORCE) set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) set(BUILD_CPP_TESTS From e5d13dbb83cb0a1dc4a34941caaed325b34653b1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 15:03:03 +0900 Subject: [PATCH 15/24] Use ARROW_ORC --- dev/tasks/linux-packages/apache-arrow/debian/rules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 02879c4e48f..d53b2e377dd 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -48,7 +48,7 @@ override_dh_auto_configure: -DARROW_HDFS=ON \ -DARROW_JSON=ON \ -DARROW_MIMALLOC=ON \ - -DARROW_ORC=ON \ + -DARROW_ORC=$${ARROW_ORC} \ -DARROW_PACKAGE_KIND=deb \ -DARROW_PARQUET=ON \ -DARROW_S3=ON \ From 21943b324da66af33542bfeb8b46131a80a6d04c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 16:03:46 +0900 Subject: [PATCH 16/24] Use interface library --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2b6e8250c87..6796d3ff2df 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4597,6 +4597,8 @@ function(build_orc) fetchcontent_makeavailable(orc) if(CMAKE_VERSION VERSION_LESS 3.28) + message("XXX: ${orc_SOURCE_DIR}") + file(MAKE_DIRECTORY ${orc_SOURCE_DIR}) set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) endif() @@ -4604,10 +4606,10 @@ function(build_orc) TRUE PARENT_SCOPE) - target_include_directories(orc INTERFACE "${orc_BINARY_DIR}/c++/include" - "${orc_SOURCE_DIR}/c++/include") - - add_library(orc::orc ALIAS orc) + add_library(orc::orc INTERFACE) + target_link_libraries(orc::orc INTERFACE orc) + target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" + "${orc_SOURCE_DIR}/c++/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) set(ARROW_BUNDLED_STATIC_LIBS From 27955b47ba57951dc96ea0f13eec04b93c83a836 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 16:04:47 +0900 Subject: [PATCH 17/24] Add missing IMPORTED --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6796d3ff2df..d2256460d29 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4606,7 +4606,7 @@ function(build_orc) TRUE PARENT_SCOPE) - add_library(orc::orc INTERFACE) + add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" "${orc_SOURCE_DIR}/c++/include") From 5b46619d9e4daf20b58a227d1473857bea82ee9c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Jul 2024 17:54:31 +0900 Subject: [PATCH 18/24] Remove needless code --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index d2256460d29..6059b94ff5b 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4597,8 +4597,6 @@ function(build_orc) fetchcontent_makeavailable(orc) if(CMAKE_VERSION VERSION_LESS 3.28) - message("XXX: ${orc_SOURCE_DIR}") - file(MAKE_DIRECTORY ${orc_SOURCE_DIR}) set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) endif() From d9ef3dc652d9f1947047a8c64793ccd8d22113db Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 5 Jul 2024 14:25:48 +0900 Subject: [PATCH 19/24] Use ExternalProject with old CMake --- ci/docker/ubuntu-20.04-cpp-minimal.dockerfile | 3 +- ci/docker/ubuntu-20.04-cpp.dockerfile | 5 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 288 +++++++++++------- dev/release/verify-release-candidate.sh | 24 +- .../linux-packages/apache-arrow/debian/rules | 87 +++--- .../apache-arrow/yum/arrow.spec.in | 7 - 6 files changed, 226 insertions(+), 188 deletions(-) diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index fb3c9f2de83..e17c0306f11 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -75,7 +75,6 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin -# We can't use bundled ORC because CMake (3.16.3) on Ubuntu 20.04 is old. ENV ARROW_ACERO=ON \ ARROW_AZURE=OFF \ ARROW_BUILD_TESTS=ON \ @@ -86,7 +85,7 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_ORC=OFF \ + ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ ARROW_USE_CCACHE=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 2c0126b74c0..d78c7a99cf4 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -144,8 +144,7 @@ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin # - libgtest-dev only provide sources # - libprotobuf-dev only provide sources # ARROW-17051: this build uses static Protobuf, so we must also use -# static Arrow to run Flight/Flight SQL tests. -# We can't use bundled ORC because CMake (3.16.3) on Ubuntu 20.04 is old. +# static Arrow to run Flight/Flight SQL tests ENV absl_SOURCE=BUNDLED \ ARROW_ACERO=ON \ ARROW_AZURE=OFF \ @@ -159,7 +158,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_ORC=OFF \ + ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ ARROW_SUBSTRAIT=ON \ diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6059b94ff5b..f6092998a2d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4492,124 +4492,200 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") # Apache ORC function(build_orc) - if(CMAKE_VERSION VERSION_LESS 3.22) - # We can't disable installing Apache ORC by - # "set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY - # EXCLUDE_FROM_ALL TRUE)" with CMake 3.16. - # - # At least CMake 3.22 on Ubuntu 22.04 works. So we use 3.22 - # here. We may be able to use more earlier version here. - message(FATAL_ERROR "Building Apache ORC requires at least CMake 3.22. " - "(At least we can't use CMake 3.16)") - endif() message(STATUS "Building Apache ORC from source") - fetchcontent_declare(orc - ${FC_DECLARE_COMMON_OPTIONS} - URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") - prepare_fetchcontent() - set(CMAKE_UNITY_BUILD FALSE) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28) + fetchcontent_declare(orc + ${FC_DECLARE_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + prepare_fetchcontent() + + set(CMAKE_UNITY_BUILD FALSE) + + set(ORC_PREFER_STATIC_LZ4 + OFF + CACHE BOOL "" FORCE) + get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) + set(LZ4_HOME + ${LZ4_ROOT} + CACHE STRING "" FORCE) + set(LZ4_LIBRARY + LZ4::lz4 + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_PROTOBUF + OFF + CACHE BOOL "" FORCE) + get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) + set(PROTOBUF_HOME + ${Protobuf_ROOT} + CACHE STRING "" FORCE) + target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} + INTERFACE "${PROTOBUF_INCLUDE_DIR}") + set(PROTOBUF_EXECUTABLE $) + set(PROTOBUF_LIBRARY $) + set(PROTOC_LIBRARY $) + + set(ORC_PREFER_STATIC_SNAPPY + OFF + CACHE BOOL "" FORCE) + get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) + set(SNAPPY_HOME + ${Snappy_ROOT} + CACHE STRING "" FORCE) + set(SNAPPY_LIBRARY + ${Snappy_TARGET} + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZLIB + OFF + CACHE BOOL "" FORCE) + get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) + set(ZLIB_HOME + ${ZLIB_ROOT} + CACHE STRING "" FORCE) + set(ZLIB_LIBRARY + ZLIB::ZLIB + CACHE STRING "" FORCE) + + set(ORC_PREFER_STATIC_ZSTD + OFF + CACHE BOOL "" FORCE) + get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) + set(ZSTD_HOME + ${ZSTD_ROOT} + CACHE STRING "" FORCE) + set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) + + set(BUILD_CPP_TESTS + OFF + CACHE BOOL "" FORCE) + set(BUILD_JAVA + OFF + CACHE BOOL "" FORCE) + set(BUILD_LIBHDFSPP + OFF + CACHE BOOL "" FORCE) + set(BUILD_TOOLS + OFF + CACHE BOOL "" FORCE) + set(INSTALL_VENDORED_LIBS + OFF + CACHE BOOL "" FORCE) + set(STOP_BUILD_ON_WARNING + OFF + CACHE BOOL "" FORCE) + + # We can remove this with ORC 2.0.2 or later. + list(PREPEND CMAKE_MODULE_PATH + ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) + + fetchcontent_makeavailable(orc) + if(CMAKE_VERSION VERSION_LESS 3.28) + set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) + endif() - set(ORC_PREFER_STATIC_LZ4 - OFF - CACHE BOOL "" FORCE) - get_target_property(LZ4_INCLUDE_DIR LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(LZ4_ROOT "${LZ4_INCLUDE_DIR}" DIRECTORY) - set(LZ4_HOME - ${LZ4_ROOT} - CACHE STRING "" FORCE) - set(LZ4_LIBRARY - LZ4::lz4 - CACHE STRING "" FORCE) - - set(ORC_PREFER_STATIC_PROTOBUF - OFF - CACHE BOOL "" FORCE) - get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) - set(PROTOBUF_HOME - ${Protobuf_ROOT} - CACHE STRING "" FORCE) - target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} - INTERFACE "${PROTOBUF_INCLUDE_DIR}") - set(PROTOBUF_EXECUTABLE $) - set(PROTOBUF_LIBRARY $) - set(PROTOC_LIBRARY $) + add_library(orc::orc INTERFACE IMPORTED) + target_link_libraries(orc::orc INTERFACE orc) + target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" + "${orc_SOURCE_DIR}/c++/include") - set(ORC_PREFER_STATIC_SNAPPY - OFF - CACHE BOOL "" FORCE) - get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) - set(SNAPPY_HOME - ${Snappy_ROOT} - CACHE STRING "" FORCE) - set(SNAPPY_LIBRARY - ${Snappy_TARGET} - CACHE STRING "" FORCE) - - set(ORC_PREFER_STATIC_ZLIB - OFF - CACHE BOOL "" FORCE) - get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) - set(ZLIB_HOME - ${ZLIB_ROOT} - CACHE STRING "" FORCE) - set(ZLIB_LIBRARY - ZLIB::ZLIB - CACHE STRING "" FORCE) - - set(ORC_PREFER_STATIC_ZSTD - OFF - CACHE BOOL "" FORCE) - get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) - set(ZSTD_HOME - ${ZSTD_ROOT} - CACHE STRING "" FORCE) - set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) + else() + set(ORC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/orc_ep-install") + set(ORC_HOME "${ORC_PREFIX}") + set(ORC_INCLUDE_DIR "${ORC_PREFIX}/include") + set(ORC_STATIC_LIB + "${ORC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}orc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) - set(BUILD_CPP_TESTS - OFF - CACHE BOOL "" FORCE) - set(BUILD_JAVA - OFF - CACHE BOOL "" FORCE) - set(BUILD_LIBHDFSPP - OFF - CACHE BOOL "" FORCE) - set(BUILD_TOOLS - OFF - CACHE BOOL "" FORCE) - set(INSTALL_VENDORED_LIBS - OFF - CACHE BOOL "" FORCE) - set(STOP_BUILD_ON_WARNING - OFF - CACHE BOOL "" FORCE) + get_target_property(ORC_PROTOBUF_ROOT ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_PROTOBUF_ROOT "${ORC_PROTOBUF_ROOT}" DIRECTORY) - # We can remove this with ORC 2.0.2 or later. - list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) + get_target_property(ORC_SNAPPY_INCLUDE_DIR ${Snappy_TARGET} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_SNAPPY_ROOT "${ORC_SNAPPY_INCLUDE_DIR}" DIRECTORY) - fetchcontent_makeavailable(orc) - if(CMAKE_VERSION VERSION_LESS 3.28) - set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) + get_target_property(ORC_LZ4_ROOT LZ4::lz4 INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_LZ4_ROOT "${ORC_LZ4_ROOT}" DIRECTORY) + + get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + + set(ORC_CMAKE_ARGS + ${EP_COMMON_CMAKE_ARGS} + "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" + -DSTOP_BUILD_ON_WARNING=OFF + -DBUILD_LIBHDFSPP=OFF + -DBUILD_JAVA=OFF + -DBUILD_TOOLS=OFF + -DBUILD_CPP_TESTS=OFF + -DINSTALL_VENDORED_LIBS=OFF + "-DLZ4_HOME=${ORC_LZ4_ROOT}" + "-DPROTOBUF_EXECUTABLE=$" + "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" + "-DPROTOBUF_INCLUDE_DIR=$" + "-DPROTOBUF_LIBRARY=$" + "-DPROTOC_LIBRARY=$" + "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DSNAPPY_LIBRARY=$" + "-DLZ4_LIBRARY=$" + "-DLZ4_STATIC_LIB=$" + "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" + "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" + "-DZSTD_HOME=${ORC_ZSTD_ROOT}" + "-DZSTD_INCLUDE_DIR=$" + "-DZSTD_LIBRARY=$") + if(ZLIB_ROOT) + set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") + endif() + + # Work around CMake bug + file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) + + externalproject_add(orc_ep + ${EP_COMMON_OPTIONS} + URL ${ORC_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS ${ORC_STATIC_LIB} + CMAKE_ARGS ${ORC_CMAKE_ARGS} + DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET} + LZ4::lz4 + ZLIB::ZLIB) + add_library(orc::orc STATIC IMPORTED) + set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") + target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") + target_link_libraries(orc::orc INTERFACE LZ4::lz4 ZLIB::ZLIB ${ARROW_ZSTD_LIBZSTD} + ${Snappy_TARGET}) + # Protobuf generated files may use ABSL_DCHECK*() and + # absl::log_internal_check_op is needed for them. + if(TARGET absl::log_internal_check_op) + target_link_libraries(orc::orc INTERFACE absl::log_internal_check_op) + endif() + if(NOT MSVC) + if(NOT APPLE AND ARROW_ENABLE_THREADING) + target_link_libraries(orc::orc INTERFACE Threads::Threads) + endif() + target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) + endif() + add_dependencies(orc::orc orc_ep) + list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) endif() set(ORC_VENDORED TRUE PARENT_SCOPE) - - add_library(orc::orc INTERFACE IMPORTED) - target_link_libraries(orc::orc INTERFACE orc) - target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" - "${orc_SOURCE_DIR}/c++/include") - - list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) set(ARROW_BUNDLED_STATIC_LIBS ${ARROW_BUNDLED_STATIC_LIBS} PARENT_SCOPE) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index e53bb3fcaf6..fcaaa423a4c 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -687,19 +687,6 @@ test_and_install_cpp() { ARROW_CMAKE_OPTIONS="${ARROW_CMAKE_OPTIONS:-} -G ${CMAKE_GENERATOR}" fi - if [ -z "${ARROW_ORC}" ]; then - cmake_version=$(cmake --version | \ - grep \ - --extended-regexp \ - --only-matching \ - '[0-9]+\.[0-9]+') - if expr ${cmake_version} '>=' 3.22; then - ARROW_ORC=ON - else - ARROW_ORC=OFF - fi - fi - local ARROW_BUILD_INTEGRATION=OFF local ARROW_BUILD_TESTS=OFF if [ ${TEST_INTEGRATION_CPP} -gt 0 ]; then @@ -727,7 +714,7 @@ test_and_install_cpp() { -DARROW_GCS=${ARROW_GCS} \ -DARROW_HDFS=ON \ -DARROW_JSON=ON \ - -DARROW_ORC=${ARROW_ORC} \ + -DARROW_ORC=ON \ -DARROW_PARQUET=ON \ -DARROW_SUBSTRAIT=ON \ -DARROW_S3=${ARROW_S3} \ @@ -779,6 +766,7 @@ test_python() { export PYARROW_PARALLEL=$NPROC export PYARROW_WITH_DATASET=1 export PYARROW_WITH_HDFS=1 + export PYARROW_WITH_ORC=1 export PYARROW_WITH_PARQUET=1 export PYARROW_WITH_PARQUET_ENCRYPTION=1 if [ "${ARROW_CUDA}" = "ON" ]; then @@ -793,9 +781,6 @@ test_python() { if [ "${ARROW_GCS}" = "ON" ]; then export PYARROW_WITH_GCS=1 fi - if [ "${ARROW_ORC}" = "ON" ]; then - export PYARROW_WITH_ORC=1 - fi if [ "${ARROW_S3}" = "ON" ]; then export PYARROW_WITH_S3=1 fi @@ -813,6 +798,7 @@ import pyarrow.csv import pyarrow.dataset import pyarrow.fs import pyarrow.json +import pyarrow.orc import pyarrow.parquet " if [ "${ARROW_CUDA}" == "ON" ]; then @@ -827,9 +813,6 @@ import pyarrow.parquet if [ "${ARROW_GCS}" == "ON" ]; then python -c "import pyarrow._gcsfs" fi - if [ "${ARROW_ORC}" == "ON" ]; then - python -c "import pyarrow.orc" - fi if [ "${ARROW_S3}" == "ON" ]; then python -c "import pyarrow._s3fs" fi @@ -1333,7 +1316,6 @@ fi : ${ARROW_FLIGHT:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GCS:=OFF} -: ${ARROW_ORC:=} : ${ARROW_S3:=OFF} TEST_SUCCESS=no diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index d53b2e377dd..83bcad98a7a 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -14,55 +14,44 @@ BUILD_TYPE=release dh $@ --with gir override_dh_auto_configure: - if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \ - ARROW_CUDA=ON; \ - else \ - ARROW_CUDA=OFF; \ - fi; \ - cmake_version=$$( \ - cmake --version | \ - grep \ - --extended-regexp \ - --only-matching \ - '[0-9]+\.[0-9]+'); \ - if dpkg --compare-versions $${cmake_version} ge 3.22; then \ - ARROW_ORC=ON; \ - else \ - ARROW_ORC=OFF; \ - fi; \ - dh_auto_configure \ - --sourcedirectory=cpp \ - --builddirectory=cpp_build \ - --buildsystem=cmake+ninja \ - -- \ - -DARROW_BUILD_UTILITIES=ON \ - -DARROW_COMPUTE=ON \ - -DARROW_CSV=ON \ - -DARROW_CUDA=$${ARROW_CUDA} \ - -DARROW_DATASET=ON \ - -DARROW_FILESYSTEM=ON \ - -DARROW_FLIGHT=ON \ - -DARROW_FLIGHT_SQL=ON \ - -DARROW_GANDIVA=ON \ - -DARROW_GCS=ON \ - -DARROW_HDFS=ON \ - -DARROW_JSON=ON \ - -DARROW_MIMALLOC=ON \ - -DARROW_ORC=$${ARROW_ORC} \ - -DARROW_PACKAGE_KIND=deb \ - -DARROW_PARQUET=ON \ - -DARROW_S3=ON \ - -DARROW_USE_CCACHE=OFF \ - -DARROW_USE_MOLD=ON \ - -DARROW_WITH_BROTLI=ON \ - -DARROW_WITH_BZ2=ON \ - -DARROW_WITH_LZ4=ON \ - -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON \ - -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ - -DCUDAToolkit_ROOT=/usr \ - -DPARQUET_BUILD_EXECUTABLES=ON \ + if dpkg -l nvidia-cuda-toolkit > /dev/null 2>&1; then \ + ARROW_CUDA=ON; \ + else \ + ARROW_CUDA=OFF; \ + fi; \ + dh_auto_configure \ + --sourcedirectory=cpp \ + --builddirectory=cpp_build \ + --buildsystem=cmake+ninja \ + -- \ + -DARROW_BUILD_UTILITIES=ON \ + -DARROW_COMPUTE=ON \ + -DARROW_CSV=ON \ + -DARROW_CUDA=$${ARROW_CUDA} \ + -DARROW_DATASET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_FLIGHT=ON \ + -DARROW_FLIGHT_SQL=ON \ + -DARROW_GANDIVA=ON \ + -DARROW_GCS=ON \ + -DARROW_HDFS=ON \ + -DARROW_JSON=ON \ + -DARROW_MIMALLOC=ON \ + -DARROW_ORC=ON \ + -DARROW_PACKAGE_KIND=deb \ + -DARROW_PARQUET=ON \ + -DARROW_S3=ON \ + -DARROW_USE_CCACHE=OFF \ + -DARROW_USE_MOLD=ON \ + -DARROW_WITH_BROTLI=ON \ + -DARROW_WITH_BZ2=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ + -DCUDAToolkit_ROOT=/usr \ + -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON override_dh_auto_build: diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index b15772a8153..8d47407c035 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -72,7 +72,6 @@ # %%define use_glog (%%{_rhel} <= 8) %define use_glog 0 %define use_mimalloc (%{_rhel} >= 8) -%define use_orc (%{_rhel} >= 8 || %{is_amazon_linux}) # TODO: Enable this. This works on local but is fragile on GitHub Actions and # Travis CI. # %%define use_s3 (%%{_rhel} >= 8) @@ -180,9 +179,7 @@ cd cpp %if %{use_mimalloc} -DARROW_MIMALLOC=ON \ %endif -%if %{use_orc} -DARROW_ORC=ON \ -%endif -DARROW_PACKAGE_KIND=rpm \ -DARROW_PARQUET=ON \ %if %{use_s3} @@ -328,9 +325,7 @@ Libraries and header files for Apache Arrow C++. %{_libdir}/pkgconfig/arrow-csv.pc %{_libdir}/pkgconfig/arrow-filesystem.pc %{_libdir}/pkgconfig/arrow-json.pc -%if %{use_orc} %{_libdir}/pkgconfig/arrow-orc.pc -%endif %{_libdir}/pkgconfig/arrow.pc %package -n %{name}%{so_version}-acero-libs @@ -598,9 +593,7 @@ Libraries and header files for Apache Arrow GLib. %{_libdir}/libarrow-glib.a %{_libdir}/libarrow-glib.so %{_libdir}/pkgconfig/arrow-glib.pc -%if %{use_orc} %{_libdir}/pkgconfig/arrow-orc-glib.pc -%endif %package glib-doc Summary: Documentation for Apache Arrow GLib From 921b22d39c14e79db2472b8091bd10b6ed8b413b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 5 Jul 2024 15:31:02 +0900 Subject: [PATCH 20/24] Add missing libprotobuf dependency --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index f6092998a2d..5d25137b7d0 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4679,6 +4679,7 @@ function(build_orc) endif() target_link_libraries(orc::orc INTERFACE ${CMAKE_DL_LIBS}) endif() + target_link_libraries(orc::orc INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF}) add_dependencies(orc::orc orc_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS orc::orc) endif() From 77dc35ff14662823b3ba8eed16881c8a60940e46 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 6 Jul 2024 06:47:37 +0900 Subject: [PATCH 21/24] Require CMake 3.29+ for FetchContent --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 5d25137b7d0..3b87e63beaa 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4494,7 +4494,7 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") function(build_orc) message(STATUS "Building Apache ORC from source") - if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) fetchcontent_declare(orc ${FC_DECLARE_COMMON_OPTIONS} URL ${ORC_SOURCE_URL} @@ -4589,9 +4589,6 @@ function(build_orc) ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) fetchcontent_makeavailable(orc) - if(CMAKE_VERSION VERSION_LESS 3.28) - set_property(DIRECTORY ${orc_SOURCE_DIR} PROPERTY EXCLUDE_FROM_ALL TRUE) - endif() add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) From e0b6f52ea2d513003cfbe76f15c5db240cbdae01 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 6 Jul 2024 07:07:08 +0900 Subject: [PATCH 22/24] Use CMake targets directly --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 3b87e63beaa..44e1764edd3 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4524,11 +4524,9 @@ function(build_orc) set(PROTOBUF_HOME ${Protobuf_ROOT} CACHE STRING "" FORCE) - target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} - INTERFACE "${PROTOBUF_INCLUDE_DIR}") - set(PROTOBUF_EXECUTABLE $) - set(PROTOBUF_LIBRARY $) - set(PROTOC_LIBRARY $) + set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) + set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) + set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) set(ORC_PREFER_STATIC_SNAPPY OFF @@ -4656,6 +4654,7 @@ function(build_orc) BUILD_BYPRODUCTS ${ORC_STATIC_LIB} CMAKE_ARGS ${ORC_CMAKE_ARGS} DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} + ${ARROW_PROTOBUF_PROTOC} ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET} LZ4::lz4 From cf33494f70ead7f05ccb15ab11f46f8b88418bb1 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 6 Jul 2024 09:33:54 +0900 Subject: [PATCH 23/24] Add target_include_directories() again --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 44e1764edd3..8cb3ec83f57 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4524,6 +4524,9 @@ function(build_orc) set(PROTOBUF_HOME ${Protobuf_ROOT} CACHE STRING "" FORCE) + # ORC uses this. + target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} + INTERFACE "${PROTOBUF_INCLUDE_DIR}") set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) From 2e4112d5f70ab140fb4aff00b211f9cb46f38dc0 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 7 Jul 2024 07:30:28 +0900 Subject: [PATCH 24/24] Disable FETCHCONTENT_FULLY_DISCONNECTED --- cpp/CMakeLists.txt | 8 ++++++++ dev/tasks/linux-packages/apache-arrow/debian/rules | 1 + 2 files changed, 9 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 679842c31e0..2e2a4971840 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -71,6 +71,14 @@ if(POLICY CMP0135) cmake_policy(SET CMP0135 NEW) endif() +# https://cmake.org/cmake/help/latest/policy/CMP0170.html +# +# CMP0170 is for enforcing dependency populations by users with +# FETCHCONTENT_FULLY_DISCONNECTED=ON. +if(POLICY CMP0170) + cmake_policy(SET CMP0170 NEW) +endif() + set(ARROW_VERSION "17.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_BASE_VERSION "${ARROW_VERSION}") diff --git a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 83bcad98a7a..6c3074ab234 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -51,6 +51,7 @@ override_dh_auto_configure: -DARROW_WITH_ZSTD=ON \ -DCMAKE_BUILD_TYPE=$(BUILD_TYPE) \ -DCUDAToolkit_ROOT=/usr \ + -DFETCHCONTENT_FULLY_DISCONNECTED=OFF \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON