From 856516e33573d35afa28d4dda73b72f50b3e4437 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sun, 12 Jan 2025 00:14:49 +0800 Subject: [PATCH 01/13] GH-45225: [C++] Upgrade ORC to 2.1.0 --- cpp/thirdparty/versions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 53d2034600a..29f0cc7d1b4 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.3 -ARROW_ORC_BUILD_SHA256_CHECKSUM=082cba862b5a8a0d14c225404d0b51cd8d1b64ca81b8f1e500322ce8922cb86d +ARROW_ORC_BUILD_VERSION=2.1.0 +ARROW_ORC_BUILD_SHA256_CHECKSUM=69d45665bfb5699b709094ba630ae4b186b19e083c4438855fc29c77125c149c ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require From b27ea214216058e32e41ac5f32ff383a107289d5 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 00:15:37 +0800 Subject: [PATCH 02/13] apply patch to avoid CMAKE_SOURCE_DIR --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 11 +++++++---- cpp/cmake_modules/orc.diff | 15 +++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 cpp/cmake_modules/orc.diff diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index abfe6d274f7..3898860d4b8 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4574,10 +4574,14 @@ function(build_orc) message(STATUS "Building Apache ORC from source") if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) + find_program(PATCH patch REQUIRED) + set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff) + fetchcontent_declare(orc ${FC_DECLARE_COMMON_OPTIONS} URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${ORC_PATCH_COMMAND}) prepare_fetchcontent() set(CMAKE_UNITY_BUILD FALSE) @@ -4667,7 +4671,8 @@ function(build_orc) OFF CACHE BOOL "" FORCE) - # We can remove this with ORC 2.0.2 or later. + # We can remove this with ORC 2.1.1 or later + # See https://github.com/apache/orc/blob/5bbafbb847f6e23b5a25d83c4d817741d36d9cc8/CMakeLists.txt#L33 list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) @@ -4675,8 +4680,6 @@ function(build_orc) add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) - target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" - "${orc_SOURCE_DIR}/c++/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) else() diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff new file mode 100644 index 00000000000..c360c68679a --- /dev/null +++ b/cpp/cmake_modules/orc.diff @@ -0,0 +1,15 @@ +diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt +index 694667c06..af13a94aa 100644 +--- a/c++/src/CMakeLists.txt ++++ b/c++/src/CMakeLists.txt +@@ -218,8 +218,8 @@ target_include_directories (orc + INTERFACE + $ + PUBLIC +- $ +- $ ++ $ ++ $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} From 45a812852c09b9d490389c7f9759a4171ee5d1ee Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 00:18:11 +0800 Subject: [PATCH 03/13] add license header --- cpp/cmake_modules/orc.diff | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index c360c68679a..c9850777a2b 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -1,3 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index 694667c06..af13a94aa 100644 --- a/c++/src/CMakeLists.txt From 12eb1539682d2ca46250f944dc215fa966de0072 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 09:21:29 +0800 Subject: [PATCH 04/13] disable CMakeFormat from ORC --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 10 ++++++---- cpp/cmake_modules/orc.diff | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 3898860d4b8..06a0e586342 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4573,10 +4573,11 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") function(build_orc) message(STATUS "Building Apache ORC from source") - if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) - find_program(PATCH patch REQUIRED) - set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff) + # Remove this once we have a patch for ORC 2.1.1 + find_program(PATCH patch REQUIRED) + set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) fetchcontent_declare(orc ${FC_DECLARE_COMMON_OPTIONS} URL ${ORC_SOURCE_URL} @@ -4746,7 +4747,8 @@ function(build_orc) ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET} LZ4::lz4 - ZLIB::ZLIB) + ZLIB::ZLIB + PATCH_COMMAND ${ORC_PATCH_COMMAND}) add_library(orc::orc STATIC IMPORTED) set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index c9850777a2b..8226008e8a5 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -15,6 +15,16 @@ # specific language governing permissions and limitations # under the License. +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 1f8931508..7bfc71aea 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -225,5 +225,3 @@ if (BUILD_CPP_TESTS) + ) + endif () + endif () +- +-INCLUDE(CheckFormat) diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index 694667c06..af13a94aa 100644 --- a/c++/src/CMakeLists.txt From 634e1377b6ed4dbdca271e49ca8cf162f784ac23 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 09:55:09 +0800 Subject: [PATCH 05/13] fix name conflict of provide_cmake_module --- cpp/cmake_modules/orc.diff | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index 8226008e8a5..b9f679a636e 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -40,3 +40,25 @@ index 694667c06..af13a94aa 100644 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} +diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake +index 017e6c5b8..26bb5c69a 100644 +--- a/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cmake_modules/ThirdpartyToolchain.cmake +@@ -122,7 +122,7 @@ macro (add_built_library external_project_name target_name link_lib include_dir) + endif () + endmacro () + +-function(provide_cmake_module MODULE_NAME) ++function(orc_provide_cmake_module MODULE_NAME) + set(module "${CMAKE_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake") + if(EXISTS "${module}") + message(STATUS "Providing CMake module for ${MODULE_NAME} as part of CMake package") +@@ -131,7 +131,7 @@ function(provide_cmake_module MODULE_NAME) + endfunction() + + function(provide_find_module PACKAGE_NAME) +- provide_cmake_module("Find${PACKAGE_NAME}") ++ orc_provide_cmake_module("Find${PACKAGE_NAME}") + endfunction() + + # ---------------------------------------------------------------------- From 988c8c635c3ae04722958e08768741559ed83fef Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 10:04:21 +0800 Subject: [PATCH 06/13] resolve more name conflict --- cpp/cmake_modules/orc.diff | 220 ++++++++++++++++++++++++++++++++++++- 1 file changed, 217 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index b9f679a636e..71ad37a4364 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -41,9 +41,25 @@ index 694667c06..af13a94aa 100644 ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake -index 017e6c5b8..26bb5c69a 100644 +index 017e6c5b8..ca233ea65 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake +@@ -103,13 +103,13 @@ endif () + + # ---------------------------------------------------------------------- + # Macros for adding third-party libraries +-macro (add_resolved_library target_name link_lib include_dir) ++macro (orc_add_resolved_library target_name link_lib include_dir) + add_library (${target_name} INTERFACE IMPORTED) + target_link_libraries (${target_name} INTERFACE ${link_lib}) + target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir}) + endmacro () + +-macro (add_built_library external_project_name target_name link_lib include_dir) ++macro (orc_add_built_library external_project_name target_name link_lib include_dir) + file (MAKE_DIRECTORY "${include_dir}") + + add_library (${target_name} STATIC IMPORTED) @@ -122,7 +122,7 @@ macro (add_built_library external_project_name target_name link_lib include_dir) endif () endmacro () @@ -53,12 +69,210 @@ index 017e6c5b8..26bb5c69a 100644 set(module "${CMAKE_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake") if(EXISTS "${module}") message(STATUS "Providing CMake module for ${MODULE_NAME} as part of CMake package") -@@ -131,7 +131,7 @@ function(provide_cmake_module MODULE_NAME) +@@ -130,8 +130,8 @@ function(provide_cmake_module MODULE_NAME) + endif() endfunction() - function(provide_find_module PACKAGE_NAME) +-function(provide_find_module PACKAGE_NAME) - provide_cmake_module("Find${PACKAGE_NAME}") ++function(orc_provide_find_module PACKAGE_NAME) + orc_provide_cmake_module("Find${PACKAGE_NAME}") endfunction() # ---------------------------------------------------------------------- +@@ -156,7 +156,7 @@ ExternalProject_Add (orc-format_ep + # Snappy + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Snappy REQUIRED CONFIG) +- add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -168,13 +168,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${SNAPPY_HOME}" STREQUAL "") + find_package (Snappy REQUIRED) + if (ORC_PREFER_STATIC_SNAPPY AND SNAPPY_STATIC_LIB) +- add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + else () +- add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Snappy) ++ orc_provide_find_module (Snappy) + else () + set(SNAPPY_HOME "${THIRDPARTY_DIR}/snappy_ep-install") + set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include") +@@ -194,7 +194,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") + +- add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_snappy|${SNAPPY_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -207,7 +207,7 @@ add_library (orc::snappy ALIAS orc_snappy) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZLIB REQUIRED CONFIG) +- add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -219,13 +219,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZLIB_HOME}" STREQUAL "") + find_package (ZLIB REQUIRED) + if (ORC_PREFER_STATIC_ZLIB AND ZLIB_STATIC_LIB) +- add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + else () +- add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (ZLIB) ++ orc_provide_find_module (ZLIB) + else () + set(ZLIB_PREFIX "${THIRDPARTY_DIR}/zlib_ep-install") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") +@@ -252,7 +252,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") + +- add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zlib|${ZLIB_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -265,7 +265,7 @@ add_library (orc::zlib ALIAS orc_zlib) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZSTD REQUIRED CONFIG) +- add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -277,14 +277,14 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZSTD_HOME}" STREQUAL "") + find_package (ZSTD REQUIRED) + if (ORC_PREFER_STATIC_ZSTD AND ZSTD_STATIC_LIB) +- add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + else () +- add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) +- provide_find_module (ZSTD) ++ orc_provide_find_module (ZSTD) + else () + set(ZSTD_HOME "${THIRDPARTY_DIR}/zstd_ep-install") + set(ZSTD_INCLUDE_DIR "${ZSTD_HOME}/include") +@@ -318,7 +318,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}) + +- add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zstd|${ZSTD_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -330,7 +330,7 @@ add_library (orc::zstd ALIAS orc_zstd) + # LZ4 + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (LZ4 REQUIRED CONFIG) +- add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -342,13 +342,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${LZ4_HOME}" STREQUAL "") + find_package (LZ4 REQUIRED) + if (ORC_PREFER_STATIC_LZ4 AND LZ4_STATIC_LIB) +- add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + else () +- add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (LZ4) ++ orc_provide_find_module (LZ4) + else () + set(LZ4_PREFIX "${THIRDPARTY_DIR}/lz4_ep-install") + set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include") +@@ -375,7 +375,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) + +- add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_lz4|${LZ4_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -491,7 +491,7 @@ endif () + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Protobuf REQUIRED CONFIG) +- add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -505,20 +505,20 @@ elseif (NOT "${PROTOBUF_HOME}" STREQUAL "") + find_package (Protobuf REQUIRED) + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOBUF_STATIC_LIB) +- add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOC_STATIC_LIB) +- add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Protobuf) ++ orc_provide_find_module (Protobuf) + else () + set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") + set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") +@@ -556,8 +556,8 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}") + +- add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) +- add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_protobuf|${PROTOBUF_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -610,7 +610,7 @@ if(BUILD_LIBHDFSPP) + BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}" + CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS}) + +- add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) ++ orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) + + set (LIBHDFSPP_LIBRARIES + libhdfspp From d888b50cbebdafa0cce663e172d2a6e73676d6d9 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 10:45:33 +0800 Subject: [PATCH 07/13] fix test --- cpp/src/arrow/adapters/orc/adapter_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index b9d6c53215b..b3c314fccc0 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -235,7 +235,7 @@ void AssertTableWriteReadEqual(const std::vector>& input_ write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -272,7 +272,7 @@ void AssertBatchWriteReadEqual( write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -330,7 +330,7 @@ std::unique_ptr CreateWriter(uint64_t stripe_size, liborc::OutputStream* stream) { liborc::WriterOptions options; options.setStripeSize(stripe_size); - options.setCompressionBlockSize(1024); + options.setCompressionBlockSize(64 * 1024); options.setMemoryPool(liborc::getDefaultPool()); options.setRowIndexStride(0); return liborc::createWriter(type, stream, options); @@ -668,7 +668,7 @@ TEST_F(TestORCWriterTrivialNoWrite, noWrite) { write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); From b18c5c19c9d7725783c86b7b6a22f2d3bdff949a Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 11:09:45 +0800 Subject: [PATCH 08/13] fix test_orc.py --- python/pyarrow/tests/test_orc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py index b0f9e813b10..706fb3fe45c 100644 --- a/python/pyarrow/tests/test_orc.py +++ b/python/pyarrow/tests/test_orc.py @@ -334,7 +334,7 @@ def test_buffer_readwrite_with_writeoptions(): compression='snappy', file_version='0.11', row_index_stride=5000, - compression_block_size=32768, + compression_block_size=65536, ) buffer_reader = pa.BufferReader(buffer_output_stream.getvalue()) orc_file = orc.ORCFile(buffer_reader) @@ -344,7 +344,7 @@ def test_buffer_readwrite_with_writeoptions(): assert orc_file.compression == 'SNAPPY' assert orc_file.file_version == '0.11' assert orc_file.row_index_stride == 5000 - assert orc_file.compression_size == 32768 + assert orc_file.compression_size == 65536 # deprecated keyword order buffer_output_stream = pa.BufferOutputStream() @@ -355,7 +355,7 @@ def test_buffer_readwrite_with_writeoptions(): compression='uncompressed', file_version='0.11', row_index_stride=20000, - compression_block_size=16384, + compression_block_size=65536, ) buffer_reader = pa.BufferReader(buffer_output_stream.getvalue()) orc_file = orc.ORCFile(buffer_reader) @@ -365,7 +365,7 @@ def test_buffer_readwrite_with_writeoptions(): assert orc_file.compression == 'UNCOMPRESSED' assert orc_file.file_version == '0.11' assert orc_file.row_index_stride == 20000 - assert orc_file.compression_size == 16384 + assert orc_file.compression_size == 65536 def test_buffer_readwrite_with_bad_writeoptions(): From ae9a8a838d2353b5960d46d1efd6ebf50febafc3 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 13:07:11 +0800 Subject: [PATCH 09/13] fix ubuntu 20.04 --- ci/docker/ubuntu-20.04-cpp.dockerfile | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 5 ----- cpp/cmake_modules/orc.diff | 12 +++++++++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 8dc778d544a..259c5fb77fa 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -106,6 +106,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ python3-dev \ diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 06a0e586342..453973ee363 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4672,11 +4672,6 @@ function(build_orc) OFF CACHE BOOL "" FORCE) - # We can remove this with ORC 2.1.1 or later - # See https://github.com/apache/orc/blob/5bbafbb847f6e23b5a25d83c4d817741d36d9cc8/CMakeLists.txt#L33 - list(PREPEND CMAKE_MODULE_PATH - ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) - fetchcontent_makeavailable(orc) add_library(orc::orc INTERFACE IMPORTED) diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index 71ad37a4364..1c847d82b3f 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -16,9 +16,19 @@ # under the License. diff --git a/CMakeLists.txt b/CMakeLists.txt -index 1f8931508..7bfc71aea 100644 +index 1f8931508..f8e57bf5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt +@@ -30,8 +30,8 @@ SET(CPACK_PACKAGE_VERSION_MAJOR "2") + SET(CPACK_PACKAGE_VERSION_MINOR "1") + SET(CPACK_PACKAGE_VERSION_PATCH "0") + SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") +-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy. ++list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + + option (BUILD_JAVA + "Include ORC Java library in the build process" @@ -225,5 +225,3 @@ if (BUILD_CPP_TESTS) ) endif () From bf0368ff81a57370d8c8fd0230ca34e340f8c2d8 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 13:36:17 +0800 Subject: [PATCH 10/13] fix target visibility for ubuntu 20.04 --- cpp/cmake_modules/orc.diff | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff index 1c847d82b3f..7bdbfa1cf5d 100644 --- a/cpp/cmake_modules/orc.diff +++ b/cpp/cmake_modules/orc.diff @@ -51,7 +51,7 @@ index 694667c06..af13a94aa 100644 ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR} diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake -index 017e6c5b8..ca233ea65 100644 +index 017e6c5b8..fe376ed16 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -103,13 +103,13 @@ endif () @@ -59,8 +59,9 @@ index 017e6c5b8..ca233ea65 100644 # ---------------------------------------------------------------------- # Macros for adding third-party libraries -macro (add_resolved_library target_name link_lib include_dir) +- add_library (${target_name} INTERFACE IMPORTED) +macro (orc_add_resolved_library target_name link_lib include_dir) - add_library (${target_name} INTERFACE IMPORTED) ++ add_library (${target_name} INTERFACE IMPORTED GLOBAL) target_link_libraries (${target_name} INTERFACE ${link_lib}) target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir}) endmacro () From f9cf61cbe52261246674d9bb06d04f343350c81a Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 13:55:56 +0800 Subject: [PATCH 11/13] fix zlib --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 453973ee363..8ed13e2937e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4700,6 +4700,9 @@ function(build_orc) get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + get_target_property(ORC_ZLIB_ROOT ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_ROOT}" DIRECTORY) + set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" @@ -4709,7 +4712,6 @@ function(build_orc) -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DPROTOBUF_EXECUTABLE=$" "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" "-DPROTOBUF_INCLUDE_DIR=$" @@ -4717,16 +4719,17 @@ function(build_orc) "-DPROTOC_LIBRARY=$" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$" + "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DLZ4_LIBRARY=$" "-DLZ4_STATIC_LIB=$" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" "-DZSTD_INCLUDE_DIR=$" - "-DZSTD_LIBRARY=$") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") - endif() + "-DZSTD_LIBRARY=$" + "-DZLIB_HOME=${ORC_ZLIB_ROOT}" + "-DZLIB_INCLUDE_DIR=$" + "-DZLIB_LIBRARY=$") # Work around CMake bug file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) From 2b0a4ab83302bcf7e2686eb2ed545ca54dda6512 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 Jan 2025 15:32:59 +0800 Subject: [PATCH 12/13] install patch cmd --- ci/docker/debian-12-cpp.dockerfile | 1 + ci/docker/ubuntu-22.04-cpp.dockerfile | 1 + ci/docker/ubuntu-24.04-cpp.dockerfile | 1 + 3 files changed, 3 insertions(+) diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index f486d07ff88..fe3976248cc 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -84,6 +84,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 28cef294638..721b37dcae8 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 3f486b09f95..592a9a6a232 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ From a3493d386ae35d04dd241a956a504c1900e962f8 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Tue, 14 Jan 2025 09:22:45 +0800 Subject: [PATCH 13/13] Update cpp/cmake_modules/ThirdpartyToolchain.cmake Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 8ed13e2937e..f9459f4175c 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4573,7 +4573,7 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") function(build_orc) message(STATUS "Building Apache ORC from source") - # Remove this once we have a patch for ORC 2.1.1 + # Remove this and "patch" in "ci/docker/{debian,ubuntu}-*.dockerfile" once we have a patch for ORC 2.1.1 find_program(PATCH patch REQUIRED) set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff)