diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index f486d07ff88..fe3976248cc 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -84,6 +84,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 8dc778d544a..259c5fb77fa 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -106,6 +106,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ python3-dev \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 28cef294638..721b37dcae8 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 3f486b09f95..592a9a6a232 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -111,6 +111,7 @@ RUN apt-get update -y -q && \ ninja-build \ nlohmann-json3-dev \ npm \ + patch \ pkg-config \ protobuf-compiler \ protobuf-compiler-grpc \ diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index abfe6d274f7..f9459f4175c 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4573,11 +4573,16 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") function(build_orc) message(STATUS "Building Apache ORC from source") + # Remove this and "patch" in "ci/docker/{debian,ubuntu}-*.dockerfile" once we have a patch for ORC 2.1.1 + find_program(PATCH patch REQUIRED) + set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${CMAKE_CURRENT_LIST_DIR}/orc.diff) + if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) fetchcontent_declare(orc ${FC_DECLARE_COMMON_OPTIONS} URL ${ORC_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") + URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" + PATCH_COMMAND ${ORC_PATCH_COMMAND}) prepare_fetchcontent() set(CMAKE_UNITY_BUILD FALSE) @@ -4667,16 +4672,10 @@ function(build_orc) OFF CACHE BOOL "" FORCE) - # We can remove this with ORC 2.0.2 or later. - list(PREPEND CMAKE_MODULE_PATH - ${CMAKE_CURRENT_BINARY_DIR}/_deps/orc-src/cmake_modules) - fetchcontent_makeavailable(orc) add_library(orc::orc INTERFACE IMPORTED) target_link_libraries(orc::orc INTERFACE orc) - target_include_directories(orc::orc INTERFACE "${orc_BINARY_DIR}/c++/include" - "${orc_SOURCE_DIR}/c++/include") list(APPEND ARROW_BUNDLED_STATIC_LIBS orc) else() @@ -4701,6 +4700,9 @@ function(build_orc) get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + get_target_property(ORC_ZLIB_ROOT ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(ORC_ZLIB_ROOT "${ORC_ZLIB_ROOT}" DIRECTORY) + set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" @@ -4710,7 +4712,6 @@ function(build_orc) -DBUILD_TOOLS=OFF -DBUILD_CPP_TESTS=OFF -DINSTALL_VENDORED_LIBS=OFF - "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DPROTOBUF_EXECUTABLE=$" "-DPROTOBUF_HOME=${ORC_PROTOBUF_ROOT}" "-DPROTOBUF_INCLUDE_DIR=$" @@ -4718,16 +4719,17 @@ function(build_orc) "-DPROTOC_LIBRARY=$" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$" + "-DLZ4_HOME=${ORC_LZ4_ROOT}" "-DLZ4_LIBRARY=$" "-DLZ4_STATIC_LIB=$" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" "-DZSTD_INCLUDE_DIR=$" - "-DZSTD_LIBRARY=$") - if(ZLIB_ROOT) - set(ORC_CMAKE_ARGS ${ORC_CMAKE_ARGS} "-DZLIB_HOME=${ZLIB_ROOT}") - endif() + "-DZSTD_LIBRARY=$" + "-DZLIB_HOME=${ORC_ZLIB_ROOT}" + "-DZLIB_INCLUDE_DIR=$" + "-DZLIB_LIBRARY=$") # Work around CMake bug file(MAKE_DIRECTORY ${ORC_INCLUDE_DIR}) @@ -4743,7 +4745,8 @@ function(build_orc) ${ARROW_ZSTD_LIBZSTD} ${Snappy_TARGET} LZ4::lz4 - ZLIB::ZLIB) + ZLIB::ZLIB + PATCH_COMMAND ${ORC_PATCH_COMMAND}) add_library(orc::orc STATIC IMPORTED) set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") diff --git a/cpp/cmake_modules/orc.diff b/cpp/cmake_modules/orc.diff new file mode 100644 index 00000000000..7bdbfa1cf5d --- /dev/null +++ b/cpp/cmake_modules/orc.diff @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +diff --git a/CMakeLists.txt b/CMakeLists.txt +index 1f8931508..f8e57bf5f 100644 +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -30,8 +30,8 @@ SET(CPACK_PACKAGE_VERSION_MAJOR "2") + SET(CPACK_PACKAGE_VERSION_MINOR "1") + SET(CPACK_PACKAGE_VERSION_PATCH "0") + SET(ORC_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}") +-set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # For clang-tidy. ++list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + + option (BUILD_JAVA + "Include ORC Java library in the build process" +@@ -225,5 +225,3 @@ if (BUILD_CPP_TESTS) + ) + endif () + endif () +- +-INCLUDE(CheckFormat) +diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt +index 694667c06..af13a94aa 100644 +--- a/c++/src/CMakeLists.txt ++++ b/c++/src/CMakeLists.txt +@@ -218,8 +218,8 @@ target_include_directories (orc + INTERFACE + $ + PUBLIC +- $ +- $ ++ $ ++ $ + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_BINARY_DIR} +diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake +index 017e6c5b8..fe376ed16 100644 +--- a/cmake_modules/ThirdpartyToolchain.cmake ++++ b/cmake_modules/ThirdpartyToolchain.cmake +@@ -103,13 +103,13 @@ endif () + + # ---------------------------------------------------------------------- + # Macros for adding third-party libraries +-macro (add_resolved_library target_name link_lib include_dir) +- add_library (${target_name} INTERFACE IMPORTED) ++macro (orc_add_resolved_library target_name link_lib include_dir) ++ add_library (${target_name} INTERFACE IMPORTED GLOBAL) + target_link_libraries (${target_name} INTERFACE ${link_lib}) + target_include_directories (${target_name} SYSTEM INTERFACE ${include_dir}) + endmacro () + +-macro (add_built_library external_project_name target_name link_lib include_dir) ++macro (orc_add_built_library external_project_name target_name link_lib include_dir) + file (MAKE_DIRECTORY "${include_dir}") + + add_library (${target_name} STATIC IMPORTED) +@@ -122,7 +122,7 @@ macro (add_built_library external_project_name target_name link_lib include_dir) + endif () + endmacro () + +-function(provide_cmake_module MODULE_NAME) ++function(orc_provide_cmake_module MODULE_NAME) + set(module "${CMAKE_SOURCE_DIR}/cmake_modules/${MODULE_NAME}.cmake") + if(EXISTS "${module}") + message(STATUS "Providing CMake module for ${MODULE_NAME} as part of CMake package") +@@ -130,8 +130,8 @@ function(provide_cmake_module MODULE_NAME) + endif() + endfunction() + +-function(provide_find_module PACKAGE_NAME) +- provide_cmake_module("Find${PACKAGE_NAME}") ++function(orc_provide_find_module PACKAGE_NAME) ++ orc_provide_cmake_module("Find${PACKAGE_NAME}") + endfunction() + + # ---------------------------------------------------------------------- +@@ -156,7 +156,7 @@ ExternalProject_Add (orc-format_ep + # Snappy + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Snappy REQUIRED CONFIG) +- add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${Snappy_LIBRARIES} ${Snappy_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -168,13 +168,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${SNAPPY_HOME}" STREQUAL "") + find_package (Snappy REQUIRED) + if (ORC_PREFER_STATIC_SNAPPY AND SNAPPY_STATIC_LIB) +- add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + else () +- add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_snappy ${SNAPPY_LIBRARY} ${SNAPPY_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES Snappy) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Snappy) ++ orc_provide_find_module (Snappy) + else () + set(SNAPPY_HOME "${THIRDPARTY_DIR}/snappy_ep-install") + set(SNAPPY_INCLUDE_DIR "${SNAPPY_HOME}/include") +@@ -194,7 +194,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}") + +- add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) ++ orc_add_built_library (snappy_ep orc_snappy ${SNAPPY_STATIC_LIB} ${SNAPPY_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_snappy|${SNAPPY_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -207,7 +207,7 @@ add_library (orc::snappy ALIAS orc_snappy) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZLIB REQUIRED CONFIG) +- add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARIES} ${ZLIB_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -219,13 +219,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZLIB_HOME}" STREQUAL "") + find_package (ZLIB REQUIRED) + if (ORC_PREFER_STATIC_ZLIB AND ZLIB_STATIC_LIB) +- add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + else () +- add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zlib ${ZLIB_LIBRARY} ${ZLIB_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZLIB) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (ZLIB) ++ orc_provide_find_module (ZLIB) + else () + set(ZLIB_PREFIX "${THIRDPARTY_DIR}/zlib_ep-install") + set(ZLIB_INCLUDE_DIR "${ZLIB_PREFIX}/include") +@@ -252,7 +252,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}") + +- add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) ++ orc_add_built_library (zlib_ep orc_zlib ${ZLIB_STATIC_LIB} ${ZLIB_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zlib|${ZLIB_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -265,7 +265,7 @@ add_library (orc::zlib ALIAS orc_zlib) + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (ZSTD REQUIRED CONFIG) +- add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${zstd_LIBRARIES} ${zstd_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -277,14 +277,14 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${ZSTD_HOME}" STREQUAL "") + find_package (ZSTD REQUIRED) + if (ORC_PREFER_STATIC_ZSTD AND ZSTD_STATIC_LIB) +- add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + else () +- add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_zstd ${ZSTD_LIBRARY} ${ZSTD_INCLUDE_DIR}) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$,zstd::libzstd_shared,zstd::libzstd_static>>") + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES ZSTD) +- provide_find_module (ZSTD) ++ orc_provide_find_module (ZSTD) + else () + set(ZSTD_HOME "${THIRDPARTY_DIR}/zstd_ep-install") + set(ZSTD_INCLUDE_DIR "${ZSTD_HOME}/include") +@@ -318,7 +318,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${ZSTD_STATIC_LIB}) + +- add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) ++ orc_add_built_library (zstd_ep orc_zstd ${ZSTD_STATIC_LIB} ${ZSTD_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_zstd|${ZSTD_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -330,7 +330,7 @@ add_library (orc::zstd ALIAS orc_zstd) + # LZ4 + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (LZ4 REQUIRED CONFIG) +- add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${lz4_LIBRARIES} ${lz4_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -342,13 +342,13 @@ elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") + elseif (NOT "${LZ4_HOME}" STREQUAL "") + find_package (LZ4 REQUIRED) + if (ORC_PREFER_STATIC_LZ4 AND LZ4_STATIC_LIB) +- add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + else () +- add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_lz4 ${LZ4_LIBRARY} ${LZ4_INCLUDE_DIR}) + endif () + list (APPEND ORC_SYSTEM_DEPENDENCIES LZ4) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (LZ4) ++ orc_provide_find_module (LZ4) + else () + set(LZ4_PREFIX "${THIRDPARTY_DIR}/lz4_ep-install") + set(LZ4_INCLUDE_DIR "${LZ4_PREFIX}/include") +@@ -375,7 +375,7 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS ${LZ4_STATIC_LIB}) + +- add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) ++ orc_add_built_library (lz4_ep orc_lz4 ${LZ4_STATIC_LIB} ${LZ4_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_lz4|${LZ4_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -491,7 +491,7 @@ endif () + + if (ORC_PACKAGE_KIND STREQUAL "conan") + find_package (Protobuf REQUIRED CONFIG) +- add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${protobuf_LIBRARIES} ${protobuf_INCLUDE_DIR}) + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") + elseif (ORC_PACKAGE_KIND STREQUAL "vcpkg") +@@ -505,20 +505,20 @@ elseif (NOT "${PROTOBUF_HOME}" STREQUAL "") + find_package (Protobuf REQUIRED) + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOBUF_STATIC_LIB) +- add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protobuf ${PROTOBUF_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + if (ORC_PREFER_STATIC_PROTOBUF AND PROTOC_STATIC_LIB) +- add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + else () +- add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_resolved_library (orc_protoc ${PROTOC_LIBRARY} ${PROTOBUF_INCLUDE_DIR}) + endif () + + list (APPEND ORC_SYSTEM_DEPENDENCIES Protobuf) + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +- provide_find_module (Protobuf) ++ orc_provide_find_module (Protobuf) + else () + set(PROTOBUF_PREFIX "${THIRDPARTY_DIR}/protobuf_ep-install") + set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") +@@ -556,8 +556,8 @@ else () + ${THIRDPARTY_LOG_OPTIONS} + BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOC_STATIC_LIB}") + +- add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) +- add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protobuf ${PROTOBUF_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) ++ orc_add_built_library (protobuf_ep orc_protoc ${PROTOC_STATIC_LIB} ${PROTOBUF_INCLUDE_DIR}) + + list (APPEND ORC_VENDOR_DEPENDENCIES "orc::vendored_protobuf|${PROTOBUF_STATIC_LIB_NAME}") + list (APPEND ORC_INSTALL_INTERFACE_TARGETS "$") +@@ -610,7 +610,7 @@ if(BUILD_LIBHDFSPP) + BUILD_BYPRODUCTS "${LIBHDFSPP_STATIC_LIB}" + CMAKE_ARGS ${LIBHDFSPP_CMAKE_ARGS}) + +- add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) ++ orc_add_built_library(libhdfspp_ep libhdfspp ${LIBHDFSPP_STATIC_LIB} ${LIBHDFSPP_INCLUDE_DIR}) + + set (LIBHDFSPP_LIBRARIES + libhdfspp diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index b9d6c53215b..b3c314fccc0 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -235,7 +235,7 @@ void AssertTableWriteReadEqual(const std::vector>& input_ write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -272,7 +272,7 @@ void AssertBatchWriteReadEqual( write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); @@ -330,7 +330,7 @@ std::unique_ptr CreateWriter(uint64_t stripe_size, liborc::OutputStream* stream) { liborc::WriterOptions options; options.setStripeSize(stripe_size); - options.setCompressionBlockSize(1024); + options.setCompressionBlockSize(64 * 1024); options.setMemoryPool(liborc::getDefaultPool()); options.setRowIndexStride(0); return liborc::createWriter(type, stream, options); @@ -668,7 +668,7 @@ TEST_F(TestORCWriterTrivialNoWrite, noWrite) { write_options.compression = Compression::UNCOMPRESSED; #endif write_options.file_version = adapters::orc::FileVersion(0, 11); - write_options.compression_block_size = 32768; + write_options.compression_block_size = 64 * 1024; write_options.row_index_stride = 5000; EXPECT_OK_AND_ASSIGN(auto writer, adapters::orc::ORCFileWriter::Open( buffer_output_stream.get(), write_options)); diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 53d2034600a..29f0cc7d1b4 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.13.0 ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=7735cc56507149686e6019e06f588317099d4522480be5f38a2a09ec69af1706 ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v0.17.0 ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=f269fbcb30e17b03caa1decd231ce826e59d7651c0f71c3b28eb5140b4bb5412 -ARROW_ORC_BUILD_VERSION=2.0.3 -ARROW_ORC_BUILD_SHA256_CHECKSUM=082cba862b5a8a0d14c225404d0b51cd8d1b64ca81b8f1e500322ce8922cb86d +ARROW_ORC_BUILD_VERSION=2.1.0 +ARROW_ORC_BUILD_SHA256_CHECKSUM=69d45665bfb5699b709094ba630ae4b186b19e083c4438855fc29c77125c149c ARROW_PROTOBUF_BUILD_VERSION=v21.3 ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f # Because of https://github.com/Tencent/rapidjson/pull/1323, we require diff --git a/python/pyarrow/tests/test_orc.py b/python/pyarrow/tests/test_orc.py index b0f9e813b10..706fb3fe45c 100644 --- a/python/pyarrow/tests/test_orc.py +++ b/python/pyarrow/tests/test_orc.py @@ -334,7 +334,7 @@ def test_buffer_readwrite_with_writeoptions(): compression='snappy', file_version='0.11', row_index_stride=5000, - compression_block_size=32768, + compression_block_size=65536, ) buffer_reader = pa.BufferReader(buffer_output_stream.getvalue()) orc_file = orc.ORCFile(buffer_reader) @@ -344,7 +344,7 @@ def test_buffer_readwrite_with_writeoptions(): assert orc_file.compression == 'SNAPPY' assert orc_file.file_version == '0.11' assert orc_file.row_index_stride == 5000 - assert orc_file.compression_size == 32768 + assert orc_file.compression_size == 65536 # deprecated keyword order buffer_output_stream = pa.BufferOutputStream() @@ -355,7 +355,7 @@ def test_buffer_readwrite_with_writeoptions(): compression='uncompressed', file_version='0.11', row_index_stride=20000, - compression_block_size=16384, + compression_block_size=65536, ) buffer_reader = pa.BufferReader(buffer_output_stream.getvalue()) orc_file = orc.ORCFile(buffer_reader) @@ -365,7 +365,7 @@ def test_buffer_readwrite_with_writeoptions(): assert orc_file.compression == 'UNCOMPRESSED' assert orc_file.file_version == '0.11' assert orc_file.row_index_stride == 20000 - assert orc_file.compression_size == 16384 + assert orc_file.compression_size == 65536 def test_buffer_readwrite_with_bad_writeoptions():