-
Notifications
You must be signed in to change notification settings - Fork 4k
GH-47399: [C++] Update bundled Apache ORC to 2.2.0 with Protobuf patch #47408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1024,6 +1024,14 @@ macro(prepare_fetchcontent) | |
| # We should remove it once we have updated the dependencies: | ||
| # https://github.com/apache/arrow/issues/45985 | ||
| set(CMAKE_POLICY_VERSION_MINIMUM 3.5) | ||
| # Use "NEW" for CMP0077 by default. | ||
| # | ||
| # https://cmake.org/cmake/help/latest/policy/CMP0077.html | ||
| # | ||
| # option() honors normal variables. | ||
| set(CMAKE_POLICY_DEFAULT_CMP0077 | ||
| NEW | ||
| CACHE STRING "") | ||
| set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "") | ||
|
|
||
| if(MSVC) | ||
|
|
@@ -4599,8 +4607,26 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include") | |
| # Apache ORC | ||
|
|
||
| function(build_orc) | ||
| list(APPEND CMAKE_MESSAGE_INDENT "Apache ORC: ") | ||
|
|
||
| message(STATUS "Building Apache ORC from source") | ||
|
|
||
| set(ORC_PATCHES) | ||
| if(MSVC) | ||
| # We can remove this once bundled Apache ORC is 2.2.1 or later. | ||
| list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2345.patch) | ||
| endif() | ||
| if(Protobuf_VERSION VERSION_GREATER_EQUAL 32.0) | ||
| # We can remove this once bundled Apache ORC is 2.2.1 or later. | ||
| list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2357.patch) | ||
| endif() | ||
| if(ORC_PATCHES) | ||
| find_program(PATCH patch REQUIRED) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I recall that some Docker images needed the `patch` dependency added when I was fixing a previous ORC build failure. Is that still the case?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Based on the CI results, we don't need to add an additional `patch` dependency. But let's try the R and wheel-related CI jobs before we merge this.
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. None of the CI failures are related to this change; the same jobs are currently failing on the nightlies as well. |
||
| set(ORC_PATCH_COMMAND ${PATCH} -p1 -i ${ORC_PATCHES}) | ||
| else() | ||
| set(ORC_PATCH_COMMAND) | ||
| endif() | ||
|
|
||
| if(LZ4_VENDORED) | ||
| set(ORC_LZ4_TARGET lz4_static) | ||
| set(ORC_LZ4_ROOT "${lz4_SOURCE_DIR}") | ||
|
|
@@ -4615,98 +4641,62 @@ function(build_orc) | |
| if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29) | ||
| fetchcontent_declare(orc | ||
| ${FC_DECLARE_COMMON_OPTIONS} | ||
| PATCH_COMMAND ${ORC_PATCH_COMMAND} | ||
| URL ${ORC_SOURCE_URL} | ||
| URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") | ||
| prepare_fetchcontent() | ||
|
|
||
| set(CMAKE_UNITY_BUILD FALSE) | ||
|
|
||
| set(ORC_PREFER_STATIC_LZ4 | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(LZ4_HOME | ||
| "${ORC_LZ4_ROOT}" | ||
| CACHE STRING "" FORCE) | ||
| set(LZ4_INCLUDE_DIR | ||
| "${ORC_LZ4_INCLUDE_DIR}" | ||
| CACHE STRING "" FORCE) | ||
| set(LZ4_LIBRARY | ||
| ${ORC_LZ4_TARGET} | ||
| CACHE STRING "" FORCE) | ||
| set(ORC_PREFER_STATIC_LZ4 OFF) | ||
| set(LZ4_HOME "${ORC_LZ4_ROOT}") | ||
| set(LZ4_INCLUDE_DIR "${ORC_LZ4_INCLUDE_DIR}") | ||
| set(LZ4_LIBRARY ${ORC_LZ4_TARGET}) | ||
|
|
||
| set(ORC_PREFER_STATIC_PROTOBUF | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(ORC_PREFER_STATIC_PROTOBUF OFF) | ||
| get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} | ||
| INTERFACE_INCLUDE_DIRECTORIES) | ||
| get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) | ||
| set(PROTOBUF_HOME | ||
| ${Protobuf_ROOT} | ||
| CACHE STRING "" FORCE) | ||
| set(PROTOBUF_HOME ${Protobuf_ROOT}) | ||
| # ORC uses this. | ||
| target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} | ||
| INTERFACE "${PROTOBUF_INCLUDE_DIR}") | ||
| set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) | ||
| set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) | ||
| set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) | ||
|
|
||
| set(ORC_PREFER_STATIC_SNAPPY | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(ORC_PREFER_STATIC_SNAPPY OFF) | ||
| get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES) | ||
| get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY) | ||
| set(SNAPPY_HOME | ||
| ${Snappy_ROOT} | ||
| CACHE STRING "" FORCE) | ||
| set(SNAPPY_LIBRARY | ||
| ${Snappy_TARGET} | ||
| CACHE STRING "" FORCE) | ||
| set(SNAPPY_HOME ${Snappy_ROOT}) | ||
| set(SNAPPY_LIBRARY ${Snappy_TARGET}) | ||
|
|
||
| set(ORC_PREFER_STATIC_ZLIB | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(ORC_PREFER_STATIC_ZLIB OFF) | ||
| get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES) | ||
| get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY) | ||
| set(ZLIB_HOME | ||
| ${ZLIB_ROOT} | ||
| CACHE STRING "" FORCE) | ||
| # From CMake 3.21 onwards the set(CACHE) command does not remove any normal | ||
| # variable of the same name from the current scope. We have to manually remove | ||
| # the variable via unset to avoid ORC not finding the ZLIB_LIBRARY. | ||
| set(ZLIB_HOME ${ZLIB_ROOT}) | ||
| # From CMake 3.21 onwards the set(CACHE) command does not remove | ||
| # any normal variable of the same name from the current scope. We | ||
| # have to manually remove the variable via unset to avoid ORC not | ||
| # finding the ZLIB_LIBRARY. | ||
| unset(ZLIB_LIBRARY) | ||
| set(ZLIB_LIBRARY | ||
| ZLIB::ZLIB | ||
| CACHE STRING "" FORCE) | ||
|
|
||
| set(ORC_PREFER_STATIC_ZSTD | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(ORC_PREFER_STATIC_ZSTD OFF) | ||
| get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD} | ||
| INTERFACE_INCLUDE_DIRECTORIES) | ||
| get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY) | ||
| set(ZSTD_HOME | ||
| ${ZSTD_ROOT} | ||
| CACHE STRING "" FORCE) | ||
| set(ZSTD_HOME ${ZSTD_ROOT}) | ||
| set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD}) | ||
|
|
||
| set(BUILD_CPP_TESTS | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(BUILD_JAVA | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(BUILD_LIBHDFSPP | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(BUILD_TOOLS | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(INSTALL_VENDORED_LIBS | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(STOP_BUILD_ON_WARNING | ||
| OFF | ||
| CACHE BOOL "" FORCE) | ||
| set(BUILD_CPP_TESTS OFF) | ||
| set(BUILD_JAVA OFF) | ||
| set(BUILD_LIBHDFSPP OFF) | ||
| set(BUILD_TOOLS OFF) | ||
| set(INSTALL_VENDORED_LIBS OFF) | ||
| set(STOP_BUILD_ON_WARNING OFF) | ||
|
|
||
| fetchcontent_makeavailable(orc) | ||
|
|
||
|
|
@@ -4769,16 +4759,17 @@ function(build_orc) | |
|
|
||
| externalproject_add(orc_ep | ||
| ${EP_COMMON_OPTIONS} | ||
| URL ${ORC_SOURCE_URL} | ||
| URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}" | ||
| BUILD_BYPRODUCTS ${ORC_STATIC_LIB} | ||
| CMAKE_ARGS ${ORC_CMAKE_ARGS} | ||
| DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF} | ||
| ${ARROW_PROTOBUF_PROTOC} | ||
| ${ARROW_ZSTD_LIBZSTD} | ||
| ${Snappy_TARGET} | ||
| ${ORC_LZ4_TARGET} | ||
| ZLIB::ZLIB) | ||
| ZLIB::ZLIB | ||
| PATCH_COMMAND ${ORC_PATCH_COMMAND} | ||
| URL ${ORC_SOURCE_URL} | ||
| URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}") | ||
| add_library(orc::orc STATIC IMPORTED) | ||
| set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}") | ||
| target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}") | ||
|
|
@@ -4806,6 +4797,8 @@ function(build_orc) | |
| set(ARROW_BUNDLED_STATIC_LIBS | ||
| ${ARROW_BUNDLED_STATIC_LIBS} | ||
| PARENT_SCOPE) | ||
|
|
||
| list(POP_BACK CMAKE_MESSAGE_INDENT) | ||
| endfunction() | ||
|
|
||
| if(ARROW_ORC) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,43 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. | ||
|
|
||
| From a76249e13a6e364e0507a12cb71abaaf1647252e Mon Sep 17 00:00:00 2001 | ||
| From: Yuriy Chernyshov <thegeorg@yandex-team.com> | ||
| Date: Thu, 31 Jul 2025 13:20:15 +0200 | ||
| Subject: [PATCH] Fix Windows build | ||
|
|
||
| See | ||
| https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort?view=msvc-170 | ||
| --- | ||
| c++/src/Geospatial.cc | 4 ++-- | ||
| 1 file changed, 2 insertions(+), 2 deletions(-) | ||
|
|
||
| diff --git a/c++/src/Geospatial.cc b/c++/src/Geospatial.cc | ||
| index 6d7d268703..2b110cacb6 100644 | ||
| --- a/c++/src/Geospatial.cc | ||
| +++ b/c++/src/Geospatial.cc | ||
| @@ -66,8 +66,8 @@ namespace orc::geospatial { | ||
|
|
||
| #if defined(_MSC_VER) | ||
| #include <intrin.h> // IWYU pragma: keep | ||
| -#define ORC_BYTE_SWAP64 _byteSwap_uint64 | ||
| -#define ORC_BYTE_SWAP32 _byteSwap_ulong | ||
| +#define ORC_BYTE_SWAP64 _byteswap_uint64 | ||
| +#define ORC_BYTE_SWAP32 _byteswap_ulong | ||
| #else | ||
| #define ORC_BYTE_SWAP64 __builtin_bswap64 | ||
| #define ORC_BYTE_SWAP32 __builtin_bswap32 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| # Licensed to the Apache Software Foundation (ASF) under one | ||
| # or more contributor license agreements. See the NOTICE file | ||
| # distributed with this work for additional information | ||
| # regarding copyright ownership. The ASF licenses this file | ||
| # to you under the Apache License, Version 2.0 (the | ||
| # "License"); you may not use this file except in compliance | ||
| # with the License. You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, | ||
| # software distributed under the License is distributed on an | ||
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| # KIND, either express or implied. See the License for the | ||
| # specific language governing permissions and limitations | ||
| # under the License. | ||
|
|
||
| From a66baec5731b65a81189f48c242433d01580f344 Mon Sep 17 00:00:00 2001 | ||
| From: Dongjoon Hyun <dongjoon@apache.org> | ||
| Date: Fri, 15 Aug 2025 12:31:09 -0700 | ||
| Subject: [PATCH] ORC-1973: [C++] Use `int64_t` instead of | ||
| `google::protobuf::int64` | ||
|
|
||
| --- | ||
| c++/src/io/InputStream.cc | 4 ++-- | ||
| c++/src/io/InputStream.hh | 2 +- | ||
| c++/src/io/OutputStream.cc | 4 ++-- | ||
| c++/src/io/OutputStream.hh | 2 +- | ||
| 4 files changed, 6 insertions(+), 6 deletions(-) | ||
|
|
||
| diff --git a/c++/src/io/InputStream.cc b/c++/src/io/InputStream.cc | ||
| index 06ef40bd4c..5e1dc00ccd 100644 | ||
| --- a/c++/src/io/InputStream.cc | ||
| +++ b/c++/src/io/InputStream.cc | ||
| @@ -112,8 +112,8 @@ namespace orc { | ||
| return false; | ||
| } | ||
|
|
||
| - google::protobuf::int64 SeekableArrayInputStream::ByteCount() const { | ||
| - return static_cast<google::protobuf::int64>(position_); | ||
| + int64_t SeekableArrayInputStream::ByteCount() const { | ||
| + return static_cast<int64_t>(position_); | ||
| } | ||
|
|
||
| void SeekableArrayInputStream::seek(PositionProvider& seekPosition) { | ||
| diff --git a/c++/src/io/InputStream.hh b/c++/src/io/InputStream.hh | ||
| index 07aa623b5f..8b251c9301 100644 | ||
| --- a/c++/src/io/InputStream.hh | ||
| +++ b/c++/src/io/InputStream.hh | ||
| @@ -72,7 +72,7 @@ namespace orc { | ||
| virtual bool Next(const void** data, int* size) override; | ||
| virtual void BackUp(int count) override; | ||
| virtual bool Skip(int count) override; | ||
| - virtual google::protobuf::int64 ByteCount() const override; | ||
| + virtual int64_t ByteCount() const override; | ||
| virtual void seek(PositionProvider& position) override; | ||
| virtual std::string getName() const override; | ||
| }; | ||
| diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc | ||
| index fbf1ca61dd..a55050d122 100644 | ||
| --- a/c++/src/io/OutputStream.cc | ||
| +++ b/c++/src/io/OutputStream.cc | ||
| @@ -65,8 +65,8 @@ namespace orc { | ||
| // PASS | ||
| } | ||
|
|
||
| - google::protobuf::int64 BufferedOutputStream::ByteCount() const { | ||
| - return static_cast<google::protobuf::int64>(dataBuffer_->size()); | ||
| + int64_t BufferedOutputStream::ByteCount() const { | ||
| + return static_cast<int64_t>(dataBuffer_->size()); | ||
| } | ||
|
|
||
| bool BufferedOutputStream::WriteAliasedRaw(const void*, int) { | ||
| diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh | ||
| index 6319de96d6..b029818125 100644 | ||
| --- a/c++/src/io/OutputStream.hh | ||
| +++ b/c++/src/io/OutputStream.hh | ||
| @@ -61,7 +61,7 @@ namespace orc { | ||
|
|
||
| virtual bool Next(void** data, int* size) override; | ||
| virtual void BackUp(int count) override; | ||
| - virtual google::protobuf::int64 ByteCount() const override; | ||
| + virtual int64_t ByteCount() const override; | ||
| virtual bool WriteAliasedRaw(const void* data, int size) override; | ||
| virtual bool AllowsAliasing() const override; | ||
|
|
Uh oh!
There was an error while loading. Please reload this page.