Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 56 additions & 63 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1024,6 +1024,14 @@ macro(prepare_fetchcontent)
# We should remove it once we have updated the dependencies:
# https://github.com/apache/arrow/issues/45985
set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
# Use "NEW" for CMP0077 by default.
#
# https://cmake.org/cmake/help/latest/policy/CMP0077.html
#
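# With NEW, option() does nothing when a normal variable of the same
# name already exists, so plain set() calls made before
# fetchcontent_makeavailable() are honored by the fetched project
# without having to force values into the cache.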
set(CMAKE_POLICY_DEFAULT_CMP0077
NEW
CACHE STRING "")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "")

if(MSVC)
Expand Down Expand Up @@ -4599,8 +4607,26 @@ target_include_directories(arrow::hadoop INTERFACE "${HADOOP_HOME}/include")
# Apache ORC

function(build_orc)
list(APPEND CMAKE_MESSAGE_INDENT "Apache ORC: ")

message(STATUS "Building Apache ORC from source")

set(ORC_PATCHES)
if(MSVC)
# We can remove this once bundled Apache ORC is 2.2.1 or later.
list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2345.patch)
endif()
if(Protobuf_VERSION VERSION_GREATER_EQUAL 32.0)
# We can remove this once bundled Apache ORC is 2.2.1 or later.
list(APPEND ORC_PATCHES ${CMAKE_CURRENT_LIST_DIR}/orc-2357.patch)
endif()
if(ORC_PATCHES)
find_program(PATCH patch REQUIRED)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recall that some Docker images needed the `patch` program added as a dependency when I was fixing a previous ORC build failure. Is that still the case?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the CI results, we don't need to add an additional `patch` dependency. But let's run the R and wheel-related CI jobs before we merge this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

None of the CI failures are related to this change; the same jobs are currently failing on the nightlies.

# Build one "patch" invocation per entry in ORC_PATCHES.
#
# "patch -i" takes exactly one patch file. Expanding the whole
# ORC_PATCHES list into a single invocation would make patch treat
# the second path as the file to be patched whenever more than one
# patch is selected (e.g. MSVC together with Protobuf >= 32), so the
# second patch would never be applied.
#
# The invocations are chained with the COMMAND keyword, which
# PATCH_COMMAND of ExternalProject_Add()/FetchContent_Declare()
# accepts for running several commands in one step.
set(ORC_PATCH_COMMAND)
foreach(ORC_PATCH IN LISTS ORC_PATCHES)
  if(ORC_PATCH_COMMAND)
    # Not the first invocation: separate it from the previous one.
    list(APPEND ORC_PATCH_COMMAND COMMAND)
  endif()
  list(APPEND ORC_PATCH_COMMAND ${PATCH} -p1 -i ${ORC_PATCH})
endforeach()
else()
set(ORC_PATCH_COMMAND)
endif()

if(LZ4_VENDORED)
set(ORC_LZ4_TARGET lz4_static)
set(ORC_LZ4_ROOT "${lz4_SOURCE_DIR}")
Expand All @@ -4615,98 +4641,62 @@ function(build_orc)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.29)
fetchcontent_declare(orc
${FC_DECLARE_COMMON_OPTIONS}
PATCH_COMMAND ${ORC_PATCH_COMMAND}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()

set(CMAKE_UNITY_BUILD FALSE)

set(ORC_PREFER_STATIC_LZ4
OFF
CACHE BOOL "" FORCE)
set(LZ4_HOME
"${ORC_LZ4_ROOT}"
CACHE STRING "" FORCE)
set(LZ4_INCLUDE_DIR
"${ORC_LZ4_INCLUDE_DIR}"
CACHE STRING "" FORCE)
set(LZ4_LIBRARY
${ORC_LZ4_TARGET}
CACHE STRING "" FORCE)
set(ORC_PREFER_STATIC_LZ4 OFF)
set(LZ4_HOME "${ORC_LZ4_ROOT}")
set(LZ4_INCLUDE_DIR "${ORC_LZ4_INCLUDE_DIR}")
set(LZ4_LIBRARY ${ORC_LZ4_TARGET})

set(ORC_PREFER_STATIC_PROTOBUF
OFF
CACHE BOOL "" FORCE)
set(ORC_PREFER_STATIC_PROTOBUF OFF)
get_target_property(PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY)
set(PROTOBUF_HOME
${Protobuf_ROOT}
CACHE STRING "" FORCE)
set(PROTOBUF_HOME ${Protobuf_ROOT})
# ORC uses this.
target_include_directories(${ARROW_PROTOBUF_LIBPROTOC}
INTERFACE "${PROTOBUF_INCLUDE_DIR}")
set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC})
set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF})
set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC})

set(ORC_PREFER_STATIC_SNAPPY
OFF
CACHE BOOL "" FORCE)
set(ORC_PREFER_STATIC_SNAPPY OFF)
get_target_property(SNAPPY_INCLUDE_DIR ${Snappy_TARGET} INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(Snappy_ROOT "${SNAPPY_INCLUDE_DIR}" DIRECTORY)
set(SNAPPY_HOME
${Snappy_ROOT}
CACHE STRING "" FORCE)
set(SNAPPY_LIBRARY
${Snappy_TARGET}
CACHE STRING "" FORCE)
set(SNAPPY_HOME ${Snappy_ROOT})
set(SNAPPY_LIBRARY ${Snappy_TARGET})

set(ORC_PREFER_STATIC_ZLIB
OFF
CACHE BOOL "" FORCE)
set(ORC_PREFER_STATIC_ZLIB OFF)
get_target_property(ZLIB_INCLUDE_DIR ZLIB::ZLIB INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZLIB_ROOT "${ZLIB_INCLUDE_DIR}" DIRECTORY)
set(ZLIB_HOME
${ZLIB_ROOT}
CACHE STRING "" FORCE)
# From CMake 3.21 onwards the set(CACHE) command does not remove any normal
# variable of the same name from the current scope. We have to manually remove
# the variable via unset to avoid ORC not finding the ZLIB_LIBRARY.
set(ZLIB_HOME ${ZLIB_ROOT})
# From CMake 3.21 onwards the set(CACHE) command does not remove
# any normal variable of the same name from the current scope. We
# have to manually remove the variable via unset to avoid ORC not
# finding the ZLIB_LIBRARY.
unset(ZLIB_LIBRARY)
set(ZLIB_LIBRARY
ZLIB::ZLIB
CACHE STRING "" FORCE)

set(ORC_PREFER_STATIC_ZSTD
OFF
CACHE BOOL "" FORCE)
set(ORC_PREFER_STATIC_ZSTD OFF)
get_target_property(ZSTD_INCLUDE_DIR ${ARROW_ZSTD_LIBZSTD}
INTERFACE_INCLUDE_DIRECTORIES)
get_filename_component(ZSTD_ROOT "${ZSTD_INCLUDE_DIR}" DIRECTORY)
set(ZSTD_HOME
${ZSTD_ROOT}
CACHE STRING "" FORCE)
set(ZSTD_HOME ${ZSTD_ROOT})
set(ZSTD_LIBRARY ${ARROW_ZSTD_LIBZSTD})

set(BUILD_CPP_TESTS
OFF
CACHE BOOL "" FORCE)
set(BUILD_JAVA
OFF
CACHE BOOL "" FORCE)
set(BUILD_LIBHDFSPP
OFF
CACHE BOOL "" FORCE)
set(BUILD_TOOLS
OFF
CACHE BOOL "" FORCE)
set(INSTALL_VENDORED_LIBS
OFF
CACHE BOOL "" FORCE)
set(STOP_BUILD_ON_WARNING
OFF
CACHE BOOL "" FORCE)
set(BUILD_CPP_TESTS OFF)
set(BUILD_JAVA OFF)
set(BUILD_LIBHDFSPP OFF)
set(BUILD_TOOLS OFF)
set(INSTALL_VENDORED_LIBS OFF)
set(STOP_BUILD_ON_WARNING OFF)

fetchcontent_makeavailable(orc)

Expand Down Expand Up @@ -4769,16 +4759,17 @@ function(build_orc)

externalproject_add(orc_ep
${EP_COMMON_OPTIONS}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}"
BUILD_BYPRODUCTS ${ORC_STATIC_LIB}
CMAKE_ARGS ${ORC_CMAKE_ARGS}
DEPENDS ${ARROW_PROTOBUF_LIBPROTOBUF}
${ARROW_PROTOBUF_PROTOC}
${ARROW_ZSTD_LIBZSTD}
${Snappy_TARGET}
${ORC_LZ4_TARGET}
ZLIB::ZLIB)
ZLIB::ZLIB
PATCH_COMMAND ${ORC_PATCH_COMMAND}
URL ${ORC_SOURCE_URL}
URL_HASH "SHA256=${ARROW_ORC_BUILD_SHA256_CHECKSUM}")
add_library(orc::orc STATIC IMPORTED)
set_target_properties(orc::orc PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}")
target_include_directories(orc::orc BEFORE INTERFACE "${ORC_INCLUDE_DIR}")
Expand Down Expand Up @@ -4806,6 +4797,8 @@ function(build_orc)
set(ARROW_BUNDLED_STATIC_LIBS
${ARROW_BUNDLED_STATIC_LIBS}
PARENT_SCOPE)

list(POP_BACK CMAKE_MESSAGE_INDENT)
endfunction()

if(ARROW_ORC)
Expand Down
43 changes: 43 additions & 0 deletions cpp/cmake_modules/orc-2345.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

From a76249e13a6e364e0507a12cb71abaaf1647252e Mon Sep 17 00:00:00 2001
From: Yuriy Chernyshov <thegeorg@yandex-team.com>
Date: Thu, 31 Jul 2025 13:20:15 +0200
Subject: [PATCH] Fix Windows build

See
https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/byteswap-uint64-byteswap-ulong-byteswap-ushort?view=msvc-170
---
c++/src/Geospatial.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/c++/src/Geospatial.cc b/c++/src/Geospatial.cc
index 6d7d268703..2b110cacb6 100644
--- a/c++/src/Geospatial.cc
+++ b/c++/src/Geospatial.cc
@@ -66,8 +66,8 @@ namespace orc::geospatial {

#if defined(_MSC_VER)
#include <intrin.h> // IWYU pragma: keep
-#define ORC_BYTE_SWAP64 _byteSwap_uint64
-#define ORC_BYTE_SWAP32 _byteSwap_ulong
+#define ORC_BYTE_SWAP64 _byteswap_uint64
+#define ORC_BYTE_SWAP32 _byteswap_ulong
#else
#define ORC_BYTE_SWAP64 __builtin_bswap64
#define ORC_BYTE_SWAP32 __builtin_bswap32
86 changes: 86 additions & 0 deletions cpp/cmake_modules/orc-2357.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

From a66baec5731b65a81189f48c242433d01580f344 Mon Sep 17 00:00:00 2001
From: Dongjoon Hyun <dongjoon@apache.org>
Date: Fri, 15 Aug 2025 12:31:09 -0700
Subject: [PATCH] ORC-1973: [C++] Use `int64_t` instead of
`google::protobuf::int64`

---
c++/src/io/InputStream.cc | 4 ++--
c++/src/io/InputStream.hh | 2 +-
c++/src/io/OutputStream.cc | 4 ++--
c++/src/io/OutputStream.hh | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/c++/src/io/InputStream.cc b/c++/src/io/InputStream.cc
index 06ef40bd4c..5e1dc00ccd 100644
--- a/c++/src/io/InputStream.cc
+++ b/c++/src/io/InputStream.cc
@@ -112,8 +112,8 @@ namespace orc {
return false;
}

- google::protobuf::int64 SeekableArrayInputStream::ByteCount() const {
- return static_cast<google::protobuf::int64>(position_);
+ int64_t SeekableArrayInputStream::ByteCount() const {
+ return static_cast<int64_t>(position_);
}

void SeekableArrayInputStream::seek(PositionProvider& seekPosition) {
diff --git a/c++/src/io/InputStream.hh b/c++/src/io/InputStream.hh
index 07aa623b5f..8b251c9301 100644
--- a/c++/src/io/InputStream.hh
+++ b/c++/src/io/InputStream.hh
@@ -72,7 +72,7 @@ namespace orc {
virtual bool Next(const void** data, int* size) override;
virtual void BackUp(int count) override;
virtual bool Skip(int count) override;
- virtual google::protobuf::int64 ByteCount() const override;
+ virtual int64_t ByteCount() const override;
virtual void seek(PositionProvider& position) override;
virtual std::string getName() const override;
};
diff --git a/c++/src/io/OutputStream.cc b/c++/src/io/OutputStream.cc
index fbf1ca61dd..a55050d122 100644
--- a/c++/src/io/OutputStream.cc
+++ b/c++/src/io/OutputStream.cc
@@ -65,8 +65,8 @@ namespace orc {
// PASS
}

- google::protobuf::int64 BufferedOutputStream::ByteCount() const {
- return static_cast<google::protobuf::int64>(dataBuffer_->size());
+ int64_t BufferedOutputStream::ByteCount() const {
+ return static_cast<int64_t>(dataBuffer_->size());
}

bool BufferedOutputStream::WriteAliasedRaw(const void*, int) {
diff --git a/c++/src/io/OutputStream.hh b/c++/src/io/OutputStream.hh
index 6319de96d6..b029818125 100644
--- a/c++/src/io/OutputStream.hh
+++ b/c++/src/io/OutputStream.hh
@@ -61,7 +61,7 @@ namespace orc {

virtual bool Next(void** data, int* size) override;
virtual void BackUp(int count) override;
- virtual google::protobuf::int64 ByteCount() const override;
+ virtual int64_t ByteCount() const override;
virtual bool WriteAliasedRaw(const void* data, int size) override;
virtual bool AllowsAliasing() const override;

4 changes: 2 additions & 2 deletions cpp/thirdparty/versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ ARROW_OPENTELEMETRY_BUILD_VERSION=v1.21.0
ARROW_OPENTELEMETRY_BUILD_SHA256_CHECKSUM=98e5546f577a11b52a57faed1f4cc60d8c1daa44760eba393f43eab5a8ec46a2
ARROW_OPENTELEMETRY_PROTO_BUILD_VERSION=v1.7.0
ARROW_OPENTELEMETRY_PROTO_BUILD_SHA256_CHECKSUM=11330d850f5e24d34c4246bc8cb21fcd311e7565d219195713455a576bb11bed
ARROW_ORC_BUILD_VERSION=2.1.2
ARROW_ORC_BUILD_SHA256_CHECKSUM=55451e65dea6ed42afb39fe33a88f9dcea8928dca0a0c9c23ef5545587810b4c
ARROW_ORC_BUILD_VERSION=2.2.0
ARROW_ORC_BUILD_SHA256_CHECKSUM=b15aca45a7e73ffbd1bbc36a78cd1422d41f07721092a25f43448e6e16f4763b
ARROW_PROTOBUF_BUILD_VERSION=v21.3
ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM=2f723218f6cb709ae4cdc4fb5ed56a5951fc5d466f0128ce4c946b8c78c8c49f
# Because of https://github.com/Tencent/rapidjson/pull/1323, we require
Expand Down
Loading