From 4d398bd1633e3721652752c213560bcd5c987882 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 14 Nov 2025 10:25:32 +0100 Subject: [PATCH 1/9] Temporary fetch content protobug initial tests --- cpp/CMakePresets.json | 1 - cpp/cmake_modules/ThirdpartyToolchain.cmake | 204 +++++++++++++------- cpp/src/arrow/flight/CMakeLists.txt | 5 + cpp/src/arrow/flight/sql/CMakeLists.txt | 11 +- 4 files changed, 143 insertions(+), 78 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 8c29c9a2672a..193562d6c2e0 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -158,7 +158,6 @@ { "name": "features-python-maximal", "inherits": [ - "features-cuda", "features-filesystems", "features-flight-sql", "features-gandiva", diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index fae39acd1bd0..e9f3c8f88476 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1867,83 +1867,123 @@ endif() # ---------------------------------------------------------------------- # Protocol Buffers (required for ORC, Flight and Substrait libraries) -macro(build_protobuf) - message(STATUS "Building Protocol Buffers from source") - set(PROTOBUF_VENDORED TRUE) - set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep-install") +function(build_protobuf) + list(APPEND CMAKE_MESSAGE_INDENT "PROTOBUF: ") + message(STATUS "Building Protocol Buffers from source using FetchContent") + set(PROTOBUF_VENDORED + TRUE + PARENT_SCOPE) + set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_fc-install") + set(PROTOBUF_PREFIX + "${PROTOBUF_PREFIX}" + PARENT_SCOPE) set(PROTOBUF_INCLUDE_DIR "${PROTOBUF_PREFIX}/include") + set(PROTOBUF_INCLUDE_DIR + "${PROTOBUF_INCLUDE_DIR}" + PARENT_SCOPE) + + fetchcontent_declare(protobuf + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}" + SOURCE_SUBDIR cmake) + + prepare_fetchcontent() + # Configure Protobuf options before FetchContent + # This flag is based on what the user initially requested but if # we've fallen back to building protobuf we always build it statically # so we need to reset the flag so that we can link against it correctly # later. set(Protobuf_USE_STATIC_LIBS ON) - # Newer protobuf releases always have a lib prefix independent from CMAKE_STATIC_LIBRARY_PREFIX - set(PROTOBUF_STATIC_LIB - "${PROTOBUF_PREFIX}/lib/libprotobuf${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(PROTOC_STATIC_LIB "${PROTOBUF_PREFIX}/lib/libprotoc${CMAKE_STATIC_LIBRARY_SUFFIX}") - set(Protobuf_PROTOC_LIBRARY "${PROTOC_STATIC_LIB}") - set(PROTOBUF_COMPILER "${PROTOBUF_PREFIX}/bin/protoc") # Strip lto flags (which may be added by dh_auto_configure) # See https://github.com/protocolbuffers/protobuf/issues/7092 - set(PROTOBUF_C_FLAGS ${EP_C_FLAGS}) - set(PROTOBUF_CXX_FLAGS ${EP_CXX_FLAGS}) - string(REPLACE "-flto=auto" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}") - string(REPLACE "-ffat-lto-objects" "" PROTOBUF_C_FLAGS "${PROTOBUF_C_FLAGS}") - string(REPLACE "-flto=auto" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}") - string(REPLACE "-ffat-lto-objects" "" PROTOBUF_CXX_FLAGS "${PROTOBUF_CXX_FLAGS}") - set(PROTOBUF_CMAKE_ARGS - ${EP_COMMON_CMAKE_ARGS} - "-DCMAKE_CXX_FLAGS=${PROTOBUF_CXX_FLAGS}" - "-DCMAKE_C_FLAGS=${PROTOBUF_C_FLAGS}" - "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX}" - -Dprotobuf_BUILD_TESTS=OFF - -Dprotobuf_DEBUG_POSTFIX=) + string(REPLACE "-flto=auto" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-ffat-lto-objects" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") + string(REPLACE "-flto=auto" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + string(REPLACE "-ffat-lto-objects" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + set(protobuf_BUILD_TESTS OFF) if(MSVC AND NOT ARROW_USE_STATIC_CRT) - list(APPEND PROTOBUF_CMAKE_ARGS "-Dprotobuf_MSVC_STATIC_RUNTIME=OFF") - endif() - if(ZLIB_ROOT) - list(APPEND PROTOBUF_CMAKE_ARGS "-DZLIB_ROOT=${ZLIB_ROOT}") + set(protobuf_MSVC_STATIC_RUNTIME OFF) endif() - set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR - "cmake") - externalproject_add(protobuf_ep - ${EP_COMMON_OPTIONS} ${PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS} - BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_COMPILER}" - BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + fetchcontent_makeavailable(protobuf) - file(MAKE_DIRECTORY "${PROTOBUF_INCLUDE_DIR}") + # Get the actual include directory from the protobuf target + # For FetchContent, this points to the source directory which contains the .proto files + set(PROTOBUF_INCLUDE_DIR "${protobuf_SOURCE_DIR}/src") # For compatibility of CMake's FindProtobuf.cmake. set(Protobuf_INCLUDE_DIRS "${PROTOBUF_INCLUDE_DIR}") + set(Protobuf_INCLUDE_DIRS + "${Protobuf_INCLUDE_DIRS}" + PARENT_SCOPE) + # Set import dirs so protoc can find well-known types like timestamp.proto + set(Protobuf_IMPORT_DIRS "${PROTOBUF_INCLUDE_DIR}") + set(Protobuf_IMPORT_DIRS + "${Protobuf_IMPORT_DIRS}" + PARENT_SCOPE) + + # For FetchContent builds, protoc and libprotoc are regular targets, not imported + # We can't get their locations at configure time, so we set placeholders + # The actual locations will be resolved at build time or by the install step + set(PROTOBUF_COMPILER "$") + set(PROTOC_STATIC_LIB "$") + set(Protobuf_PROTOC_LIBRARY "${PROTOC_STATIC_LIB}") + + # gRPC requires Protobuf to be installed to a known location. + # We have to do this in two steps to avoid double installation of Protobuf + # when Arrow is installed. + # This custom target ensures Protobuf is built before we install + # TODO: Investigate if libprotobuf-lite is actually needed by Arrow/gRPC + add_custom_target(protobuf_built + DEPENDS protobuf::libprotobuf protobuf::libprotobuf-lite + protobuf::libprotoc protobuf::protoc) + + # Disable Protobuf's install script after it's built to prevent double installation + add_custom_command(OUTPUT "${protobuf_BINARY_DIR}/cmake_install.cmake.saved" + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${protobuf_BINARY_DIR}/cmake_install.cmake" + "${protobuf_BINARY_DIR}/cmake_install.cmake.saved" + COMMAND ${CMAKE_COMMAND} -E echo + "# Protobuf install disabled to prevent double installation with Arrow" + > "${protobuf_BINARY_DIR}/cmake_install.cmake" + DEPENDS protobuf_built + COMMENT "Disabling Protobuf install to prevent double installation" + VERBATIM) + + add_custom_target(protobuf_install_disabled ALL + DEPENDS "${protobuf_BINARY_DIR}/cmake_install.cmake.saved") + + # Install Protobuf to PROTOBUF_PREFIX for gRPC to find + add_custom_command(OUTPUT "${PROTOBUF_PREFIX}/.protobuf_installed" + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${protobuf_BINARY_DIR}/cmake_install.cmake.saved" + "${protobuf_BINARY_DIR}/cmake_install.cmake.tmp" + COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_PREFIX} + -DCMAKE_INSTALL_CONFIG_NAME=$ -P + "${protobuf_BINARY_DIR}/cmake_install.cmake.tmp" || + ${CMAKE_COMMAND} -E true + COMMAND ${CMAKE_COMMAND} -E touch + "${PROTOBUF_PREFIX}/.protobuf_installed" + DEPENDS protobuf_install_disabled + COMMENT "Installing Protobuf to ${PROTOBUF_PREFIX} for gRPC" + VERBATIM) + + # Make protobuf_fc depend on the install completion marker + add_custom_target(protobuf_fc DEPENDS "${PROTOBUF_PREFIX}/.protobuf_installed") + + # For FetchContent, we don't create arrow::protobuf::* aliases because + # protobuf::* are themselves aliases and CMake doesn't allow alias-to-alias. + # The code in resolve_dependency below will use protobuf::* targets directly. - add_library(arrow::protobuf::libprotobuf STATIC IMPORTED) - set_target_properties(arrow::protobuf::libprotobuf PROPERTIES IMPORTED_LOCATION - "${PROTOBUF_STATIC_LIB}") - target_include_directories(arrow::protobuf::libprotobuf BEFORE - INTERFACE "${PROTOBUF_INCLUDE_DIR}") - add_library(arrow::protobuf::libprotoc STATIC IMPORTED) - set_target_properties(arrow::protobuf::libprotoc PROPERTIES IMPORTED_LOCATION - "${PROTOC_STATIC_LIB}") - target_include_directories(arrow::protobuf::libprotoc BEFORE - INTERFACE "${PROTOBUF_INCLUDE_DIR}") - add_executable(arrow::protobuf::protoc IMPORTED) - set_target_properties(arrow::protobuf::protoc PROPERTIES IMPORTED_LOCATION - "${PROTOBUF_COMPILER}") - - add_dependencies(arrow::protobuf::libprotobuf protobuf_ep) - add_dependencies(arrow::protobuf::protoc protobuf_ep) - - list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf) + list(APPEND ARROW_BUNDLED_STATIC_LIBS protobuf::libprotobuf) if(CMAKE_CROSSCOMPILING) # If we are cross compiling, we need to build protoc for the host # system also, as it is used when building Arrow - # We do this by calling CMake as a child process - # with CXXFLAGS / CFLAGS and CMake flags cleared. - set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") + # We reuse the FetchContent downloaded source but build it with host compiler + set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_fc_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") set(PROTOBUF_HOST_CMAKE_ARGS @@ -1953,21 +1993,23 @@ macro(build_protobuf) -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_DEBUG_POSTFIX=) - externalproject_add(protobuf_ep_host + # Use the already-downloaded FetchContent source directory + externalproject_add(protobuf_fc_host ${EP_COMMON_OPTIONS} CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} + SOURCE_DIR "${protobuf_SOURCE_DIR}" + SOURCE_SUBDIR cmake BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" - BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + DOWNLOAD_COMMAND "" DOWNLOAD_EXTRACT_TIMESTAMP TRUE) add_executable(arrow::protobuf::host_protoc IMPORTED) set_target_properties(arrow::protobuf::host_protoc PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") - add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) + add_dependencies(arrow::protobuf::host_protoc protobuf_fc_host) endif() -endmacro() + list(POP_BACK CMAKE_MESSAGE_INDENT) +endfunction() if(ARROW_WITH_PROTOBUF) if(ARROW_FLIGHT_SQL) @@ -2128,8 +2170,13 @@ macro(build_substrait) SKIP_UNITY_BUILD_INCLUSION TRUE) list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${SUBSTRAIT_PROTO_GEN}.${EXT}") endforeach() + # Add protobuf include directory for well-known types when using vendored protobuf + set(SUBSTRAIT_PROTOC_INCLUDES "-I${SUBSTRAIT_LOCAL_DIR}/proto") + if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) + list(APPEND SUBSTRAIT_PROTOC_INCLUDES "-I${Protobuf_INCLUDE_DIRS}") + endif() add_custom_command(OUTPUT "${SUBSTRAIT_PROTO_GEN}.cc" "${SUBSTRAIT_PROTO_GEN}.h" - COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${SUBSTRAIT_LOCAL_DIR}/proto" + COMMAND ${ARROW_PROTOBUF_PROTOC} ${SUBSTRAIT_PROTOC_INCLUDES} "--cpp_out=${SUBSTRAIT_CPP_DIR}" "${SUBSTRAIT_LOCAL_DIR}/proto/substrait/${SUBSTRAIT_PROTO}.proto" DEPENDS ${PROTO_DEPENDS} substrait_ep) @@ -2147,10 +2194,15 @@ macro(build_substrait) SKIP_UNITY_BUILD_INCLUSION TRUE) list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${ARROW_SUBSTRAIT_PROTO_GEN}.${EXT}") endforeach() + # Add protobuf include directory for well-known types when using vendored protobuf + set(ARROW_SUBSTRAIT_PROTOC_INCLUDES "-I${SUBSTRAIT_LOCAL_DIR}/proto" + "-I${ARROW_SUBSTRAIT_PROTOS_DIR}") + if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) + list(APPEND ARROW_SUBSTRAIT_PROTOC_INCLUDES "-I${Protobuf_INCLUDE_DIRS}") + endif() add_custom_command(OUTPUT "${ARROW_SUBSTRAIT_PROTO_GEN}.cc" "${ARROW_SUBSTRAIT_PROTO_GEN}.h" - COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${SUBSTRAIT_LOCAL_DIR}/proto" - "-I${ARROW_SUBSTRAIT_PROTOS_DIR}" + COMMAND ${ARROW_PROTOBUF_PROTOC} ${ARROW_SUBSTRAIT_PROTOC_INCLUDES} "--cpp_out=${SUBSTRAIT_CPP_DIR}" "${ARROW_SUBSTRAIT_PROTOS_DIR}/substrait/${ARROW_SUBSTRAIT_PROTO}.proto" DEPENDS ${PROTO_DEPENDS} substrait_ep) @@ -3257,6 +3309,9 @@ macro(build_grpc) if(CARES_VENDORED) add_dependencies(grpc_dependencies cares_fc) endif() + if(PROTOBUF_VENDORED) + add_dependencies(grpc_dependencies protobuf_fc) + endif() if(GFLAGS_VENDORED) add_dependencies(grpc_dependencies gflags_ep) @@ -3269,9 +3324,15 @@ macro(build_grpc) add_dependencies(grpc_dependencies ${ARROW_PROTOBUF_LIBPROTOBUF} c-ares::cares ZLIB::ZLIB) - get_target_property(GRPC_PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} - INTERFACE_INCLUDE_DIRECTORIES) - get_filename_component(GRPC_PB_ROOT "${GRPC_PROTOBUF_INCLUDE_DIR}" DIRECTORY) + # For FetchContent Protobuf, use the install prefix directly + if(PROTOBUF_VENDORED) + set(GRPC_PB_ROOT "${PROTOBUF_PREFIX}") + else() + get_target_property(GRPC_PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} + INTERFACE_INCLUDE_DIRECTORIES) + get_filename_component(GRPC_PB_ROOT "${GRPC_PROTOBUF_INCLUDE_DIR}" DIRECTORY) + endif() + get_target_property(GRPC_Protobuf_PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC} IMPORTED_LOCATION) @@ -3946,11 +4007,6 @@ function(build_orc) INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(Protobuf_ROOT "${PROTOBUF_INCLUDE_DIR}" DIRECTORY) set(PROTOBUF_HOME ${Protobuf_ROOT}) - # ORC uses this. - if(PROTOBUF_VENDORED) - target_include_directories(${ARROW_PROTOBUF_LIBPROTOC} - INTERFACE "${PROTOBUF_INCLUDE_DIR}") - endif() set(PROTOBUF_EXECUTABLE ${ARROW_PROTOBUF_PROTOC}) set(PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF}) set(PROTOC_LIBRARY ${ARROW_PROTOBUF_LIBPROTOC}) diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index 359f6d372778..966cf0ed9395 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -101,6 +101,11 @@ set(FLIGHT_GENERATED_PROTO_FILES set(PROTO_DEPENDS ${FLIGHT_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF} gRPC::grpc_cpp_plugin) set(FLIGHT_PROTOC_COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_PROTO_PATH}") +# Add protobuf include directory for well-known types (e.g., timestamp.proto) +# This is needed when using vendored protobuf where protoc doesn't have built-in paths +if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) + list(APPEND FLIGHT_PROTOC_COMMAND "-I${Protobuf_INCLUDE_DIRS}") +endif() if(Protobuf_VERSION VERSION_LESS 3.15) list(APPEND FLIGHT_PROTOC_COMMAND "--experimental_allow_proto3_optional") endif() diff --git a/cpp/src/arrow/flight/sql/CMakeLists.txt b/cpp/src/arrow/flight/sql/CMakeLists.txt index 6fcdaba2ec80..9695e0c9917a 100644 --- a/cpp/src/arrow/flight/sql/CMakeLists.txt +++ b/cpp/src/arrow/flight/sql/CMakeLists.txt @@ -36,9 +36,14 @@ set(FLIGHT_SQL_GENERATED_PROTO_FILES "${CMAKE_CURRENT_BINARY_DIR}/FlightSql.pb.c set(PROTO_DEPENDS ${FLIGHT_SQL_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF}) -set(FLIGHT_SQL_PROTOC_COMMAND - ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_SQL_PROTO_PATH}" - "--cpp_out=dllexport_decl=ARROW_FLIGHT_SQL_EXPORT:${CMAKE_CURRENT_BINARY_DIR}") +set(FLIGHT_SQL_PROTOC_COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_SQL_PROTO_PATH}") +# Add protobuf include directory for well-known types (e.g., descriptor.proto) +# This is needed when using vendored protobuf where protoc doesn't have built-in paths +if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) + list(APPEND FLIGHT_SQL_PROTOC_COMMAND "-I${Protobuf_INCLUDE_DIRS}") +endif() +list(APPEND FLIGHT_SQL_PROTOC_COMMAND + "--cpp_out=dllexport_decl=ARROW_FLIGHT_SQL_EXPORT:${CMAKE_CURRENT_BINARY_DIR}") if(Protobuf_VERSION VERSION_LESS 3.15) list(APPEND FLIGHT_SQL_PROTOC_COMMAND "--experimental_allow_proto3_optional") endif() From 09fc6f8023a73b5864b717968e27a47acacddf6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 20 Nov 2025 09:23:20 +0100 Subject: [PATCH 2/9] Minor fixes for protobuf build --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index e9f3c8f88476..8b2ff528b790 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1908,6 +1908,9 @@ function(build_protobuf) set(protobuf_MSVC_STATIC_RUNTIME OFF) endif() + # Unity build causes some build errors + set(CMAKE_UNITY_BUILD OFF) + fetchcontent_makeavailable(protobuf) # Get the actual include directory from the protobuf target @@ -4290,6 +4293,11 @@ macro(build_opentelemetry) add_dependencies(opentelemetry_dependencies nlohmann_json::nlohmann_json opentelemetry_proto_ep ${ARROW_PROTOBUF_LIBPROTOBUF}) + # Ensure vendored protobuf is installed before OpenTelemetry builds + if(PROTOBUF_VENDORED) + add_dependencies(opentelemetry_dependencies protobuf_fc) + endif() + string(JOIN "${EP_LIST_SEPARATOR}" OPENTELEMETRY_PREFIX_PATH ${OPENTELEMETRY_PREFIX_PATH_LIST}) list(APPEND OPENTELEMETRY_CMAKE_ARGS "-DCMAKE_PREFIX_PATH=${OPENTELEMETRY_PREFIX_PATH}") From 894de4afde83576836d140ebd5a87503fd83e16e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 20 Nov 2025 10:23:10 +0100 Subject: [PATCH 3/9] Try fixing windows builds --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 32 +++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 8b2ff528b790..a2e3c065c31c 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1873,6 +1873,11 @@ function(build_protobuf) set(PROTOBUF_VENDORED TRUE PARENT_SCOPE) + # When building protobuf from source via FetchContent, we always build it as static + # Update ARROW_PROTOBUF_USE_SHARED to reflect this (must be CACHE to be visible to ORC) + set(ARROW_PROTOBUF_USE_SHARED + OFF + CACHE BOOL "" FORCE) set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_fc-install") set(PROTOBUF_PREFIX "${PROTOBUF_PREFIX}" @@ -1882,6 +1887,12 @@ function(build_protobuf) "${PROTOBUF_INCLUDE_DIR}" PARENT_SCOPE) + # Always build protobuf as static library to avoid DLL import/export issues on Windows + # Must be set BEFORE fetchcontent_declare and as CACHE FORCE to override the option() + set(protobuf_BUILD_SHARED_LIBS + OFF + CACHE BOOL "Build protobuf shared libs" FORCE) + fetchcontent_declare(protobuf URL ${PROTOBUF_SOURCE_URL} URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}" @@ -1913,6 +1924,27 @@ function(build_protobuf) fetchcontent_makeavailable(protobuf) + # Ensure protobuf targets don't have PROTOBUF_USE_DLLS defined + # This is critical on Windows to avoid DLL import/export issues when linking static protobuf + if(TARGET protobuf::libprotobuf) + get_target_property(_protobuf_compile_defs protobuf::libprotobuf + INTERFACE_COMPILE_DEFINITIONS) + if(_protobuf_compile_defs) + list(REMOVE_ITEM _protobuf_compile_defs PROTOBUF_USE_DLLS) + set_target_properties(protobuf::libprotobuf PROPERTIES INTERFACE_COMPILE_DEFINITIONS + "${_protobuf_compile_defs}") + endif() + endif() + if(TARGET protobuf::libprotoc) + get_target_property(_protoc_compile_defs protobuf::libprotoc + INTERFACE_COMPILE_DEFINITIONS) + if(_protoc_compile_defs) + list(REMOVE_ITEM _protoc_compile_defs PROTOBUF_USE_DLLS) + set_target_properties(protobuf::libprotoc PROPERTIES INTERFACE_COMPILE_DEFINITIONS + "${_protoc_compile_defs}") + endif() + endif() + # Get the actual include directory from the protobuf target # For FetchContent, this points to the source directory which contains the .proto files set(PROTOBUF_INCLUDE_DIR "${protobuf_SOURCE_DIR}/src") From 1840e6eb9e609f28d96bcf919ad5d3721a704c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Nov 2025 14:39:38 +0100 Subject: [PATCH 4/9] Fix for ORC Windows --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index a2e3c065c31c..04673eef3a35 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2094,7 +2094,10 @@ if(ARROW_WITH_PROTOBUF) REQUIRED_VERSION ${ARROW_PROTOBUF_REQUIRED_VERSION}) - if(NOT Protobuf_USE_STATIC_LIBS AND MSVC_TOOLCHAIN) + # If PROTOBUF_VENDORED we build static protobuf from source via FetchContent + if(NOT PROTOBUF_VENDORED + AND NOT Protobuf_USE_STATIC_LIBS + AND MSVC_TOOLCHAIN) add_definitions(-DPROTOBUF_USE_DLLS) endif() From b4901bffef49ee40a9df70ae82e6c23a291a6800 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Nov 2025 15:39:12 +0100 Subject: [PATCH 5/9] Use generator expression for OPENTELEMETRY_PROTOBUF_LIBRARY and OPENTELEMETRY_PROTOC_EXECUTABLE so it works with FetchContent targets too --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 04673eef3a35..941ef6742ac1 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4288,10 +4288,8 @@ macro(build_opentelemetry) get_target_property(OPENTELEMETRY_PROTOBUF_INCLUDE_DIR ${ARROW_PROTOBUF_LIBPROTOBUF} INTERFACE_INCLUDE_DIRECTORIES) - get_target_property(OPENTELEMETRY_PROTOBUF_LIBRARY ${ARROW_PROTOBUF_LIBPROTOBUF} - IMPORTED_LOCATION) - get_target_property(OPENTELEMETRY_PROTOC_EXECUTABLE ${ARROW_PROTOBUF_PROTOC} - IMPORTED_LOCATION) + set(OPENTELEMETRY_PROTOBUF_LIBRARY "$") + set(OPENTELEMETRY_PROTOC_EXECUTABLE "$") list(APPEND OPENTELEMETRY_CMAKE_ARGS -DWITH_OTLP_HTTP=ON From 92be5ec1f6caac51252916581e49e938ce0b141d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Nov 2025 18:32:35 +0100 Subject: [PATCH 6/9] Clean up of unnecessary tests --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 53 ++------------------- 1 file changed, 4 insertions(+), 49 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 941ef6742ac1..0bc21044c82d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1873,11 +1873,6 @@ function(build_protobuf) set(PROTOBUF_VENDORED TRUE PARENT_SCOPE) - # When building protobuf from source via FetchContent, we always build it as static - # Update ARROW_PROTOBUF_USE_SHARED to reflect this (must be CACHE to be visible to ORC) - set(ARROW_PROTOBUF_USE_SHARED - OFF - CACHE BOOL "" FORCE) set(PROTOBUF_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_fc-install") set(PROTOBUF_PREFIX "${PROTOBUF_PREFIX}" @@ -1887,12 +1882,6 @@ function(build_protobuf) "${PROTOBUF_INCLUDE_DIR}" PARENT_SCOPE) - # Always build protobuf as static library to avoid DLL import/export issues on Windows - # Must be set BEFORE fetchcontent_declare and as CACHE FORCE to override the option() - set(protobuf_BUILD_SHARED_LIBS - OFF - CACHE BOOL "Build protobuf shared libs" FORCE) - fetchcontent_declare(protobuf URL ${PROTOBUF_SOURCE_URL} URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}" @@ -1924,27 +1913,6 @@ function(build_protobuf) fetchcontent_makeavailable(protobuf) - # Ensure protobuf targets don't have PROTOBUF_USE_DLLS defined - # This is critical on Windows to avoid DLL import/export issues when linking static protobuf - if(TARGET protobuf::libprotobuf) - get_target_property(_protobuf_compile_defs protobuf::libprotobuf - INTERFACE_COMPILE_DEFINITIONS) - if(_protobuf_compile_defs) - list(REMOVE_ITEM _protobuf_compile_defs PROTOBUF_USE_DLLS) - set_target_properties(protobuf::libprotobuf PROPERTIES INTERFACE_COMPILE_DEFINITIONS - "${_protobuf_compile_defs}") - endif() - endif() - if(TARGET protobuf::libprotoc) - get_target_property(_protoc_compile_defs protobuf::libprotoc - INTERFACE_COMPILE_DEFINITIONS) - if(_protoc_compile_defs) - list(REMOVE_ITEM _protoc_compile_defs PROTOBUF_USE_DLLS) - set_target_properties(protobuf::libprotoc PROPERTIES INTERFACE_COMPILE_DEFINITIONS - "${_protoc_compile_defs}") - endif() - endif() - # Get the actual include directory from the protobuf target # For FetchContent, this points to the source directory which contains the .proto files set(PROTOBUF_INCLUDE_DIR "${protobuf_SOURCE_DIR}/src") @@ -1970,7 +1938,6 @@ function(build_protobuf) # We have to do this in two steps to avoid double installation of Protobuf # when Arrow is installed. # This custom target ensures Protobuf is built before we install - # TODO: Investigate if libprotobuf-lite is actually needed by Arrow/gRPC add_custom_target(protobuf_built DEPENDS protobuf::libprotobuf protobuf::libprotobuf-lite protobuf::libprotoc protobuf::protoc) @@ -1990,7 +1957,7 @@ function(build_protobuf) add_custom_target(protobuf_install_disabled ALL DEPENDS "${protobuf_BINARY_DIR}/cmake_install.cmake.saved") - # Install Protobuf to PROTOBUF_PREFIX for gRPC to find + # Install Protobuf to PROTOBUF_PREFIX for dependendants to find add_custom_command(OUTPUT "${PROTOBUF_PREFIX}/.protobuf_installed" COMMAND ${CMAKE_COMMAND} -E copy_if_different "${protobuf_BINARY_DIR}/cmake_install.cmake.saved" @@ -2007,18 +1974,13 @@ function(build_protobuf) # Make protobuf_fc depend on the install completion marker add_custom_target(protobuf_fc DEPENDS "${PROTOBUF_PREFIX}/.protobuf_installed") - - # For FetchContent, we don't create arrow::protobuf::* aliases because - # protobuf::* are themselves aliases and CMake doesn't allow alias-to-alias. - # The code in resolve_dependency below will use protobuf::* targets directly. - list(APPEND ARROW_BUNDLED_STATIC_LIBS protobuf::libprotobuf) if(CMAKE_CROSSCOMPILING) # If we are cross compiling, we need to build protoc for the host # system also, as it is used when building Arrow # We reuse the FetchContent downloaded source but build it with host compiler - set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_fc_host-install") + set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") set(PROTOBUF_HOST_CMAKE_ARGS @@ -2041,7 +2003,7 @@ function(build_protobuf) set_target_properties(arrow::protobuf::host_protoc PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") - add_dependencies(arrow::protobuf::host_protoc protobuf_fc_host) + add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) endif() list(POP_BACK CMAKE_MESSAGE_INDENT) endfunction() @@ -2208,7 +2170,6 @@ macro(build_substrait) SKIP_UNITY_BUILD_INCLUSION TRUE) list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${SUBSTRAIT_PROTO_GEN}.${EXT}") endforeach() - # Add protobuf include directory for well-known types when using vendored protobuf set(SUBSTRAIT_PROTOC_INCLUDES "-I${SUBSTRAIT_LOCAL_DIR}/proto") if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) list(APPEND SUBSTRAIT_PROTOC_INCLUDES "-I${Protobuf_INCLUDE_DIRS}") @@ -2232,15 +2193,9 @@ macro(build_substrait) SKIP_UNITY_BUILD_INCLUSION TRUE) list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${ARROW_SUBSTRAIT_PROTO_GEN}.${EXT}") endforeach() - # Add protobuf include directory for well-known types when using vendored protobuf - set(ARROW_SUBSTRAIT_PROTOC_INCLUDES "-I${SUBSTRAIT_LOCAL_DIR}/proto" - "-I${ARROW_SUBSTRAIT_PROTOS_DIR}") - if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) - list(APPEND ARROW_SUBSTRAIT_PROTOC_INCLUDES "-I${Protobuf_INCLUDE_DIRS}") - endif() add_custom_command(OUTPUT "${ARROW_SUBSTRAIT_PROTO_GEN}.cc" "${ARROW_SUBSTRAIT_PROTO_GEN}.h" - COMMAND ${ARROW_PROTOBUF_PROTOC} ${ARROW_SUBSTRAIT_PROTOC_INCLUDES} + COMMAND ${ARROW_PROTOBUF_PROTOC} ${SUBSTRAIT_PROTOC_INCLUDES} "--cpp_out=${SUBSTRAIT_CPP_DIR}" "${ARROW_SUBSTRAIT_PROTOS_DIR}/substrait/${ARROW_SUBSTRAIT_PROTO}.proto" DEPENDS ${PROTO_DEPENDS} substrait_ep) From 361595d4a03372406723a48db128f0caa69206c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Nov 2025 19:04:41 +0100 Subject: [PATCH 7/9] Add back missing ARROW_SUBSTRAIT_PROTOS_DIR --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 0bc21044c82d..cf6d9c40aa1f 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2193,9 +2193,11 @@ macro(build_substrait) SKIP_UNITY_BUILD_INCLUSION TRUE) list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${ARROW_SUBSTRAIT_PROTO_GEN}.${EXT}") endforeach() + set(ARROW_SUBSTRAIT_PROTOC_INCLUDES ${SUBSTRAIT_PROTOC_INCLUDES} + "-I${ARROW_SUBSTRAIT_PROTOS_DIR}") add_custom_command(OUTPUT "${ARROW_SUBSTRAIT_PROTO_GEN}.cc" "${ARROW_SUBSTRAIT_PROTO_GEN}.h" - COMMAND ${ARROW_PROTOBUF_PROTOC} ${SUBSTRAIT_PROTOC_INCLUDES} + COMMAND ${ARROW_PROTOBUF_PROTOC} ${ARROW_SUBSTRAIT_PROTOC_INCLUDES} "--cpp_out=${SUBSTRAIT_CPP_DIR}" "${ARROW_SUBSTRAIT_PROTOS_DIR}/substrait/${ARROW_SUBSTRAIT_PROTO}.proto" DEPENDS ${PROTO_DEPENDS} substrait_ep) From 7919c19969414472b9d086ad782a53990764c7c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Nov 2025 19:12:59 +0100 Subject: [PATCH 8/9] Some minor fixes and wrong commit on CMakePresets.json --- cpp/CMakePresets.json | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 193562d6c2e0..8c29c9a2672a 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -158,6 +158,7 @@ { "name": "features-python-maximal", "inherits": [ + "features-cuda", "features-filesystems", "features-flight-sql", "features-gandiva", diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index cf6d9c40aa1f..4e0ad66d7d49 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1888,7 +1888,6 @@ function(build_protobuf) SOURCE_SUBDIR cmake) prepare_fetchcontent() - # Configure Protobuf options before FetchContent # This flag is based on what the user initially requested but if # we've fallen back to building protobuf we always build it statically @@ -1979,7 +1978,6 @@ function(build_protobuf) if(CMAKE_CROSSCOMPILING) # If we are cross compiling, we need to build protoc for the host # system also, as it is used when building Arrow - # We reuse the FetchContent downloaded source but build it with host compiler set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") @@ -1990,8 +1988,8 @@ function(build_protobuf) -Dprotobuf_BUILD_TESTS=OFF -Dprotobuf_DEBUG_POSTFIX=) - # Use the already-downloaded FetchContent source directory - externalproject_add(protobuf_fc_host + # We reuse the FetchContent downloaded source but build it with host compiler + externalproject_add(protobuf_ep_host ${EP_COMMON_OPTIONS} CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} SOURCE_DIR "${protobuf_SOURCE_DIR}" From 46d86e26f24147db7a6b57e70dbc6e1c50410f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Nov 2025 09:41:14 +0100 Subject: [PATCH 9/9] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 ++++++------ cpp/src/arrow/flight/CMakeLists.txt | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 4e0ad66d7d49..3f381649f5af 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1868,7 +1868,7 @@ endif() # Protocol Buffers (required for ORC, Flight and Substrait libraries) function(build_protobuf) - list(APPEND CMAKE_MESSAGE_INDENT "PROTOBUF: ") + list(APPEND CMAKE_MESSAGE_INDENT "Protobuf: ") message(STATUS "Building Protocol Buffers from source using FetchContent") set(PROTOBUF_VENDORED TRUE @@ -1912,8 +1912,8 @@ function(build_protobuf) fetchcontent_makeavailable(protobuf) - # Get the actual include directory from the protobuf target - # For FetchContent, this points to the source directory which contains the .proto files + # Get the actual include directory from the Protobuf target. + # For FetchContent, this points to the source directory which contains the .proto files. set(PROTOBUF_INCLUDE_DIR "${protobuf_SOURCE_DIR}/src") # For compatibility of CMake's FindProtobuf.cmake. set(Protobuf_INCLUDE_DIRS "${PROTOBUF_INCLUDE_DIR}") @@ -1926,9 +1926,9 @@ function(build_protobuf) "${Protobuf_IMPORT_DIRS}" PARENT_SCOPE) - # For FetchContent builds, protoc and libprotoc are regular targets, not imported - # We can't get their locations at configure time, so we set placeholders - # The actual locations will be resolved at build time or by the install step + # For FetchContent builds, protoc and libprotoc are regular targets, not imported. + # We can't get their locations at configure time, so we set placeholders. + # The actual locations will be resolved at build time or by the install step. set(PROTOBUF_COMPILER "$") set(PROTOC_STATIC_LIB "$") set(Protobuf_PROTOC_LIBRARY "${PROTOC_STATIC_LIB}") diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index 966cf0ed9395..8974c9581f7c 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -101,8 +101,8 @@ set(FLIGHT_GENERATED_PROTO_FILES set(PROTO_DEPENDS ${FLIGHT_PROTO} ${ARROW_PROTOBUF_LIBPROTOBUF} gRPC::grpc_cpp_plugin) set(FLIGHT_PROTOC_COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${FLIGHT_PROTO_PATH}") -# Add protobuf include directory for well-known types (e.g., timestamp.proto) -# This is needed when using vendored protobuf where protoc doesn't have built-in paths +# Add protobuf include directory for well-known types (e.g., timestamp.proto). +# This is needed when using vendored Protobuf where protoc doesn't have built-in paths. if(PROTOBUF_VENDORED AND Protobuf_INCLUDE_DIRS) list(APPEND FLIGHT_PROTOC_COMMAND "-I${Protobuf_INCLUDE_DIRS}") endif()