diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 391c43e0ac5..87932ba766c 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -375,6 +375,16 @@ function(ADD_ARROW_LIB LIB_NAME)
                           LINK_PRIVATE
                           ${ARG_SHARED_PRIVATE_LINK_LIBS})
 
+    if(USE_OBJLIB)
+      # Ensure that dependencies are built before compilation of objects in
+      # object library, rather than only before the final link step
+      foreach(SHARED_LINK_LIB ${ARG_SHARED_LINK_LIBS})
+        if(TARGET ${SHARED_LINK_LIB})
+          add_dependencies(${LIB_NAME}_objlib ${SHARED_LINK_LIB})
+        endif()
+      endforeach()
+    endif()
+
     if(ARROW_RPATH_ORIGIN)
       if(APPLE)
         set(_lib_install_rpath "@loader_path")
@@ -449,6 +459,15 @@ function(ADD_ARROW_LIB LIB_NAME)
     if(ARG_STATIC_LINK_LIBS)
       target_link_libraries(${LIB_NAME}_static LINK_PRIVATE
                             "$<BUILD_INTERFACE:${ARG_STATIC_LINK_LIBS}>")
+      if(USE_OBJLIB)
+        # Ensure that dependencies are built before compilation of objects in
+        # object library, rather than only before the final link step
+        foreach(STATIC_LINK_LIB ${ARG_STATIC_LINK_LIBS})
+          if(TARGET ${STATIC_LINK_LIB})
+            add_dependencies(${LIB_NAME}_objlib ${STATIC_LINK_LIB})
+          endif()
+        endforeach()
+      endif()
     endif()
 
     install(TARGETS ${LIB_NAME}_static ${INSTALL_IS_OPTIONAL}
diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake
index 30b1d0e075b..05fc14bbc72 100644
--- a/cpp/cmake_modules/DefineOptions.cmake
+++ b/cpp/cmake_modules/DefineOptions.cmake
@@ -478,16 +478,6 @@ advised that if this is enabled 'install' will fail silently on components;\
 that have not been built"
                 OFF)
 
-  set(ARROW_SUBSTRAIT_REPO_DEFAULT "https://github.com/substrait-io/substrait")
-  define_option_string(ARROW_SUBSTRAIT_REPO
-                       "Custom git repository URL for downloading Substrait sources.;\
-See also ARROW_SUBSTRAIT_TAG" "${ARROW_SUBSTRAIT_REPO_DEFAULT}")
-
-  set(ARROW_SUBSTRAIT_TAG_DEFAULT "e1b4c04a1b518912f4c4065b16a1b2c0ac8e14cf")
-  define_option_string(ARROW_SUBSTRAIT_TAG
-                       "Custom git hash/tag/branch for Substrait repository.;\
-See also ARROW_SUBSTRAIT_REPO" "${ARROW_SUBSTRAIT_TAG_DEFAULT}")
-
   option(ARROW_BUILD_CONFIG_SUMMARY_JSON "Summarize build configuration in a JSON file"
          ON)
 endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 5b560591235..82ed145b139 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -69,6 +69,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES
     Protobuf
     RapidJSON
     Snappy
+    Substrait
     Thrift
     utf8proc
     xsimd
@@ -173,6 +174,8 @@ macro(build_dependency DEPENDENCY_NAME)
     build_re2()
   elseif("${DEPENDENCY_NAME}" STREQUAL "Snappy")
     build_snappy()
+  elseif("${DEPENDENCY_NAME}" STREQUAL "Substrait")
+    build_substrait()
   elseif("${DEPENDENCY_NAME}" STREQUAL "Thrift")
     build_thrift()
   elseif("${DEPENDENCY_NAME}" STREQUAL "utf8proc")
@@ -309,8 +312,15 @@ endif()
 
 if(ARROW_ORC
    OR ARROW_FLIGHT
-   OR ARROW_GANDIVA
-   OR ARROW_ENGINE)
+   OR ARROW_GANDIVA)
+  set(ARROW_WITH_PROTOBUF ON)
+endif()
+
+if(ARROW_ENGINE)
+  set(ARROW_WITH_SUBSTRAIT ON)
+endif()
+
+if(ARROW_WITH_SUBSTRAIT)
   set(ARROW_WITH_PROTOBUF ON)
 endif()
 
@@ -610,6 +620,14 @@ else()
   endif()
 endif()
 
+if(DEFINED ENV{ARROW_SUBSTRAIT_URL})
+  set(SUBSTRAIT_SOURCE_URL "$ENV{ARROW_SUBSTRAIT_URL}")
+else()
+  set_urls(SUBSTRAIT_SOURCE_URL
+           "https://github.com/substrait-io/substrait/archive/${ARROW_SUBSTRAIT_BUILD_VERSION}.tar.gz"
+  )
+endif()
+
 if(DEFINED ENV{ARROW_THRIFT_URL})
   set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}")
 else()
@@ -1421,7 +1439,7 @@ if(ARROW_WITH_THRIFT)
 endif()
 
 # ----------------------------------------------------------------------
-# Protocol Buffers (required for ORC and Flight and Gandiva libraries)
+# Protocol Buffers (required for ORC, Flight, Gandiva and Substrait libraries)
 
 macro(build_protobuf)
   message("Building Protocol Buffers from source")
@@ -1605,6 +1623,93 @@ if(ARROW_WITH_PROTOBUF)
   message(STATUS "Found protobuf headers: ${PROTOBUF_INCLUDE_DIR}")
 endif()
 
+# ----------------------------------------------------------------------
+# Substrait (required by compute engine)
+
+# Fetch the Substrait sources, run protoc over the .proto files listed below
+# and compile the generated C++ into the static library target `substrait`.
+# Also sets SUBSTRAIT_INCLUDES to the include directories consumers need.
+macro(build_substrait)
+  message("Building Substrait from source")
+
+  set(SUBSTRAIT_PROTOS
+      capabilities
+      expression
+      extensions/extensions
+      function
+      parameterized_types
+      plan
+      relations
+      type
+      type_expressions)
+
+  externalproject_add(substrait_ep
+                      CONFIGURE_COMMAND ""
+                      BUILD_COMMAND ""
+                      INSTALL_COMMAND ""
+                      URL ${SUBSTRAIT_SOURCE_URL}
+                      URL_HASH "SHA256=${ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM}")
+
+  externalproject_get_property(substrait_ep SOURCE_DIR)
+  set(SUBSTRAIT_LOCAL_DIR ${SOURCE_DIR})
+
+  set(SUBSTRAIT_CPP_DIR "${CMAKE_CURRENT_BINARY_DIR}/substrait_ep-generated")
+  # protoc refuses to write into a --cpp_out directory that does not exist,
+  # so create it at configure time.
+  file(MAKE_DIRECTORY "${SUBSTRAIT_CPP_DIR}")
+
+  set(SUBSTRAIT_SUPPRESSED_WARNINGS)
+  if(MSVC)
+    # Protobuf generated files trigger some spurious warnings on MSVC.
+
+    # Implicit conversion from uint64_t to uint32_t:
+    list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4244")
+
+    # Missing dll-interface:
+    list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4251")
+  endif()
+
+  set(SUBSTRAIT_SOURCES)
+  set(SUBSTRAIT_PROTO_GEN_ALL)
+  foreach(SUBSTRAIT_PROTO ${SUBSTRAIT_PROTOS})
+    set(SUBSTRAIT_PROTO_GEN "${SUBSTRAIT_CPP_DIR}/substrait/${SUBSTRAIT_PROTO}.pb")
+
+    foreach(EXT h cc)
+      set_source_files_properties("${SUBSTRAIT_PROTO_GEN}.${EXT}"
+                                  PROPERTIES COMPILE_OPTIONS
+                                             "${SUBSTRAIT_SUPPRESSED_WARNINGS}"
+                                             GENERATED TRUE
+                                             SKIP_UNITY_BUILD_INCLUSION TRUE)
+      list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${SUBSTRAIT_PROTO_GEN}.${EXT}")
+    endforeach()
+    add_custom_command(OUTPUT "${SUBSTRAIT_PROTO_GEN}.cc" "${SUBSTRAIT_PROTO_GEN}.h"
+                       COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${SUBSTRAIT_LOCAL_DIR}/proto"
+                               "--cpp_out=${SUBSTRAIT_CPP_DIR}"
+                               "${SUBSTRAIT_LOCAL_DIR}/proto/substrait/${SUBSTRAIT_PROTO}.proto"
+                       DEPENDS ${PROTO_DEPENDS} substrait_ep)
+
+    list(APPEND SUBSTRAIT_SOURCES "${SUBSTRAIT_PROTO_GEN}.cc")
+  endforeach()
+
+  add_custom_target(substrait_gen ALL DEPENDS ${SUBSTRAIT_PROTO_GEN_ALL})
+
+  set(SUBSTRAIT_INCLUDES ${SUBSTRAIT_CPP_DIR} ${PROTOBUF_INCLUDE_DIR})
+
+  add_library(substrait STATIC ${SUBSTRAIT_SOURCES})
+  set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES})
+  target_link_libraries(substrait INTERFACE ${ARROW_PROTOBUF_LIBPROTOBUF})
+  add_dependencies(substrait substrait_gen)
+
+  list(APPEND ARROW_BUNDLED_STATIC_LIBS substrait)
+endmacro()
+
+if(ARROW_WITH_SUBSTRAIT)
+  # Currently, we can only build Substrait from source.
+  set(Substrait_SOURCE "BUNDLED")
+  resolve_dependency(Substrait)
+endif()
+
 # ----------------------------------------------------------------------
 # jemalloc - Unix-only high-performance allocator
 
diff --git a/cpp/src/arrow/engine/CMakeLists.txt b/cpp/src/arrow/engine/CMakeLists.txt
index 1e66e7fb5f7..edb878939f5 100644
--- a/cpp/src/arrow/engine/CMakeLists.txt
+++ b/cpp/src/arrow/engine/CMakeLists.txt
@@ -19,12 +19,6 @@ add_custom_target(arrow_engine)
 
 arrow_install_all_headers("arrow/engine")
 
-set(ARROW_ENGINE_LINK_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF})
-
-#if(WIN32)
-#  list(APPEND ARROW_ENGINE_LINK_LIBS ws2_32.lib)
-#endif()
-
 set(ARROW_ENGINE_SRCS
     substrait/expression_internal.cc
     substrait/extension_set.cc
@@ -34,71 +28,6 @@ set(ARROW_ENGINE_SRCS
     substrait/relation_internal.cc
     substrait/type_internal.cc)
 
-set(SUBSTRAIT_LOCAL_DIR "${CMAKE_CURRENT_BINARY_DIR}/substrait")
-set(SUBSTRAIT_GEN_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated")
-set(SUBSTRAIT_PROTOS
-    capabilities
-    expression
-    extensions/extensions
-    function
-    parameterized_types
-    plan
-    relations
-    type
-    type_expressions)
-
-externalproject_add(substrait_ep
-                    GIT_REPOSITORY "${ARROW_SUBSTRAIT_REPO}"
-                    GIT_TAG "${ARROW_SUBSTRAIT_TAG}"
-                    SOURCE_DIR "${SUBSTRAIT_LOCAL_DIR}"
-                    CONFIGURE_COMMAND ""
-                    BUILD_COMMAND ""
-                    INSTALL_COMMAND "")
-
-set(SUBSTRAIT_SUPPRESSED_WARNINGS)
-if(MSVC)
-  # Protobuf generated files trigger some spurious warnings on MSVC.
-
-  # Implicit conversion from uint64_t to uint32_t:
-  list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4244")
-
-  # Missing dll-interface:
-  list(APPEND SUBSTRAIT_SUPPRESSED_WARNINGS "/wd4251")
-endif()
-
-file(MAKE_DIRECTORY "${SUBSTRAIT_GEN_DIR}/substrait")
-
-set(SUBSTRAIT_PROTO_GEN_ALL)
-foreach(SUBSTRAIT_PROTO ${SUBSTRAIT_PROTOS})
-  set(SUBSTRAIT_PROTO_GEN "${SUBSTRAIT_GEN_DIR}/substrait/${SUBSTRAIT_PROTO}.pb")
-
-  foreach(EXT h cc)
-    set_source_files_properties("${SUBSTRAIT_PROTO_GEN}.${EXT}"
-                                PROPERTIES COMPILE_OPTIONS
-                                           "${SUBSTRAIT_SUPPRESSED_WARNINGS}"
-                                           GENERATED TRUE
-                                           SKIP_UNITY_BUILD_INCLUSION TRUE)
-    add_custom_command(OUTPUT "${SUBSTRAIT_PROTO_GEN}.${EXT}"
-                       COMMAND ${ARROW_PROTOBUF_PROTOC} "-I${SUBSTRAIT_LOCAL_DIR}/proto"
-                               "--cpp_out=${SUBSTRAIT_GEN_DIR}"
-                               "${SUBSTRAIT_LOCAL_DIR}/proto/substrait/${SUBSTRAIT_PROTO}.proto"
-                       DEPENDS ${PROTO_DEPENDS} substrait_ep)
-    list(APPEND SUBSTRAIT_PROTO_GEN_ALL "${SUBSTRAIT_PROTO_GEN}.${EXT}")
-  endforeach()
-
-  list(APPEND ARROW_ENGINE_SRCS "${SUBSTRAIT_PROTO_GEN}.cc")
-endforeach()
-
-add_custom_target(substrait_gen ALL DEPENDS ${SUBSTRAIT_PROTO_GEN_ALL})
-
-find_package(Git)
-add_custom_target(substrait_gen_verify
-                  COMMENT "Verifying that generated substrait accessors are consistent with \
-                         ARROW_SUBSTRAIT_REPO_AND_TAG='${ARROW_SUBSTRAIT_REPO_AND_TAG}'"
-                  COMMAND ${GIT_EXECUTABLE} diff --exit-code ${SUBSTRAIT_GEN_DIR}
-                  DEPENDS substrait_gen_clear
-                  DEPENDS substrait_gen)
-
 add_arrow_lib(arrow_engine
               CMAKE_PACKAGE_NAME
               ArrowEngine
@@ -115,13 +44,13 @@ add_arrow_lib(arrow_engine
               SHARED_LINK_LIBS
               arrow_shared
               arrow_dataset_shared
-              ${ARROW_ENGINE_LINK_LIBS}
+              substrait
               STATIC_LINK_LIBS
               arrow_static
               arrow_dataset_static
-              ${ARROW_ENGINE_LINK_LIBS}
+              substrait
               PRIVATE_INCLUDES
-              ${SUBSTRAIT_GEN_DIR})
+              ${SUBSTRAIT_INCLUDES})
 
 foreach(LIB_TARGET ${ARROW_ENGINE_LIBRARIES})
   target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_ENGINE_EXPORTING)
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 6a86b4d4bdc..eb3b445430f 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -87,6 +87,8 @@ ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8
 # There is a bug in GCC < 4.9 with Snappy 1.1.9, so revert to 1.1.8 for those (ARROW-14661)
 ARROW_SNAPPY_OLD_BUILD_VERSION=1.1.8
 ARROW_SNAPPY_OLD_BUILD_SHA256_CHECKSUM=16b677f07832a612b0836178db7f374e414f94657c138e6993cbfc5dcc58651f
+ARROW_SUBSTRAIT_BUILD_VERSION=e1b4c04a
+ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=65f83e5f5d979ede5fc8ac9f8bbaf793e0c72d9c415f1a162ba522f6d0bb5bbe
 ARROW_THRIFT_BUILD_VERSION=0.13.0
 ARROW_THRIFT_BUILD_SHA256_CHECKSUM=7ad348b88033af46ce49148097afe354d513c1fca7c607b59c33ebb6064b5179
 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0