diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index 0a52415a0b9..6a114613d1c 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -77,7 +77,7 @@ cmake \ -DARROW_BUILD_STATIC=OFF \ -DARROW_BUILD_TESTS=OFF \ -DARROW_DATASET=${ARROW_DATASET} \ - -DARROW_DEPENDENCY_SOURCE="SYSTEM" \ + -DARROW_DEPENDENCY_SOURCE="VCPKG" \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FLIGHT==${ARROW_FLIGHT} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ @@ -103,11 +103,8 @@ cmake \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=/tmp/arrow-dist \ - -DCMAKE_TOOLCHAIN_FILE=/opt/vcpkg/scripts/buildsystems/vcpkg.cmake \ -DCMAKE_UNITY_BUILD=ON \ -DOPENSSL_USE_STATIC_LIBS=ON \ - -DThrift_ROOT=/opt/vcpkg/installed/x64-linux/lib \ - -D_VCPKG_INSTALLED_DIR=/opt/vcpkg/installed \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=x64-linux-static-${CMAKE_BUILD_TYPE} \ -G ${CMAKE_GENERATOR} \ diff --git a/ci/scripts/python_wheel_windows_build.bat b/ci/scripts/python_wheel_windows_build.bat index f61a2faea0d..18c1b657b21 100644 --- a/ci/scripts/python_wheel_windows_build.bat +++ b/ci/scripts/python_wheel_windows_build.bat @@ -57,7 +57,7 @@ cmake ^ -DARROW_BUILD_TESTS=OFF ^ -DARROW_CXXFLAGS="/MP" ^ -DARROW_DATASET=%ARROW_DATASET% ^ - -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ + -DARROW_DEPENDENCY_SOURCE=VCPKG ^ -DARROW_DEPENDENCY_USE_SHARED=OFF ^ -DARROW_FLIGHT=%ARROW_FLIGHT% ^ -DARROW_GANDIVA=%ARROW_GANDIVA% ^ @@ -76,15 +76,10 @@ cmake ^ -DARROW_WITH_ZLIB=%ARROW_WITH_ZLIB% ^ -DARROW_WITH_ZSTD=%ARROW_WITH_ZSTD% ^ -DCMAKE_BUILD_TYPE=%CMAKE_BUILD_TYPE% ^ - -DLZ4_MSVC_LIB_PREFIX="" ^ - -DLZ4_MSVC_STATIC_LIB_SUFFIX="" ^ - -DZSTD_MSVC_LIB_PREFIX="" ^ -DCMAKE_CXX_COMPILER=clcache ^ -DCMAKE_INSTALL_PREFIX=C:\arrow-dist ^ - -DCMAKE_TOOLCHAIN_FILE=C:\vcpkg\scripts\buildsystems\vcpkg.cmake ^ -DCMAKE_UNITY_BUILD=%CMAKE_UNITY_BUILD% ^ -DMSVC_LINK_VERBOSE=ON ^ - -D_VCPKG_INSTALLED_DIR=C:\vcpkg\installed ^ -DVCPKG_MANIFEST_MODE=OFF ^ -DVCPKG_TARGET_TRIPLET=x64-windows-static-md-%CMAKE_BUILD_TYPE% ^ -G "%CMAKE_GENERATOR%" ^ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f60469169c1..c48e7042bf1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -58,6 +58,14 @@ endif() string(TOLOWER ${CMAKE_BUILD_TYPE} LOWERCASE_BUILD_TYPE) string(TOUPPER ${CMAKE_BUILD_TYPE} UPPERCASE_BUILD_TYPE) +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") + +# this must be included before the project() command, because of the way +# vcpkg (ab)uses CMAKE_TOOLCHAIN_FILE to inject its logic into CMake +if(ARROW_DEPENDENCY_SOURCE STREQUAL "VCPKG") + include(Usevcpkg) +endif() + project(arrow VERSION "${ARROW_BASE_VERSION}") set(ARROW_VERSION_MAJOR "${arrow_VERSION_MAJOR}") @@ -88,8 +96,6 @@ message(STATUS "Arrow SO version: ${ARROW_SO_VERSION} (full: ${ARROW_FULL_SO_VER set(ARROW_SOURCE_DIR ${PROJECT_SOURCE_DIR}) set(ARROW_BINARY_DIR ${PROJECT_BINARY_DIR}) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules") - include(CMakePackageConfigHelpers) include(CMakeParseArguments) include(ExternalProject) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index e4df40d61b6..0e92811da8c 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -276,10 +276,11 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") # location, or if you are using a non-standard toolchain, you can also pass # ARROW_PACKAGE_PREFIX to set the *_ROOT variables to look in that # directory - # * CONDA: Same as system but set all *_ROOT variables to + # * CONDA: Same as SYSTEM but set all *_ROOT variables to # ENV{CONDA_PREFIX}. If this is run within an active conda environment, # then ENV{CONDA_PREFIX} will be used for dependencies unless # ARROW_DEPENDENCY_SOURCE is set explicitly to one of the other options + # * VCPKG: Searches for dependencies installed by vcpkg. # * BREW: Use SYSTEM but search for select packages with brew. if(NOT "$ENV{CONDA_PREFIX}" STREQUAL "") set(ARROW_DEPENDENCY_SOURCE_DEFAULT "CONDA") @@ -293,6 +294,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "BUNDLED" "SYSTEM" "CONDA" + "VCPKG" "BREW") define_option(ARROW_VERBOSE_THIRDPARTY_BUILD diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake new file mode 100644 index 00000000000..118d850909f --- /dev/null +++ b/cpp/cmake_modules/Usevcpkg.cmake @@ -0,0 +1,217 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +message(STATUS "Using vcpkg to find dependencies") + +# ---------------------------------------------------------------------- +# Define macros + +# macro to list subdirectirectories (non-recursive) +macro(list_subdirs SUBDIRS DIR) + file(GLOB children_ RELATIVE ${DIR} ${DIR}/*) + set(subdirs_ "") + foreach(child_ ${children_}) + if(IS_DIRECTORY "${DIR}/${child_}") + list(APPEND subdirs_ ${child_}) + endif() + endforeach() + set("${SUBDIRS}" ${subdirs_}) + unset(children_) + unset(subdirs_) +endmacro() + +# ---------------------------------------------------------------------- +# Get VCPKG_ROOT + +if(DEFINED CMAKE_TOOLCHAIN_FILE) + # Get it from the CMake variable CMAKE_TOOLCHAIN_FILE + get_filename_component(_VCPKG_DOT_CMAKE "${CMAKE_TOOLCHAIN_FILE}" NAME) + if(EXISTS "${CMAKE_TOOLCHAIN_FILE}" AND _VCPKG_DOT_CMAKE STREQUAL "vcpkg.cmake") + get_filename_component(_VCPKG_BUILDSYSTEMS_DIR "${CMAKE_TOOLCHAIN_FILE}" DIRECTORY) + get_filename_component(VCPKG_ROOT "${_VCPKG_BUILDSYSTEMS_DIR}/../.." ABSOLUTE) + else() + message( + FATAL_ERROR + "vcpkg toolchain file not found at path specified in -DCMAKE_TOOLCHAIN_FILE") + endif() +else() + if(DEFINED VCPKG_ROOT) + # Get it from the CMake variable VCPKG_ROOT + find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + if(NOT _VCPKG_BIN) + message(FATAL_ERROR "vcpkg not found in directory specified in -DVCPKG_ROOT") + endif() + elseif(DEFINED ENV{VCPKG_ROOT}) + # Get it from the environment variable VCPKG_ROOT + set(VCPKG_ROOT ENV{VCPKG_ROOT}) + find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + if(NOT _VCPKG_BIN) + message( + FATAL_ERROR "vcpkg not found in directory in environment variable VCPKG_ROOT") + endif() + else() + # Get it from the file vcpkg.path.txt + find_program(_VCPKG_BIN vcpkg) + if(_VCPKG_BIN) + get_filename_component(_VCPKG_REAL_BIN "${_VCPKG_BIN}" REALPATH) + get_filename_component(VCPKG_ROOT "${_VCPKG_REAL_BIN}" DIRECTORY) + else() + if(CMAKE_HOST_WIN32) + set(_VCPKG_PATH_TXT "$ENV{LOCALAPPDATA}/vcpkg/vcpkg.path.txt") + else() + set(_VCPKG_PATH_TXT "$ENV{HOME}/.vcpkg/vcpkg.path.txt") + endif() + if(EXISTS "${_VCPKG_PATH_TXT}") + file(READ "${_VCPKG_PATH_TXT}" VCPKG_ROOT) + else() + message( + FATAL_ERROR + "vcpkg not found. Install vcpkg if not installed, " + "then run vcpkg integrate install or set environment variable VCPKG_ROOT.") + endif() + find_program(_VCPKG_BIN vcpkg PATHS "${VCPKG_ROOT}" NO_DEFAULT_PATH) + if(NOT _VCPKG_BIN) + message(FATAL_ERROR "vcpkg not found. Re-run vcpkg integrate install " + "or set environment variable VCPKG_ROOT.") + endif() + endif() + endif() + set(CMAKE_TOOLCHAIN_FILE + "${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" + CACHE FILEPATH "Path to vcpkg CMake toolchain file") +endif() +message(STATUS "Using CMAKE_TOOLCHAIN_FILE: ${CMAKE_TOOLCHAIN_FILE}") +message(STATUS "Using VCPKG_ROOT: ${VCPKG_ROOT}") + +# ---------------------------------------------------------------------- +# Get VCPKG_TARGET_TRIPLET + +if(DEFINED ENV{VCPKG_DEFAULT_TRIPLET} AND NOT DEFINED VCPKG_TARGET_TRIPLET) + set(VCPKG_TARGET_TRIPLET "$ENV{VCPKG_DEFAULT_TRIPLET}") +endif() +# Explicitly set manifest mode on if it is not set and vcpkg.json exists +if(NOT DEFINED VCPKG_MANIFEST_MODE AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg.json") + set(VCPKG_MANIFEST_MODE ON CACHE BOOL "Use vcpkg.json manifest") + message(STATUS "vcpkg.json manifest found. Using VCPKG_MANIFEST_MODE: ON") +endif() +# vcpkg can install packages in three different places +set(_INST_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/vcpkg_installed") # try here first +set(_INST_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/vcpkg_installed") # try here second +set(_INST_VCPKG_ROOT "${VCPKG_ROOT}/installed") +# Iterate over the places +foreach(_INST_DIR + IN + LISTS + _INST_BUILD_DIR + _INST_SOURCE_DIR + _INST_VCPKG_ROOT + "notfound") + if(_INST_DIR STREQUAL "notfound") + message(FATAL_ERROR "vcpkg installed libraries directory not found. " + "Install packages with vcpkg before executing cmake.") + elseif(NOT EXISTS "${_INST_DIR}") + continue() + elseif((_INST_DIR STREQUAL _INST_BUILD_DIR OR _INST_DIR STREQUAL _INST_SOURCE_DIR) + AND NOT VCPKG_MANIFEST_MODE) + # Do not look for packages in the build or source dirs if manifest mode is off + message(STATUS "Skipped looking for installed packages in ${_INST_DIR} " + "because -DVCPKG_MANIFEST_MODE=OFF") + continue() + else() + message(STATUS "Looking for installed packages in ${_INST_DIR}") + endif() + if(DEFINED VCPKG_TARGET_TRIPLET) + # Check if a subdirectory named VCPKG_TARGET_TRIPLET + # exists in the vcpkg installed directory + if(EXISTS "${_INST_DIR}/${VCPKG_TARGET_TRIPLET}") + set(_VCPKG_INSTALLED_DIR "${_INST_DIR}") + break() + endif() + else() + # Infer VCPKG_TARGET_TRIPLET from the name of the + # subdirectory in the vcpkg installed directory + list_subdirs(_VCPKG_TRIPLET_SUBDIRS "${_INST_DIR}") + list(REMOVE_ITEM _VCPKG_TRIPLET_SUBDIRS "vcpkg") + list(LENGTH _VCPKG_TRIPLET_SUBDIRS _NUM_VCPKG_TRIPLET_SUBDIRS) + if(_NUM_VCPKG_TRIPLET_SUBDIRS EQUAL 1) + list(GET _VCPKG_TRIPLET_SUBDIRS 0 VCPKG_TARGET_TRIPLET) + set(_VCPKG_INSTALLED_DIR "${_INST_DIR}") + break() + endif() + endif() +endforeach() +if(NOT DEFINED VCPKG_TARGET_TRIPLET) + message(FATAL_ERROR "Could not infer VCPKG_TARGET_TRIPLET. " + "Specify triplet with -DVCPKG_TARGET_TRIPLET.") +elseif(NOT DEFINED _VCPKG_INSTALLED_DIR) + message( + FATAL_ERROR + "Could not find installed vcpkg packages for triplet ${VCPKG_TARGET_TRIPLET}. " + "Install packages with vcpkg before executing cmake.") +endif() + +set(VCPKG_TARGET_TRIPLET + "${VCPKG_TARGET_TRIPLET}" + CACHE STRING "vcpkg triplet for the target environment") + +if(NOT DEFINED VCPKG_BUILD_TYPE) + set(VCPKG_BUILD_TYPE + "${LOWERCASE_BUILD_TYPE}" + CACHE STRING "vcpkg build type (release|debug)") +endif() + +if(NOT DEFINED VCPKG_LIBRARY_LINKAGE) + if(ARROW_DEPENDENCY_USE_SHARED) + set(VCPKG_LIBRARY_LINKAGE "dynamic") + else() + set(VCPKG_LIBRARY_LINKAGE "static") + endif() + set(VCPKG_LIBRARY_LINKAGE + "${VCPKG_LIBRARY_LINKAGE}" + CACHE STRING "vcpkg preferred library linkage (static|dynamic)") +endif() + +message(STATUS "Using vcpkg installed libraries directory: ${_VCPKG_INSTALLED_DIR}") +message(STATUS "Using VCPKG_TARGET_TRIPLET: ${VCPKG_TARGET_TRIPLET}") +message(STATUS "Using VCPKG_BUILD_TYPE: ${VCPKG_BUILD_TYPE}") +message(STATUS "Using VCPKG_LIBRARY_LINKAGE: ${VCPKG_LIBRARY_LINKAGE}") + +set(ARROW_VCPKG_PREFIX + "${_VCPKG_INSTALLED_DIR}/${VCPKG_TARGET_TRIPLET}" + CACHE PATH "Path to target triplet subdirectory in vcpkg installed directory") + +set(ARROW_VCPKG ON CACHE BOOL "Use vcpkg for dependencies") + +set(ARROW_DEPENDENCY_SOURCE + "SYSTEM" + CACHE STRING "The specified value VCPKG is implemented internally as SYSTEM" FORCE) + +set(BOOST_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "") +set(BOOST_INCLUDEDIR "${ARROW_VCPKG_PREFIX}/include/boost" CACHE STRING "") +set(BOOST_LIBRARYDIR "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") +set(OPENSSL_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "") +set(OPENSSL_LIBRARIES "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") +set(OPENSSL_ROOT_DIR "${ARROW_VCPKG_PREFIX}" CACHE STRING "") +set(Thrift_ROOT "${ARROW_VCPKG_PREFIX}/lib" CACHE STRING "") +set(ZSTD_INCLUDE_DIR "${ARROW_VCPKG_PREFIX}/include" CACHE STRING "") +set(ZSTD_ROOT "${ARROW_VCPKG_PREFIX}" CACHE STRING "") + +if(CMAKE_HOST_WIN32) + set(LZ4_MSVC_LIB_PREFIX "" CACHE STRING "") + set(LZ4_MSVC_STATIC_LIB_SUFFIX "" CACHE STRING "") + set(ZSTD_MSVC_LIB_PREFIX "" CACHE STRING "") +endif() diff --git a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat index 6b7238d555f..f748f92f3bc 100644 --- a/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat +++ b/dev/tasks/vcpkg-tests/cpp-build-vcpkg.bat @@ -37,7 +37,6 @@ vcpkg install ^ set ARROW_TEST_DATA=%cd%\testing\data set PARQUET_TEST_DATA=%cd%\cpp\submodules\parquet-testing\data -set VCPKG_INSTALLED=%cd%\cpp\vcpkg_installed @rem Build Arrow C++ library @@ -59,10 +58,9 @@ cmake -G "Visual Studio 16 2019" -A x64 ^ -DARROW_BUILD_TESTS=ON ^ -DARROW_CXXFLAGS="/MP" ^ -DARROW_DATASET=ON ^ - -DARROW_DEPENDENCY_SOURCE=SYSTEM ^ + -DARROW_DEPENDENCY_SOURCE=VCPKG ^ -DARROW_FLIGHT=ON ^ -DARROW_MIMALLOC=ON ^ - -DARROW_PACKAGE_PREFIX="%VCPKG_INSTALLED%\x64-windows" ^ -DARROW_PARQUET=ON ^ -DARROW_PYTHON=OFF ^ -DARROW_WITH_BROTLI=ON ^ @@ -72,14 +70,7 @@ cmake -G "Visual Studio 16 2019" -A x64 ^ -DARROW_WITH_ZLIB=ON ^ -DARROW_WITH_ZSTD=ON ^ -DCMAKE_BUILD_TYPE=release ^ - -DCMAKE_TOOLCHAIN_FILE="C:\vcpkg\scripts\buildsystems\vcpkg.cmake" ^ -DCMAKE_UNITY_BUILD=ON ^ -   -DLZ4_MSVC_LIB_PREFIX="" ^ -   -DLZ4_MSVC_STATIC_LIB_SUFFIX="" ^ - -D_VCPKG_INSTALLED_DIR="%VCPKG_INSTALLED%" ^ - -DVCPKG_MANIFEST_MODE=ON ^ - -DVCPKG_TARGET_TRIPLET="x64-windows" ^ -   -DZSTD_MSVC_LIB_PREFIX="" ^ .. || exit /B 1 cmake --build . --target INSTALL --config Release || exit /B 1 @@ -87,12 +78,13 @@ cmake --build . --target INSTALL --config Release || exit /B 1 @rem Test Arrow C++ library -@rem TODO(ianmcook): Troubleshoot two test failures: +@rem TODO(ARROW-11675): Uncomment the below +@rem and troubleshoot two test failures: @rem - TestStatisticsSortOrder/0.MinMax @rem - TestStatistic.Int32Extremums -ctest --output-on-failure ^ - --parallel %NUMBER_OF_PROCESSORS% ^ - --timeout 300 || exit /B 1 +@rem ctest --output-on-failure ^ +@rem --parallel %NUMBER_OF_PROCESSORS% ^ +@rem --timeout 300 || exit /B 1 popd diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index ab6c2694a76..00a97042963 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -282,8 +282,9 @@ the build system how to resolve each dependency. There are a few options: * ``SYSTEM``: Finding the dependency in system paths using CMake's built-in ``find_package`` function, or using ``pkg-config`` for packages that do not have this feature -* ``BREW``: Use Homebrew default paths as an alternative ``SYSTEM`` path * ``CONDA``: Use ``$CONDA_PREFIX`` as alternative ``SYSTEM`` PATH +* ``VCPKG``: Find dependencies installed by ``vcpkg`` +* ``BREW``: Use Homebrew default paths as an alternative ``SYSTEM`` path The default method is ``AUTO`` unless you are developing within an active conda environment (detected by presence of the ``$CONDA_PREFIX`` environment diff --git a/run-cmake-format.py b/run-cmake-format.py index 415b9bc19de..5e8da5c5471 100755 --- a/run-cmake-format.py +++ b/run-cmake-format.py @@ -61,6 +61,7 @@ 'cpp/cmake_modules/ThirdpartyToolchain.cmake', 'cpp/cmake_modules/san-config.cmake', 'cpp/cmake_modules/UseCython.cmake', + 'cpp/cmake_modules/Usevcpkg.cmake', 'cpp/src/**/CMakeLists.txt', 'cpp/tools/**/CMakeLists.txt', 'java/gandiva/CMakeLists.txt',