Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/cmake_modules/FindLLVMAlt.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,11 @@ endif()

mark_as_advanced(CLANG_EXECUTABLE LLVM_LINK_EXECUTABLE)

find_package_handle_standard_args(LLVMAlt REQUIRED_VARS
# The first variable is used for display.
LLVM_PACKAGE_VERSION CLANG_EXECUTABLE LLVM_FOUND
find_package_handle_standard_args(LLVMAlt
REQUIRED_VARS # The first variable is used for display.
LLVM_PACKAGE_VERSION
CLANG_EXECUTABLE
LLVM_FOUND
LLVM_LINK_EXECUTABLE)
if(LLVMAlt_FOUND)
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
Expand Down
27 changes: 20 additions & 7 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ set(Boost_ADDITIONAL_VERSIONS
"1.60.0"
"1.60")

# Thrift needs Boost if we're building the bundled version,
# Thrift needs Boost if we're building the bundled version with version < 0.13,
# so we first need to determine whether we're building it
if(ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "AUTO")
find_package(Thrift 0.11.0 MODULE COMPONENTS ${ARROW_THRIFT_REQUIRED_COMPONENTS})
Expand All @@ -703,19 +703,32 @@ if(ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "AUTO")
endif()
endif()

# - Parquet requires boost only with gcc 4.8 (because of missing std::regex).
# Thrift < 0.13 has a compile-time header dependency on boost
#
# Decide whether the Thrift in use pulls in Boost headers. The result is
# stored in THRIFT_REQUIRES_BOOST (TRUE/FALSE).
# - A bundled build is judged by ARROW_THRIFT_BUILD_VERSION.
# - Every other case is judged by THRIFT_VERSION.
#   NOTE(review): presumably THRIFT_VERSION is populated by the Thrift find
#   module -- confirm what it holds when Thrift was not searched for or found.
# Note that the elseif() branch is also evaluated for a bundled build whose
# ARROW_THRIFT_BUILD_VERSION is >= 0.13, because the first condition is then
# false.
if(Thrift_SOURCE STREQUAL "BUNDLED" AND ARROW_THRIFT_BUILD_VERSION VERSION_LESS "0.13")
set(THRIFT_REQUIRES_BOOST TRUE)
elseif(THRIFT_VERSION VERSION_LESS "0.13")
set(THRIFT_REQUIRES_BOOST TRUE)
else()
set(THRIFT_REQUIRES_BOOST FALSE)
endif()

# Parquet requires boost only with gcc 4.8 (because of missing std::regex).
#
# PARQUET_REQUIRES_BOOST is TRUE only when the C++ compiler is GNU with a
# version below 4.9, and FALSE for every other compiler/version.
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")
set(PARQUET_REQUIRES_BOOST TRUE)
else()
set(PARQUET_REQUIRES_BOOST FALSE)
endif()

# - Gandiva has a compile-time (header-only) dependency on Boost, not runtime.
# - Tests need Boost at runtime.
# - S3FS and Flight benchmarks need Boost at runtime.
if(ARROW_BUILD_INTEGRATION
OR ARROW_BUILD_TESTS
OR ARROW_GANDIVA
OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS)
OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)
OR ARROW_GANDIVA
OR (ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "BUNDLED")
OR (ARROW_PARQUET
AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9"))
OR (ARROW_WITH_THRIFT AND THRIFT_REQUIRES_BOOST)
OR (ARROW_PARQUET AND PARQUET_REQUIRES_BOOST))
set(ARROW_BOOST_REQUIRED TRUE)
else()
set(ARROW_BOOST_REQUIRED FALSE)
Expand Down
2 changes: 1 addition & 1 deletion dev/tasks/python-wheels/manylinux-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
set -e

export ARROW_TEST_DATA=/arrow/testing/data
export PYARROW_TEST_CYTHON=OFF

python --version
# Install built wheel
Expand All @@ -44,5 +45,4 @@ import pyarrow.fs
import pyarrow._hdfs
import pyarrow.dataset
import pyarrow.flight
import pyarrow.gandiva
"
8 changes: 5 additions & 3 deletions dev/tasks/python-wheels/osx-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ function build_wheel {

pip install $(pip_opts) -r python/requirements-wheel-build.txt

export PYARROW_WITH_GANDIVA=1
export BUILD_ARROW_GANDIVA=ON
export PYARROW_INSTALL_TESTS=1
export PYARROW_WITH_GANDIVA=0
export BUILD_ARROW_GANDIVA=OFF

git submodule update --init
export ARROW_TEST_DATA=`pwd`/testing/data
Expand Down Expand Up @@ -137,6 +138,8 @@ function install_wheel {
function run_unit_tests {
pushd $1

export PYARROW_TEST_CYTHON=OFF

# Install test dependencies
pip install $(pip_opts) -r python/requirements-wheel-test.txt

Expand All @@ -156,6 +159,5 @@ import pyarrow.fs
import pyarrow._hdfs
import pyarrow.dataset
import pyarrow.flight
import pyarrow.gandiva
"
}
12 changes: 8 additions & 4 deletions dev/tasks/python-wheels/win-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@

@echo on

@rem Building Gandiva in the wheels is disabled for now to make the wheels
@rem smaller.

@rem --file=%ARROW_SRC%\ci\conda_env_gandiva.yml ^

@rem create conda environment for compiling
call conda create -n wheel-build -q -y -c conda-forge ^
--file=%ARROW_SRC%\ci\conda_env_cpp.yml ^
--file=%ARROW_SRC%\ci\conda_env_gandiva.yml ^
"vs2015_runtime<14.16" ^
python=%PYTHON_VERSION% || exit /B

Expand Down Expand Up @@ -61,7 +65,7 @@ cmake -G "%GENERATOR%" ^
-DARROW_FLIGHT=ON ^
-DARROW_PYTHON=ON ^
-DARROW_PARQUET=ON ^
-DARROW_GANDIVA=ON ^
-DARROW_GANDIVA=OFF ^
-DARROW_MIMAllOC=ON ^
-DZSTD_SOURCE=BUNDLED ^
.. || exit /B
Expand All @@ -70,9 +74,10 @@ popd

set PYARROW_BUILD_TYPE=Release
set PYARROW_PARALLEL=8
set PYARROW_INSTALL_TESTS=1
set PYARROW_WITH_DATASET=1
set PYARROW_WITH_FLIGHT=1
set PYARROW_WITH_GANDIVA=1
set PYARROW_WITH_GANDIVA=0
set PYARROW_WITH_PARQUET=1
set PYARROW_WITH_STATIC_BOOST=1
set PYARROW_BUNDLE_ARROW_CPP=1
Expand All @@ -96,7 +101,6 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data
%PYTHON_INTERPRETER% -c "import pyarrow" || exit /B
%PYTHON_INTERPRETER% -c "import pyarrow.parquet" || exit /B
%PYTHON_INTERPRETER% -c "import pyarrow.flight" || exit /B
%PYTHON_INTERPRETER% -c "import pyarrow.gandiva" || exit /B
%PYTHON_INTERPRETER% -c "import pyarrow.dataset" || exit /B

@rem run the python tests, but disable the cython because there is a linking
Expand Down
17 changes: 17 additions & 0 deletions docs/source/python/extending.rst
Original file line number Diff line number Diff line change
Expand Up @@ -401,3 +401,20 @@ Compile the extension:
.. code-block:: bash

python setup.py build_ext --inplace

Building Extensions against PyPI Wheels
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The Python wheels have the Arrow C++ libraries bundled in the top level
``pyarrow/`` install directory. On Linux and macOS, these libraries have an ABI
tag like ``libarrow.so.17`` which means that linking with ``-larrow`` using the
linker path provided by ``pyarrow.get_library_dirs()`` will not work right out
of the box. To fix this, you must run ``pyarrow.create_library_symlinks()``
once as a user with write access to the directory where pyarrow is
installed. This function will attempt to create symlinks like
``pyarrow/libarrow.so``. For example:

.. code-block:: bash

pip install pyarrow
python -c "import pyarrow; pyarrow.create_library_symlinks()"
14 changes: 9 additions & 5 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -225,16 +225,20 @@ function(bundle_arrow_lib library_path)

get_filename_component(LIBRARY_DIR ${${library_path}} DIRECTORY)
get_filename_component(LIBRARY_NAME ${${library_path}} NAME_WE)
configure_file(
${${library_path}}
${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY)

if(APPLE)
# Only copy the shared library with ABI version on Linux and macOS

if(MSVC)
configure_file(
${${library_path}}
${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
elseif(APPLE)
configure_file(
${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
elseif(NOT MSVC)
else()
configure_file(
${${library_path}}.${ARG_SO_VERSION}
${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION}
Expand Down
7 changes: 3 additions & 4 deletions python/manylinux1/build_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,13 @@ touch ${CPYTHON_PATH}/lib/${py_libname}
echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ==="
$PIP install -r requirements-wheel-build.txt

export PYARROW_INSTALL_TESTS=1
export PYARROW_WITH_DATASET=1
export PYARROW_WITH_FLIGHT=1
export PYARROW_WITH_GANDIVA=1
export PYARROW_WITH_GANDIVA=0
export BUILD_ARROW_DATASET=ON
export BUILD_ARROW_FLIGHT=ON
export BUILD_ARROW_GANDIVA=ON
export BUILD_ARROW_GANDIVA=OFF

# ARROW-3052(wesm): ORC is being bundled until it can be added to the
# manylinux1 image
Expand Down Expand Up @@ -120,7 +121,6 @@ cmake \
-DCMAKE_INSTALL_PREFIX=/arrow-dist \
-DOPENSSL_USE_STATIC_LIBS=ON \
-DORC_SOURCE=BUNDLED \
-DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
-GNinja /arrow/cpp
ninja
ninja install
Expand Down Expand Up @@ -158,7 +158,6 @@ import sys
import pyarrow
import pyarrow.dataset
import pyarrow.flight
import pyarrow.gandiva
import pyarrow.fs
import pyarrow._hdfs
import pyarrow.parquet
Expand Down
7 changes: 3 additions & 4 deletions python/manylinux201x/build_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,13 @@ touch ${CPYTHON_PATH}/lib/${py_libname}
echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ==="
$PIP install -r requirements-wheel-build.txt

export PYARROW_INSTALL_TESTS=1
export PYARROW_WITH_DATASET=1
export PYARROW_WITH_FLIGHT=1
export PYARROW_WITH_GANDIVA=1
export PYARROW_WITH_GANDIVA=0
export BUILD_ARROW_DATASET=ON
export BUILD_ARROW_FLIGHT=ON
export BUILD_ARROW_GANDIVA=ON
export BUILD_ARROW_GANDIVA=OFF

# ARROW-3052(wesm): ORC is being bundled until it can be added to the
# manylinux1 image
Expand Down Expand Up @@ -120,7 +121,6 @@ PATH="${CPYTHON_PATH}/bin:${PATH}" cmake \
-DCMAKE_INSTALL_PREFIX=/arrow-dist \
-DOPENSSL_USE_STATIC_LIBS=ON \
-DORC_SOURCE=BUNDLED \
-DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \
-DZLIB_ROOT=/usr/local \
-GNinja /arrow/cpp
ninja install
Expand Down Expand Up @@ -153,7 +153,6 @@ $PYTHON_INTERPRETER -c "
import pyarrow
import pyarrow.dataset
import pyarrow.flight
import pyarrow.gandiva
import pyarrow.fs
import pyarrow._hdfs
import pyarrow.parquet
Expand Down
43 changes: 43 additions & 0 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,49 @@ def get_libraries():
return ['arrow', 'arrow_python']


def create_library_symlinks():
    """
    Create unversioned symlinks to the shared libraries bundled in a wheel.

    With Linux and macOS wheels, the bundled shared libraries have an embedded
    ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them
    with -larrow won't work unless we create symlinks at locations like
    site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses
    prior problems we had with shipping two copies of the shared libraries to
    permit third party projects like turbodbc to build their C++ extensions
    against the pyarrow wheels.

    This function only needs to be invoked once, and only when the shared
    libraries are bundled with the Python package, which should only apply to
    wheel-based installs. Symlinks that already exist and resolve are left
    untouched, so calling it again is harmless. It requires write access to
    the site-packages/pyarrow directory and so depending on your system may
    need to be run with root.

    On Windows the function returns immediately without doing anything. If a
    symlink cannot be created because of missing permissions, a message is
    printed and the remaining libraries are still processed.
    """
    import glob
    if _sys.platform == 'win32':
        return
    package_cwd = _os.path.dirname(__file__)

    if _sys.platform == 'linux':
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*'))

        def get_symlink_path(hard_path):
            # libarrow.so.17 -> libarrow.so
            return hard_path.rsplit('.', 1)[0]
    else:
        bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib'))

        def get_symlink_path(hard_path):
            # libarrow.17.dylib -> libarrow.dylib
            # Only the file name is split on '.', because the directory part
            # may itself contain dots (e.g. ".../python3.8/site-packages").
            lib_dir, lib_file = _os.path.split(hard_path)
            return _os.path.join(lib_dir, lib_file.split('.')[0] + '.dylib')

    for lib_hard_path in bundled_libs:
        symlink_path = get_symlink_path(lib_hard_path)
        if _os.path.exists(symlink_path):
            continue
        try:
            if _os.path.islink(symlink_path):
                # exists() is False but the link itself is present: a dangling
                # symlink (e.g. left over from a previously installed ABI
                # version). Remove it so that os.symlink does not fail with
                # FileExistsError.
                _os.remove(symlink_path)
            _os.symlink(lib_hard_path, symlink_path)
        except PermissionError:
            print("Tried creating symlink {}. If you need to link to "
                  "bundled shared libraries, run "
                  "pyarrow.create_library_symlinks() as root"
                  .format(symlink_path))


def get_library_dirs():
"""
Return lists of directories likely to contain Arrow C++ libraries for
Expand Down
Loading