From e1ad13ee6fe158ed16a664e27653d17e56ed520a Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 2 Jun 2020 16:17:22 -0500 Subject: [PATCH 01/12] Do not bundle shared libraries with ABI tags try 2 failure start try 3 Add post-install symlinking code for wheels, add option to not install tests --- python/CMakeLists.txt | 13 +-- python/manylinux1/build_arrow.sh | 6 +- python/manylinux201x/build_arrow.sh | 6 +- python/pyarrow/__init__.py | 34 ++++++++ python/setup.py | 127 +++++++++++++++------------- 5 files changed, 118 insertions(+), 68 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 99a08df91f4..a4fbd2bc606 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -225,16 +225,19 @@ function(bundle_arrow_lib library_path) get_filename_component(LIBRARY_DIR ${${library_path}} DIRECTORY) get_filename_component(LIBRARY_NAME ${${library_path}} NAME_WE) - configure_file( - ${${library_path}} - ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) - if(APPLE) + # Only copy the shared library with ABI version on Linux and macOS + + if(MSVC) + configure_file( + ${${library_path}} + ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) + elseif(APPLE) configure_file( ${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) - elseif(NOT MSVC) + else() configure_file( ${${library_path}}.${ARG_SO_VERSION} ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}.${ARG_SO_VERSION} diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 7664d2b016b..7dc184f9868 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -75,12 +75,13 @@ touch ${CPYTHON_PATH}/lib/${py_libname} echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" $PIP install -r requirements-wheel-build.txt +export PYARROW_INSTALL_TESTS=0 export PYARROW_WITH_DATASET=1 export PYARROW_WITH_FLIGHT=1 -export PYARROW_WITH_GANDIVA=1 +export PYARROW_WITH_GANDIVA=0 export BUILD_ARROW_DATASET=ON export BUILD_ARROW_FLIGHT=ON -export BUILD_ARROW_GANDIVA=ON +export BUILD_ARROW_GANDIVA=OFF # ARROW-3052(wesm): ORC is being bundled until it can be added to the # manylinux1 image @@ -158,7 +159,6 @@ import sys import pyarrow import pyarrow.dataset import pyarrow.flight -import pyarrow.gandiva import pyarrow.fs import pyarrow._hdfs import pyarrow.parquet diff --git a/python/manylinux201x/build_arrow.sh b/python/manylinux201x/build_arrow.sh index 5c162c00181..9550b46ac3f 100755 --- a/python/manylinux201x/build_arrow.sh +++ b/python/manylinux201x/build_arrow.sh @@ -74,12 +74,13 @@ touch ${CPYTHON_PATH}/lib/${py_libname} echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" $PIP install -r requirements-wheel-build.txt +export PYARROW_INSTALL_TESTS=0 export PYARROW_WITH_DATASET=1 export PYARROW_WITH_FLIGHT=1 -export PYARROW_WITH_GANDIVA=1 +export PYARROW_WITH_GANDIVA=0 export BUILD_ARROW_DATASET=ON export BUILD_ARROW_FLIGHT=ON -export BUILD_ARROW_GANDIVA=ON +export BUILD_ARROW_GANDIVA=OFF # ARROW-3052(wesm): ORC is being bundled until it can be added to the # manylinux1 image @@ -153,7 +154,6 @@ $PYTHON_INTERPRETER -c " import pyarrow import pyarrow.dataset import pyarrow.flight -import pyarrow.gandiva import pyarrow.fs import pyarrow._hdfs import pyarrow.parquet diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index e8b8c6eae2d..d1e584e6b51 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -287,6 +287,38 @@ def get_libraries(): return ['arrow', 'arrow_python'] +def _setup_bundled_symlinks(): + """ + With Linux and macOS wheels, the bundled shared libraries have an embedded + ABI version like libarrow.so.18 and so linking to them with -larrow won't + work unless we create symlinks at locations like + site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses + prior problems we had with shipping two copies of the shared libraries to + permit third party projects like turbodbc to build their C++ extensions + against the pyarrow wheels. + """ + import glob + if _sys.platform == 'win32': + return + package_cwd = _os.path.dirname(__file__) + + if _sys.platform == 'linux': + bundled_libs = glob.glob(_os.path.join(package_cwd, '*.so.*')) + + def get_symlink_path(hard_path): + return hard_path.rsplit('.', 1)[0] + else: + bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib')) + def get_symlink_path(hard_path): + return '.'.join((hard_path.split('.')[0], 'dylib')) + + for lib_hard_path in bundled_libs: + symlink_path = get_symlink_path(lib_hard_path) + if _os.path.exists(symlink_path): + continue + _os.symlink(lib_hard_path, symlink_path) + + def get_library_dirs(): """ Return lists of directories likely to contain Arrow C++ libraries for @@ -295,6 +327,8 @@ def get_library_dirs(): package_cwd = _os.path.dirname(__file__) library_dirs = [package_cwd] + _setup_bundled_symlinks() + def append_library_dir(library_dir): if library_dir not in library_dirs: library_dirs.append(library_dir) diff --git a/python/setup.py b/python/setup.py index 453cc38e65e..21741ae89f1 100755 --- a/python/setup.py +++ b/python/setup.py @@ -119,6 +119,9 @@ def run(self): 'enable Cython code coverage'), ('bundle-boost', None, 'bundle the (shared) Boost libraries'), + ('bundle-cython-cpp', None, + 'bundle generated Cython C++ code ' + '(used for code coverage)'), ('bundle-arrow-cpp', None, 'bundle the Arrow C++ libraries')] + _build_ext.user_options) @@ -170,6 +173,8 @@ def initialize_options(self): os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) self.bundle_arrow_cpp = strtobool( os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) + self.bundle_cython_cpp = strtobool( + os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) self.bundle_boost = strtobool( os.environ.get('PYARROW_BUNDLE_BOOST', '0')) @@ -279,7 +284,6 @@ def append_cmake_bool(value, varname): self.spawn(['cmake'] + extra_cmake_args + cmake_options + [source]) print("-- Finished cmake for pyarrow") - # Do the build print("-- Running cmake --build for pyarrow") self.spawn(['cmake', '--build', '.', '--config', self.build_type] + build_tool_args) @@ -320,27 +324,15 @@ def append_cmake_bool(value, varname): raise RuntimeError('pyarrow C-extension failed to build:', os.path.abspath(built_path)) - cpp_generated_path = self.get_ext_generated_cpp_source(name) - if not os.path.exists(cpp_generated_path): - raise RuntimeError('expected to find generated C++ file ' - 'in {0!r}'.format(cpp_generated_path)) - - # The destination path to move the generated C++ source to - # (for Cython source coverage) - cpp_path = pjoin(build_lib, self._get_build_dir(), - os.path.basename(cpp_generated_path)) - if os.path.exists(cpp_path): - os.remove(cpp_path) - # The destination path to move the built C extension to ext_path = pjoin(build_lib, self._get_cmake_ext_path(name)) if os.path.exists(ext_path): os.remove(ext_path) self.mkpath(os.path.dirname(ext_path)) - print('Moving generated C++ source', cpp_generated_path, - 'to build path', cpp_path) - shutil.move(cpp_generated_path, cpp_path) + if self.bundle_cython_cpp: + self._bundle_cython_cpp(name, build_lib) + print('Moving built C-extension', built_path, 'to build path', ext_path) shutil.move(built_path, ext_path) @@ -352,40 +344,7 @@ def append_cmake_bool(value, varname): name + '_api.h')) if self.bundle_arrow_cpp: - print(pjoin(build_lib, 'pyarrow')) - move_shared_libs(build_prefix, build_lib, "arrow") - move_shared_libs(build_prefix, build_lib, "arrow_python") - if self.with_cuda: - move_shared_libs(build_prefix, build_lib, "arrow_cuda") - if self.with_flight: - move_shared_libs(build_prefix, build_lib, "arrow_flight") - move_shared_libs(build_prefix, build_lib, - "arrow_python_flight") - if self.with_dataset: - move_shared_libs(build_prefix, build_lib, "arrow_dataset") - if self.with_plasma: - move_shared_libs(build_prefix, build_lib, "plasma") - if self.with_gandiva: - move_shared_libs(build_prefix, build_lib, "gandiva") - if self.with_parquet and not self.with_static_parquet: - move_shared_libs(build_prefix, build_lib, "parquet") - if not self.with_static_boost and self.bundle_boost: - move_shared_libs( - build_prefix, build_lib, - "{}_regex".format(self.boost_namespace), - implib_required=False) - if sys.platform == 'win32': - # zlib uses zlib.dll for Windows - zlib_lib_name = 'zlib' - move_shared_libs(build_prefix, build_lib, zlib_lib_name, - implib_required=False) - if self.with_flight: - # DLL dependencies for gRPC / Flight - for lib_name in ['cares', 'libprotobuf', - 'libcrypto-1_1-x64', - 'libssl-1_1-x64']: - move_shared_libs(build_prefix, build_lib, lib_name, - implib_required=False) + self._bundle_arrow_cpp(build_prefix, build_lib) if self.with_plasma: # Move the plasma store @@ -395,6 +354,58 @@ def append_cmake_bool(value, varname): "plasma-store-server") shutil.move(source, target) + def _bundle_arrow_cpp(self, build_prefix, build_lib): + print(pjoin(build_lib, 'pyarrow')) + move_shared_libs(build_prefix, build_lib, "arrow") + move_shared_libs(build_prefix, build_lib, "arrow_python") + if self.with_cuda: + move_shared_libs(build_prefix, build_lib, "arrow_cuda") + if self.with_flight: + move_shared_libs(build_prefix, build_lib, "arrow_flight") + move_shared_libs(build_prefix, build_lib, + "arrow_python_flight") + if self.with_dataset: + move_shared_libs(build_prefix, build_lib, "arrow_dataset") + if self.with_plasma: + move_shared_libs(build_prefix, build_lib, "plasma") + if self.with_gandiva: + move_shared_libs(build_prefix, build_lib, "gandiva") + if self.with_parquet and not self.with_static_parquet: + move_shared_libs(build_prefix, build_lib, "parquet") + if not self.with_static_boost and self.bundle_boost: + move_shared_libs( + build_prefix, build_lib, + "{}_regex".format(self.boost_namespace), + implib_required=False) + if sys.platform == 'win32': + # zlib uses zlib.dll for Windows + zlib_lib_name = 'zlib' + move_shared_libs(build_prefix, build_lib, zlib_lib_name, + implib_required=False) + if self.with_flight: + # DLL dependencies for gRPC / Flight + for lib_name in ['cares', 'libprotobuf', + 'libcrypto-1_1-x64', + 'libssl-1_1-x64']: + move_shared_libs(build_prefix, build_lib, lib_name, + implib_required=False) + + def _bundle_cython_cpp(self, name, lib_path): + cpp_generated_path = self.get_ext_generated_cpp_source(name) + if not os.path.exists(cpp_generated_path): + raise RuntimeError('expected to find generated C++ file ' + 'in {0!r}'.format(cpp_generated_path)) + + # The destination path to move the generated C++ source to + # (for Cython source coverage) + cpp_path = pjoin(lib_path, self._get_build_dir(), + os.path.basename(cpp_generated_path)) + if os.path.exists(cpp_path): + os.remove(cpp_path) + print('Moving generated C++ source', cpp_generated_path, + 'to build path', cpp_path) + shutil.move(cpp_generated_path, cpp_path) + def _failure_permitted(self, name): if name == '_parquet' and not self.with_parquet: return True @@ -497,17 +508,13 @@ def _move_shared_libs_unix(build_prefix, build_lib, lib_name): raise Exception('Could not find library:' + lib_filename + ' in ' + build_prefix) - # Longest suffix library should be copied, all others symlinked + # Longest suffix library should be copied, all others ignored and can be + # symlinked later after the library has been installed libs.sort(key=lambda s: -len(s)) print(libs, libs[0]) lib_filename = os.path.basename(libs[0]) shutil.move(pjoin(build_prefix, lib_filename), pjoin(build_lib, 'pyarrow', lib_filename)) - for lib in libs[1:]: - filename = os.path.basename(lib) - link_name = pjoin(build_lib, 'pyarrow', filename) - if not os.path.exists(link_name): - os.symlink(lib_filename, link_name) # If the event of not running from a git clone (e.g. from a git archive @@ -560,9 +567,15 @@ def has_ext_modules(foo): setup_requires = [] +if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '0')): + packages = ['pyarrow', 'pyarrow.tests'] +else: + packages = ['pyarrow'] + + setup( name='pyarrow', - packages=['pyarrow', 'pyarrow.tests'], + packages=packages, zip_safe=False, package_data={'pyarrow': ['*.pxd', '*.pyx', 'includes/*.pxd']}, include_package_data=True, From b353f17879abb9e6a4cc73c2b26adb7f0e598263 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 2 Jun 2020 17:18:30 -0500 Subject: [PATCH 02/12] Disable Gandiva in wheel builds --- dev/tasks/python-wheels/osx-build.sh | 6 +++--- dev/tasks/python-wheels/win-build.bat | 14 ++++++++++---- docs/source/python/extending.rst | 3 +++ 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index aea4c62e00b..43d8aacc6e6 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -52,8 +52,9 @@ function build_wheel { pip install $(pip_opts) -r python/requirements-wheel-build.txt - export PYARROW_WITH_GANDIVA=1 - export BUILD_ARROW_GANDIVA=ON + export PYARROW_INSTALL_TESTS=OFF + export PYARROW_WITH_GANDIVA=0 + export BUILD_ARROW_GANDIVA=OFF git submodule update --init export ARROW_TEST_DATA=`pwd`/testing/data @@ -156,6 +157,5 @@ import pyarrow.fs import pyarrow._hdfs import pyarrow.dataset import pyarrow.flight -import pyarrow.gandiva " } diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat index b7bd25faefb..c722c8d393f 100644 --- a/dev/tasks/python-wheels/win-build.bat +++ b/dev/tasks/python-wheels/win-build.bat @@ -17,10 +17,14 @@ @echo on +@rem Building Gandiva in the wheels is disabled for now to make the wheels +@rem smaller. + +@rem --file=%ARROW_SRC%\ci\conda_env_gandiva.yml ^ + @rem create conda environment for compiling call conda create -n wheel-build -q -y -c conda-forge ^ --file=%ARROW_SRC%\ci\conda_env_cpp.yml ^ - --file=%ARROW_SRC%\ci\conda_env_gandiva.yml ^ "vs2015_runtime<14.16" ^ python=%PYTHON_VERSION% || exit /B @@ -61,7 +65,7 @@ cmake -G "%GENERATOR%" ^ -DARROW_FLIGHT=ON ^ -DARROW_PYTHON=ON ^ -DARROW_PARQUET=ON ^ - -DARROW_GANDIVA=ON ^ + -DARROW_GANDIVA=OFF ^ -DARROW_MIMAllOC=ON ^ -DZSTD_SOURCE=BUNDLED ^ .. || exit /B @@ -70,9 +74,10 @@ popd set PYARROW_BUILD_TYPE=Release set PYARROW_PARALLEL=8 +set PYARROW_INSTALL_TESTS=0 set PYARROW_WITH_DATASET=1 set PYARROW_WITH_FLIGHT=1 -set PYARROW_WITH_GANDIVA=1 +set PYARROW_WITH_GANDIVA=0 set PYARROW_WITH_PARQUET=1 set PYARROW_WITH_STATIC_BOOST=1 set PYARROW_BUNDLE_ARROW_CPP=1 @@ -96,9 +101,10 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data %PYTHON_INTERPRETER% -c "import pyarrow" || exit /B %PYTHON_INTERPRETER% -c "import pyarrow.parquet" || exit /B %PYTHON_INTERPRETER% -c "import pyarrow.flight" || exit /B -%PYTHON_INTERPRETER% -c "import pyarrow.gandiva" || exit /B %PYTHON_INTERPRETER% -c "import pyarrow.dataset" || exit /B +@rem %PYTHON_INTERPRETER% -c "import pyarrow.gandiva" || exit /B + @rem run the python tests, but disable the cython because there is a linking @rem issue on python 3.8 set PYARROW_TEST_CYTHON=OFF diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst index 284e0488e0c..a5638743659 100644 --- a/docs/source/python/extending.rst +++ b/docs/source/python/extending.rst @@ -401,3 +401,6 @@ Compile the extension: .. code-block:: bash python setup.py build_ext --inplace + +Building Extensions against PyPI Wheels +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 5b016f8ca8866f0f706288e4a477d40c3d950589 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 2 Jun 2020 17:22:23 -0500 Subject: [PATCH 03/12] Don't fail if creating symlink fails --- python/pyarrow/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index d1e584e6b51..091a2a835ed 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -316,7 +316,12 @@ def get_symlink_path(hard_path): symlink_path = get_symlink_path(lib_hard_path) if _os.path.exists(symlink_path): continue - _os.symlink(lib_hard_path, symlink_path) + try: + _os.symlink(lib_hard_path, symlink_path) + except PermissionError: + print("Tried creating symlink {}. If you need to link to " + "bundled shared libraries, run " + "pyarrow._setup_bundled_symlinks() as root") def get_library_dirs(): From fa33a02750476e574a853cc322b760d00c8971dc Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 2 Jun 2020 17:39:13 -0500 Subject: [PATCH 04/12] Add Sphinx documentation about the bundled wheel shared libraries --- docs/source/python/extending.rst | 10 ++++++++++ python/CMakeLists.txt | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst index a5638743659..f3e31ff3994 100644 --- a/docs/source/python/extending.rst +++ b/docs/source/python/extending.rst @@ -404,3 +404,13 @@ Compile the extension: Building Extensions against PyPI Wheels ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The Python wheels have the Arrow C++ libraries bundled in the top level +``pyarrow/`` install directory. On Linux and macOS, these libraries have an ABI +tag like ``libarrow.so.17`` which means that linking with ``-larrow`` will not +work right out of the box. To work around this, the +``pyarrow.get_library_dirs()`` directory will attempt to create symlinks like +``pyarrow/libarrow.so`` the first time it is called. If ``pyarrow`` is +installed someplace where you do not have write access, you will have to either +run this function once as root or have someone with root access run it to +create the symlinks. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a4fbd2bc606..2a43b165d7e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -231,7 +231,8 @@ function(bundle_arrow_lib library_path) if(MSVC) configure_file( ${${library_path}} - ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} COPYONLY) + ${BUILD_OUTPUT_ROOT_DIRECTORY}/${LIBRARY_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX} + COPYONLY) elseif(APPLE) configure_file( ${LIBRARY_DIR}/${LIBRARY_NAME}.${ARG_SO_VERSION}${CMAKE_SHARED_LIBRARY_SUFFIX} From c74b10921cc06633d654e2f2a422337b491ef26c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 2 Jun 2020 17:47:35 -0500 Subject: [PATCH 05/12] lint --- python/pyarrow/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 091a2a835ed..e68797c1b1b 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -309,6 +309,7 @@ def get_symlink_path(hard_path): return hard_path.rsplit('.', 1)[0] else: bundled_libs = glob.glob(_os.path.join(package_cwd, '*.*.dylib')) + def get_symlink_path(hard_path): return '.'.join((hard_path.split('.')[0], 'dylib')) From a133530c813f4ff4e77086f4b5901c7e523968fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 3 Jun 2020 17:04:56 +0200 Subject: [PATCH 06/12] don't test gandiva import --- dev/tasks/python-wheels/manylinux-test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/tasks/python-wheels/manylinux-test.sh b/dev/tasks/python-wheels/manylinux-test.sh index bd751df7b46..7dcd58f2157 100755 --- a/dev/tasks/python-wheels/manylinux-test.sh +++ b/dev/tasks/python-wheels/manylinux-test.sh @@ -44,5 +44,4 @@ import pyarrow.fs import pyarrow._hdfs import pyarrow.dataset import pyarrow.flight -import pyarrow.gandiva " From 0d69eafeb74cc9a3bd03f5d81f33dfd3cdc6e6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 3 Jun 2020 20:31:13 +0200 Subject: [PATCH 07/12] require boost if parquet is enabled --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 5 ++--- python/manylinux1/build_arrow.sh | 1 - python/manylinux201x/build_arrow.sh | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 88e7f2927cb..2576a89b350 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -707,15 +707,14 @@ endif() # - Gandiva has a compile-time (header-only) dependency on Boost, not runtime. # - Tests need Boost at runtime. # - S3FS and Flight benchmarks need Boost at runtime. + if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS) OR ARROW_GANDIVA OR (ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "BUNDLED") - OR (ARROW_PARQUET - AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU" - AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9")) + OR ARROW_PARQUET) set(ARROW_BOOST_REQUIRED TRUE) else() set(ARROW_BOOST_REQUIRED FALSE) diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 7dc184f9868..4bd69919803 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -121,7 +121,6 @@ cmake \ -DCMAKE_INSTALL_PREFIX=/arrow-dist \ -DOPENSSL_USE_STATIC_LIBS=ON \ -DORC_SOURCE=BUNDLED \ - -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \ -GNinja /arrow/cpp ninja ninja install diff --git a/python/manylinux201x/build_arrow.sh b/python/manylinux201x/build_arrow.sh index 9550b46ac3f..72608f7c185 100755 --- a/python/manylinux201x/build_arrow.sh +++ b/python/manylinux201x/build_arrow.sh @@ -121,7 +121,6 @@ PATH="${CPYTHON_PATH}/bin:${PATH}" cmake \ -DCMAKE_INSTALL_PREFIX=/arrow-dist \ -DOPENSSL_USE_STATIC_LIBS=ON \ -DORC_SOURCE=BUNDLED \ - -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \ -DZLIB_ROOT=/usr/local \ -GNinja /arrow/cpp ninja install From f34b24e2a891d218c7b9dacde0bec3d10ca76aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Wed, 3 Jun 2020 21:23:09 +0200 Subject: [PATCH 08/12] only if gcc --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2576a89b350..2193670304e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -714,7 +714,7 @@ if(ARROW_BUILD_INTEGRATION OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS) OR ARROW_GANDIVA OR (ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "BUNDLED") - OR ARROW_PARQUET) + OR (ARROW_PARQUET AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) set(ARROW_BOOST_REQUIRED TRUE) else() set(ARROW_BOOST_REQUIRED FALSE) From ff9e2b9998f67950b9b64d501a4bdcd69f96ef9c Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 3 Jun 2020 14:52:38 -0500 Subject: [PATCH 09/12] Install tests by default, make the create_library_symlinks functions an explicit opt-in, update documentation --- dev/tasks/python-wheels/osx-build.sh | 2 +- dev/tasks/python-wheels/win-build.bat | 2 +- docs/source/python/extending.rst | 18 +++++++++++------- python/manylinux1/build_arrow.sh | 2 +- python/manylinux201x/build_arrow.sh | 2 +- python/pyarrow/__init__.py | 13 ++++++++----- python/setup.py | 2 +- 7 files changed, 24 insertions(+), 17 deletions(-) diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index 43d8aacc6e6..fa2144d5da4 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -52,7 +52,7 @@ function build_wheel { pip install $(pip_opts) -r python/requirements-wheel-build.txt - export PYARROW_INSTALL_TESTS=OFF + export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_GANDIVA=0 export BUILD_ARROW_GANDIVA=OFF diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat index c722c8d393f..a78dcf3a089 100644 --- a/dev/tasks/python-wheels/win-build.bat +++ b/dev/tasks/python-wheels/win-build.bat @@ -74,7 +74,7 @@ popd set PYARROW_BUILD_TYPE=Release set PYARROW_PARALLEL=8 -set PYARROW_INSTALL_TESTS=0 +set PYARROW_INSTALL_TESTS=1 set PYARROW_WITH_DATASET=1 set PYARROW_WITH_FLIGHT=1 set PYARROW_WITH_GANDIVA=0 diff --git a/docs/source/python/extending.rst b/docs/source/python/extending.rst index f3e31ff3994..6737b8d8633 100644 --- a/docs/source/python/extending.rst +++ b/docs/source/python/extending.rst @@ -407,10 +407,14 @@ Building Extensions against PyPI Wheels The Python wheels have the Arrow C++ libraries bundled in the top level ``pyarrow/`` install directory. On Linux and macOS, these libraries have an ABI -tag like ``libarrow.so.17`` which means that linking with ``-larrow`` will not -work right out of the box. To work around this, the -``pyarrow.get_library_dirs()`` directory will attempt to create symlinks like -``pyarrow/libarrow.so`` the first time it is called. If ``pyarrow`` is -installed someplace where you do not have write access, you will have to either -run this function once as root or have someone with root access run it to -create the symlinks. +tag like ``libarrow.so.17`` which means that linking with ``-larrow`` using the +linker path provided by ``pyarrow.get_library_dirs()`` will not work right out +of the box. To fix this, you must run ``pyarrow.create_library_symlinks()`` +once as a user with write access to the directory where pyarrow is +installed. This function will attempt to create symlinks like +``pyarrow/libarrow.so``. For example: + +.. code-block:: bash + + pip install pyarrow + python -c "import pyarrow; pyarrow.create_library_symlinks()" diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 4bd69919803..2f1f4cc4fb4 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -75,7 +75,7 @@ touch ${CPYTHON_PATH}/lib/${py_libname} echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" $PIP install -r requirements-wheel-build.txt -export PYARROW_INSTALL_TESTS=0 +export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_DATASET=1 export PYARROW_WITH_FLIGHT=1 export PYARROW_WITH_GANDIVA=0 diff --git a/python/manylinux201x/build_arrow.sh b/python/manylinux201x/build_arrow.sh index 72608f7c185..a783998f80c 100755 --- a/python/manylinux201x/build_arrow.sh +++ b/python/manylinux201x/build_arrow.sh @@ -74,7 +74,7 @@ touch ${CPYTHON_PATH}/lib/${py_libname} echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" $PIP install -r requirements-wheel-build.txt -export PYARROW_INSTALL_TESTS=0 +export PYARROW_INSTALL_TESTS=1 export PYARROW_WITH_DATASET=1 export PYARROW_WITH_FLIGHT=1 export PYARROW_WITH_GANDIVA=0 diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index e68797c1b1b..2a3d9131c07 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -287,15 +287,20 @@ def get_libraries(): return ['arrow', 'arrow_python'] -def _setup_bundled_symlinks(): +def create_library_symlinks(): """ With Linux and macOS wheels, the bundled shared libraries have an embedded - ABI version like libarrow.so.18 and so linking to them with -larrow won't - work unless we create symlinks at locations like + ABI version like libarrow.so.17 or libarrow.17.dylib and so linking to them + with -larrow won't work unless we create symlinks at locations like site-packages/pyarrow/libarrow.so. This unfortunate workaround addresses prior problems we had with shipping two copies of the shared libraries to permit third party projects like turbodbc to build their C++ extensions against the pyarrow wheels. + + This function must only be invoked once and only when the shared libraries + are bundled with the Python package, which should only apply to wheel-based + installs. It requires write access to the site-packages/pyarrow directory + and so depending on your system may need to be run with root. """ import glob if _sys.platform == 'win32': @@ -333,8 +338,6 @@ def get_library_dirs(): package_cwd = _os.path.dirname(__file__) library_dirs = [package_cwd] - _setup_bundled_symlinks() - def append_library_dir(library_dir): if library_dir not in library_dirs: library_dirs.append(library_dir) diff --git a/python/setup.py b/python/setup.py index 21741ae89f1..3a699fe1093 100755 --- a/python/setup.py +++ b/python/setup.py @@ -567,7 +567,7 @@ def has_ext_modules(foo): setup_requires = [] -if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '0')): +if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '1')): packages = ['pyarrow', 'pyarrow.tests'] else: packages = ['pyarrow'] From e8743146dac150da79ab820ddcaaf86a1e7a5b2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 4 Jun 2020 10:12:33 +0200 Subject: [PATCH 10/12] boost requirement depends on thrift's version; don't run cython tests in macos wheel builds --- cpp/cmake_modules/FindLLVMAlt.cmake | 8 ++++--- cpp/cmake_modules/ThirdpartyToolchain.cmake | 26 ++++++++++++++++----- dev/tasks/python-wheels/osx-build.sh | 2 ++ 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/cpp/cmake_modules/FindLLVMAlt.cmake b/cpp/cmake_modules/FindLLVMAlt.cmake index ab399171c1f..7695c09ae8c 100644 --- a/cpp/cmake_modules/FindLLVMAlt.cmake +++ b/cpp/cmake_modules/FindLLVMAlt.cmake @@ -68,9 +68,11 @@ endif() mark_as_advanced(CLANG_EXECUTABLE LLVM_LINK_EXECUTABLE) -find_package_handle_standard_args(LLVMAlt REQUIRED_VARS - # The first variable is used for display. - LLVM_PACKAGE_VERSION CLANG_EXECUTABLE LLVM_FOUND +find_package_handle_standard_args(LLVMAlt + REQUIRED_VARS # The first variable is used for display. + LLVM_PACKAGE_VERSION + CLANG_EXECUTABLE + LLVM_FOUND LLVM_LINK_EXECUTABLE) if(LLVMAlt_FOUND) message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2193670304e..ca227b7030e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -694,7 +694,7 @@ set(Boost_ADDITIONAL_VERSIONS "1.60.0" "1.60") -# Thrift needs Boost if we're building the bundled version, +# Thrift needs Boost if we're building the bundled version with version < 0.13, # so we first need to determine whether we're building it if(ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "AUTO") find_package(Thrift 0.11.0 MODULE COMPONENTS ${ARROW_THRIFT_REQUIRED_COMPONENTS}) @@ -703,18 +703,32 @@ if(ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "AUTO") endif() endif() -# - Parquet requires boost only with gcc 4.8 (because of missing std::regex). +# Thrift < 0.13 has a compile-time header dependency on boost +if(Thrift_SOURCE STREQUAL "BUNDLED" AND ARROW_THRIFT_BUILD_VERSION VERSION_LESS "0.13") + set(THRIFT_REQUIRES_BOOST TRUE) +elseif(THRIFT_VERSION VERSION_LESS "0.13") + set(THRIFT_REQUIRES_BOOST TRUE) +else() + set(THRIFT_REQUIRES_BOOST FALSE) +endif() + +# Parquet requires boost only with gcc 4.8 (because of missing std::regex). +if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS "4.9") + set(PARQUET_REQUIRES_BOOST TRUE) +else() + set(PARQUET_REQUIRES_BOOST FALSE) +endif() + # - Gandiva has a compile-time (header-only) dependency on Boost, not runtime. # - Tests need Boost at runtime. # - S3FS and Flight benchmarks need Boost at runtime. - if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS + OR ARROW_GANDIVA OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS) - OR ARROW_GANDIVA - OR (ARROW_WITH_THRIFT AND Thrift_SOURCE STREQUAL "BUNDLED") - OR (ARROW_PARQUET AND CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) + OR (ARROW_WITH_THRIFT AND THRIFT_REQUIRES_BOOST) + OR (ARROW_PARQUET AND PARQUET_REQUIRES_BOOST)) set(ARROW_BOOST_REQUIRED TRUE) else() set(ARROW_BOOST_REQUIRED FALSE) diff --git a/dev/tasks/python-wheels/osx-build.sh b/dev/tasks/python-wheels/osx-build.sh index fa2144d5da4..af54a02c016 100755 --- a/dev/tasks/python-wheels/osx-build.sh +++ b/dev/tasks/python-wheels/osx-build.sh @@ -138,6 +138,8 @@ function install_wheel { function run_unit_tests { pushd $1 + export PYARROW_TEST_CYTHON=OFF + # Install test dependencies pip install $(pip_opts) -r python/requirements-wheel-test.txt From dc7cd2ad4d1ca9d74d2e7e3147ae45a9a30696b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 4 Jun 2020 20:12:16 +0200 Subject: [PATCH 11/12] turn off cython tests in manylinux builds --- dev/tasks/python-wheels/manylinux-test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/tasks/python-wheels/manylinux-test.sh b/dev/tasks/python-wheels/manylinux-test.sh index 7dcd58f2157..fd5e2284b07 100755 --- a/dev/tasks/python-wheels/manylinux-test.sh +++ b/dev/tasks/python-wheels/manylinux-test.sh @@ -20,6 +20,7 @@ set -e export ARROW_TEST_DATA=/arrow/testing/data +export PYARROW_TEST_CYTHON=OFF python --version # Install built wheel From 3267d5750bc668315672a57181b99f8cff7269bf Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Fri, 5 Jun 2020 18:41:12 -0500 Subject: [PATCH 12/12] Remove commented-out line --- dev/tasks/python-wheels/win-build.bat | 2 -- 1 file changed, 2 deletions(-) diff --git a/dev/tasks/python-wheels/win-build.bat b/dev/tasks/python-wheels/win-build.bat index a78dcf3a089..00e69642351 100644 --- a/dev/tasks/python-wheels/win-build.bat +++ b/dev/tasks/python-wheels/win-build.bat @@ -103,8 +103,6 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data %PYTHON_INTERPRETER% -c "import pyarrow.flight" || exit /B %PYTHON_INTERPRETER% -c "import pyarrow.dataset" || exit /B -@rem %PYTHON_INTERPRETER% -c "import pyarrow.gandiva" || exit /B - @rem run the python tests, but disable the cython because there is a linking @rem issue on python 3.8 set PYARROW_TEST_CYTHON=OFF