Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/tasks/python-wheels/manylinux-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ pip install -q /arrow/python/$WHEEL_TAG/dist/*.whl
# Install test dependencies (pip won't work after removing system zlib)
pip install -q -r /arrow/python/requirements-test.txt
# Run pyarrow tests
pytest -v --pyargs pyarrow
pytest -rs --pyargs pyarrow

if [[ "$1" == "--remove-system-libs" ]]; then
# Run import tests after removing the bundled dependencies from the system
Expand Down
7 changes: 4 additions & 3 deletions dev/tasks/python-wheels/osx-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -183,14 +183,15 @@ function install_wheel {
pip install $(pip_opts) \
$(python $multibuild_dir/supported_wheels.py $wheelhouse/*.whl)

# Install test dependencies
pip install $(pip_opts) -r python/requirements-test.txt
popd
}

function run_unit_tests {
    # Install test dependencies. $(pip_opts) is left unquoted on purpose so
    # that each option it prints becomes a separate argument.
    pip install $(pip_opts) -r python/requirements-test.txt

    # Run the pyarrow test suite exactly once, through the `pytest` entry
    # point. `-rs` adds the reasons for skipped tests to the summary.
    pytest -rs --pyargs pyarrow
}

function run_import_tests {
Expand Down
4 changes: 2 additions & 2 deletions dev/tasks/python-wheels/win-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ set ARROW_TEST_DATA=%ARROW_SRC%\testing\data
@rem test the wheel
@rem TODO For maximum reliability, we should test in a plain virtualenv instead.
call conda create -n wheel-test -q -y python=%PYTHON_VERSION% ^
numpy=%NUMPY_VERSION% pandas pytest hypothesis || exit /B
numpy=%NUMPY_VERSION% pandas cython pytest hypothesis || exit /B
call activate wheel-test

@rem install the built wheel
Expand All @@ -90,4 +90,4 @@ pip install -vv --no-index --find-links=%ARROW_SRC%\python\dist\ pyarrow || exit
python -c "import pyarrow; import pyarrow.parquet; import pyarrow.flight; import pyarrow.gandiva;" || exit /B

@rem run the python tests
pytest --pyargs pyarrow || exit /B
pytest -rs --pyargs pyarrow || exit /B
87 changes: 66 additions & 21 deletions python/pyarrow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,12 +209,58 @@ def get_include():
return _os.path.join(_os.path.dirname(__file__), 'include')


def _get_base_libraries():
    """
    Return the unversioned names of the Arrow libraries to link against.

    A new list is built on every call.
    """
    names = ('arrow', 'arrow_python')
    return list(names)


def _get_pkg_config_executable():
    """
    Return the pkg-config executable to invoke.

    The ``PKG_CONFIG`` environment variable is used when it is set to a
    non-empty value; otherwise the plain ``pkg-config`` command name is
    returned.
    """
    # `or` (instead of a `.get()` default) also covers PKG_CONFIG being set
    # but empty, which would otherwise yield '' as the program name.
    return _os.environ.get('PKG_CONFIG') or 'pkg-config'


def _has_pkg_config(pkgname):
    """
    Tell whether pkg-config knows about the package *pkgname*.

    Returns True only when ``pkg-config --exists <pkgname>`` exits with
    status 0. Returns False when the exit status is non-zero or when the
    pkg-config executable cannot be launched.
    """
    import subprocess
    try:
        command = [_get_pkg_config_executable(), '--exists', pkgname]
        exit_status = subprocess.call(command)
    except OSError:
        # TODO: replace with FileNotFoundError once we ditch 2.7
        return False
    return exit_status == 0


def _read_pkg_config_variable(pkgname, cli_args):
    """
    Run pkg-config on *pkgname* with extra arguments and return its output.

    Parameters
    ----------
    pkgname : str
        Name of the pkg-config package to query.
    cli_args : iterable of str
        Additional command-line arguments, e.g. ``["--libs-only-L"]``.

    Returns
    -------
    str
        The standard output of pkg-config, with trailing whitespace
        removed, decoded as UTF-8.

    Raises
    ------
    RuntimeError
        If pkg-config exits with a non-zero status. The message carries
        whatever pkg-config wrote to standard error.
    """
    import subprocess
    # list() lets callers pass any iterable (a tuple, for instance).
    cmd = [_get_pkg_config_executable(), pkgname] + list(cli_args)
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        # Decode leniently: a diagnostic that is not valid UTF-8 must not
        # replace the RuntimeError with a UnicodeDecodeError.
        raise RuntimeError("pkg-config failed: " +
                           err.decode('utf8', 'replace'))
    return out.rstrip().decode('utf8')


def get_so_version():
    """
    Return the SO version for Arrow libraries.

    The value is read from the ``so_version`` variable of the ``arrow``
    pkg-config package when pkg-config knows that package and reports a
    non-empty value. Otherwise a hardcoded default is returned.

    Raises
    ------
    NotImplementedError
        On Windows.
    """
    if _sys.platform == 'win32':
        raise NotImplementedError("Cannot get SO version on Windows")

    default_so_version = "100"  # XXX Find a way not to hardcode this?

    if _has_pkg_config("arrow"):
        so_version = _read_pkg_config_variable("arrow",
                                               ["--variable=so_version"])
        # An empty result (e.g. an arrow.pc that does not define
        # `so_version`) must not be returned as-is: callers would build
        # library names such as "libarrow.so." from it.
        if so_version:
            return so_version
    return default_so_version
Copy link
Member Author

@kszucs kszucs Jul 15, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll need to cherry-pick this patch for 0.14.1, where the SO version must be 14 rather than 100.



def get_libraries():
    """
    Return list of library names to include in the `libraries` argument for C
    or Cython extensions using pyarrow

    On Windows and macOS the plain library names are returned. Elsewhere
    each name is returned in the ``:lib<name>.so.<so_version>`` form, which
    names the versioned shared object file directly.
    """
    libs = _get_base_libraries()
    # The ":filename" library syntax and the ".so.<version>" naming are
    # specific to GNU-style linkers and ELF platforms. Windows links against
    # import libraries, and macOS uses .dylib files with a linker that does
    # not understand ":filename".
    if _sys.platform in ('win32', 'darwin'):
        return libs
    so_version = get_so_version()
    return [":lib{0}.so.{1}".format(libname, so_version)
            for libname in libs]


def get_library_dirs():
Expand All @@ -223,38 +269,37 @@ def get_library_dirs():
linking C or Cython extensions using pyarrow
"""
package_cwd = _os.path.dirname(__file__)

library_dirs = [package_cwd]

def append_library_dir(library_dir):
if library_dir not in library_dirs:
library_dirs.append(library_dir)

# Search library paths via pkg-config. This is necessary if the user
# installed libarrow and the other shared libraries manually and they
# are not shipped inside the pyarrow package (see also ARROW-2976).
from subprocess import call, PIPE, Popen
pkg_config_executable = _os.environ.get('PKG_CONFIG', None) or 'pkg-config'
for package in ["arrow", "plasma", "arrow_python"]:
cmd = '{0} --exists {1}'.format(pkg_config_executable, package).split()
try:
if call(cmd) == 0:
cmd = [pkg_config_executable, "--libs-only-L", package]
proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
out, err = proc.communicate()
library_dir = out.rstrip().decode('utf-8')[2:] # strip "-L"
if library_dir not in library_dirs:
library_dirs.append(library_dir)
except FileNotFoundError:
pass
pkg_config_executable = _os.environ.get('PKG_CONFIG') or 'pkg-config'
for pkgname in ["arrow", "arrow_python"]:
if _has_pkg_config(pkgname):
library_dir = _read_pkg_config_variable(pkgname,
["--libs-only-L"])
assert library_dir.startswith("-L")
append_library_dir(library_dir[2:])

if _sys.platform == 'win32':
# TODO(wesm): Is this necessary, or does setuptools within a conda
# installation add Library\lib to the linker path for MSVC?
python_base_install = _os.path.dirname(_sys.executable)
library_lib = _os.path.join(python_base_install, 'Library', 'lib')
library_dir = _os.path.join(python_base_install, 'Library', 'lib')

if _os.path.exists(_os.path.join(library_lib, 'arrow.lib')):
library_dirs.append(library_lib)
if _os.path.exists(_os.path.join(library_dir, 'arrow.lib')):
append_library_dir(library_dir)

# ARROW-4074: Allow for ARROW_HOME to be set to some other directory
if 'ARROW_HOME' in _os.environ:
library_dirs.append(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
if _os.environ.get('ARROW_HOME'):
append_library_dir(_os.path.join(_os.environ['ARROW_HOME'], 'lib'))
else:
# Python wheels bundle the Arrow libraries in the pyarrow directory.
append_library_dir(_os.path.dirname(_os.path.abspath(__file__)))

return library_dirs
3 changes: 3 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CFixedWidthType" arrow::FixedWidthType"(CDataType):
int bit_width()

cdef cppclass CNullArray" arrow::NullArray"(CArray):
CNullArray(int64_t length)

cdef cppclass CDictionaryArray" arrow::DictionaryArray"(CArray):
CDictionaryArray(const shared_ptr[CDataType]& type,
const shared_ptr[CArray]& indices,
Expand Down
12 changes: 8 additions & 4 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@


groups = [
'cython',
'hypothesis',
'gandiva',
'hdfs',
Expand All @@ -53,6 +54,7 @@


defaults = {
'cython': False,
'hypothesis': False,
'gandiva': False,
'hdfs': False,
Expand All @@ -66,6 +68,12 @@
'flight': False,
}

try:
import cython # noqa
defaults['cython'] = True
except ImportError:
pass

try:
import pyarrow.gandiva # noqa
defaults['gandiva'] = True
Expand All @@ -78,14 +86,12 @@
except ImportError:
pass


try:
import pandas # noqa
defaults['pandas'] = True
except ImportError:
pass


try:
import pyarrow.parquet # noqa
defaults['parquet'] = True
Expand All @@ -98,14 +104,12 @@
except ImportError:
pass


try:
import tensorflow # noqa
defaults['tensorflow'] = True
except ImportError:
pass


try:
import pyarrow.flight # noqa
defaults['flight'] = True
Expand Down
10 changes: 9 additions & 1 deletion python/pyarrow/tests/pyarrow_cython_example.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,17 @@ from pyarrow.lib cimport *


def get_array_length(obj):
    # An example function accessing both the pyarrow Cython API
    # and the Arrow C++ API.
    #
    # Returns the length reported by the C++ array unwrapped from `obj`.
    # Raises TypeError("not an array") when unwrapping yields a null pointer.
    cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj)
    # A null pointer here is treated as "obj is not a pyarrow array"
    if arr.get() == NULL:
        raise TypeError("not an array")
    return arr.get().length()


def make_null_array(length):
    # An example function that returns a PyArrow object without PyArrow
    # being imported explicitly at the Python level.
    #
    # Builds a C++ NullArray with `length` entries and returns it wrapped
    # through pyarrow_wrap_array().
    cdef shared_ptr[CArray] null_array
    # Hand ownership of the freshly allocated C++ object to the shared_ptr
    null_array.reset(new CNullArray(length))
    return pyarrow_wrap_array(null_array)
31 changes: 23 additions & 8 deletions python/pyarrow/tests/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@
import pytest

import pyarrow as pa

import pyarrow.tests.util as test_util


here = os.path.dirname(os.path.abspath(__file__))


setup_template = """if 1:
from distutils.core import setup
from Cython.Build import cythonize
Expand All @@ -50,25 +51,24 @@
if custom_ld_path:
ext.library_dirs.append(custom_ld_path)
ext.extra_compile_args.extend(compiler_opts)
print("Extension module:",
ext, ext.include_dirs, ext.libraries, ext.library_dirs)

setup(
ext_modules=ext_modules,
)
"""


@pytest.mark.skipif(
'ARROW_HOME' not in os.environ,
reason='ARROW_HOME environment variable not defined')
@pytest.mark.cython
def test_cython_api(tmpdir):
"""
Basic test for the Cython API.
"""
pytest.importorskip('Cython')

ld_path_default = os.path.join(os.environ['ARROW_HOME'], 'lib')
# Fail early if cython is not found
import cython # noqa

test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', ld_path_default)
test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', '')

with tmpdir.as_cwd():
# Set up temporary workspace
Expand Down Expand Up @@ -106,3 +106,18 @@ def test_cython_api(tmpdir):
mod.get_array_length(None)
finally:
sys.path = orig_path

# Check the extension module is loadable from a subprocess without
# pyarrow imported first.
code = """if 1:
import sys

mod = __import__({mod_name!r})
arr = mod.make_null_array(5)
assert mod.get_array_length(arr) == 5
assert arr.null_count == 5
""".format(mod_path=str(tmpdir), mod_name='pyarrow_cython_example')

subprocess.check_call([sys.executable, '-c', code],
stdout=subprocess.PIPE,
env=subprocess_env)
5 changes: 3 additions & 2 deletions python/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
cython
hypothesis
pandas
pathlib2; python_version < "3.4"
pytest
hypothesis
pytz
pathlib2; python_version < "3.4"