Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
"Build the Arrow jemalloc-based allocator"
ON)

option(ARROW_JEMALLOC_USE_SHARED
"Rely on jemalloc shared libraries where relevant"
ON)

option(ARROW_BOOST_USE_SHARED
"Rely on boost shared libraries where relevant"
ON)
Expand All @@ -103,6 +107,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(ARROW_BUILD_UTILITIES
"Build Arrow commandline utilities"
ON)

# When enabled, shared libraries created through ADD_ARROW_LIB get an install
# RPATH that points at their own directory ($ORIGIN, or @loader_path on Apple
# platforms). Off by default.
option(ARROW_RPATH_ORIGIN
"Build Arrow libraries with RPATH set to \$ORIGIN"
OFF)
endif()

if(ARROW_BUILD_TESTS)
Expand Down
14 changes: 12 additions & 2 deletions cpp/cmake_modules/BuildUtils.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,21 @@ function(ADD_ARROW_LIB LIB_NAME)
LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}"
LINK_FLAGS "${ARG_SHARED_LINK_FLAGS}"
OUTPUT_NAME ${LIB_NAME})
target_link_libraries(${LIB_NAME}_shared
target_link_libraries(${LIB_NAME}_shared
LINK_PUBLIC ${ARG_SHARED_LINK_LIBS}
LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS})

if (ARROW_RPATH_ORIGIN)
if (APPLE)
set(_lib_install_rpath "@loader_path")
else()
set(_lib_install_rpath "\$ORIGIN")
endif()
set_target_properties(${LIB_NAME}_shared PROPERTIES
INSTALL_RPATH ${_lib_install_rpath})
endif()

install(TARGETS ${LIB_NAME}_shared
install(TARGETS ${LIB_NAME}_shared
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib)
endif()
Expand Down
27 changes: 23 additions & 4 deletions cpp/src/arrow/jemalloc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,29 @@ if (NOT APPLE)
set(ARROW_JEMALLOC_STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS} pthread)
endif()

set(ARROW_JEMALLOC_SHARED_LINK_LIBS
arrow_shared
jemalloc_shared
)
if (ARROW_JEMALLOC_USE_SHARED)
set(ARROW_JEMALLOC_SHARED_LINK_LIBS
arrow_shared
jemalloc_shared
)
else()
if (CMAKE_COMPILER_IS_GNUCXX)
set(ARROW_JEMALLOC_SHARED_LINK_LIBS
arrow_shared
jemalloc_static
# For glibc <2.17 we need to link to librt.
# As we compile with --as-needed by default, the linker will omit this
# dependency if not required.
rt
)
else()
set(ARROW_JEMALLOC_SHARED_LINK_LIBS
arrow_shared
jemalloc_static
)
endif()
endif()


if (ARROW_BUILD_STATIC)
set(ARROW_JEMALLOC_TEST_LINK_LIBS
Expand Down
45 changes: 45 additions & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
option(PYARROW_BUILD_JEMALLOC
"Build the PyArrow jemalloc integration"
OFF)
option(PYARROW_BUNDLE_ARROW_CPP
"Bundle the Arrow C++ libraries"
OFF)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I'm hoping to soon move the python/src/pyarrow tree to cpp/src/arrow/python, any idea how this will be impacted or whether that will make things any easier?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will have to refactor this a bit but it won't change much sadly. Packaging binary libs in a manylinux1 wheel is cumbersome.

endif()

if(NOT PYARROW_BUILD_TESTS)
Expand Down Expand Up @@ -332,6 +335,25 @@ endif()
## Arrow
find_package(Arrow REQUIRED)
include_directories(SYSTEM ${ARROW_INCLUDE_DIR})

if (PYARROW_BUNDLE_ARROW_CPP)
configure_file(${ARROW_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(ARROW_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow${CMAKE_SHARED_LIBRARY_SUFFIX})
configure_file(${ARROW_IO_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_io${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(ARROW_IO_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_io${CMAKE_SHARED_LIBRARY_SUFFIX})
configure_file(${ARROW_IPC_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_ipc${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(ARROW_IPC_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_ipc${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()

ADD_THIRDPARTY_LIB(arrow
SHARED_LIB ${ARROW_SHARED_LIB})
ADD_THIRDPARTY_LIB(arrow_io
Expand Down Expand Up @@ -440,6 +462,18 @@ if (PYARROW_BUILD_PARQUET)
if(NOT (PARQUET_FOUND AND PARQUET_ARROW_FOUND))
message(FATAL_ERROR "Unable to locate Parquet libraries")
endif()
if (PYARROW_BUNDLE_ARROW_CPP)
configure_file(${PARQUET_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(PARQUET_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet${CMAKE_SHARED_LIBRARY_SUFFIX})
configure_file(${PARQUET_ARROW_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet_arrow${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(PARQUET_ARROW_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libparquet_arrow${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
ADD_THIRDPARTY_LIB(parquet_arrow
SHARED_LIB ${PARQUET_ARROW_SHARED_LIB})
set(LINK_LIBS
Expand All @@ -451,6 +485,13 @@ if (PYARROW_BUILD_PARQUET)
endif()

if (PYARROW_BUILD_JEMALLOC)
if (PYARROW_BUNDLE_ARROW_CPP)
configure_file(${ARROW_JEMALLOC_SHARED_LIB}
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}
COPYONLY)
SET(ARROW_JEMALLOC_SHARED_LIB
${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
ADD_THIRDPARTY_LIB(arrow_jemalloc
SHARED_LIB ${ARROW_JEMALLOC_SHARED_LIB})
set(LINK_LIBS
Expand All @@ -463,6 +504,10 @@ endif()

add_library(pyarrow SHARED
${PYARROW_SRCS})
# When the Arrow C++ libraries are bundled, give libpyarrow an install RPATH
# that refers to its own directory.
if (PYARROW_BUNDLE_ARROW_CPP)
  # The macOS dynamic loader does not expand $ORIGIN; @loader_path is the
  # equivalent token there (same choice as ADD_ARROW_LIB makes for Arrow C++).
  if (APPLE)
    set(_pyarrow_install_rpath "@loader_path")
  else()
    set(_pyarrow_install_rpath "\$ORIGIN")
  endif()
  set_target_properties(pyarrow PROPERTIES
    INSTALL_RPATH "${_pyarrow_install_rpath}")
endif()
target_link_libraries(pyarrow ${LINK_LIBS})

if(APPLE)
Expand Down
19 changes: 0 additions & 19 deletions python/manylinux1/Dockerfile-parquet_arrow-base-x86_64

This file was deleted.

21 changes: 18 additions & 3 deletions python/manylinux1/Dockerfile-x86_64
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,23 @@
FROM quay.io/pypa/manylinux1_x86_64:latest

# Install dependencies
RUN yum install -y flex openssl-devel
RUN yum install -y flex zlib-devel

# Build a newer OpenSSL version to support Thrift 0.10.0. Note that we don't trigger the SSL code in Arrow.
WORKDIR /
RUN wget --no-check-certificate https://www.openssl.org/source/openssl-1.0.2k.tar.gz -O openssl-1.0.2k.tar.gz
RUN tar xf openssl-1.0.2k.tar.gz
WORKDIR openssl-1.0.2k
RUN ./config -fpic shared --prefix=/usr
RUN make -j5
RUN make install

WORKDIR /
RUN wget --no-check-certificate http://downloads.sourceforge.net/project/boost/boost/1.60.0/boost_1_60_0.tar.gz -O /boost_1_60_0.tar.gz
RUN tar xf boost_1_60_0.tar.gz
WORKDIR /boost_1_60_0
RUN ./bootstrap.sh
RUN ./bjam cxxflags=-fPIC cflags=-fPIC --prefix=/usr --with-filesystem --with-date_time --with-system install
RUN ./bjam cxxflags=-fPIC cflags=-fPIC --prefix=/usr --with-filesystem --with-date_time --with-system --with-regex install

WORKDIR /
RUN wget https://github.com/jemalloc/jemalloc/releases/download/4.4.0/jemalloc-4.4.0.tar.bz2 -O jemalloc-4.4.0.tar.bz2
Expand All @@ -43,5 +52,11 @@ RUN git checkout ffe59955ad8690c2f8bb74766cb7e9b0d0ee3963

ADD arrow /arrow
WORKDIR /arrow/cpp
RUN cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DARROW_HDFS=ON -DARROW_BUILD_TESTS=OFF -DARROW_BUILD_SHARED=ON -DARROW_BOOST_USE_SHARED=OFF -DARROW_JEMALLOC=ON .
RUN cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DARROW_HDFS=ON -DARROW_BUILD_TESTS=OFF -DARROW_BUILD_SHARED=ON -DARROW_BOOST_USE_SHARED=OFF -DARROW_JEMALLOC=ON -DARROW_RPATH_ORIGIN=ON -DARROW_JEMALLOC_USE_SHARED=OFF .
RUN make -j5 install

WORKDIR /
RUN git clone https://github.com/apache/parquet-cpp.git
WORKDIR /parquet-cpp
RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DPARQUET_BUILD_TESTS=OFF -DPARQUET_ARROW=ON -DPARQUET_BOOST_USE_SHARED=OFF .
RUN make -j5 install
4 changes: 1 addition & 3 deletions python/manylinux1/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ for all supported Python versions and place them in the `dist` folder.
git clone ../../ arrow
# Build the native baseimage
docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 .
# (optionally) build parquet-cpp
docker build -t parquet_arrow-base-x86_64 -f Dockerfile-parquet_arrow-base-x86_64 .
# Build the python packages
docker run --rm -v $PWD:/io parquet_arrow-base-x86_64 /io/build_arrow.sh
docker run --rm -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh
# Now the new packages are located in the dist/ folder
ls -l dist/
```
52 changes: 26 additions & 26 deletions python/manylinux1/build_arrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,56 +29,56 @@ source /multibuild/manylinux_utils.sh

cd /arrow/python

export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/lib"
# PyArrow build configuration
export PYARROW_BUILD_TYPE='release'
export PYARROW_CMAKE_OPTIONS='-DPYARROW_BUILD_TESTS=ON'
export PYARROW_WITH_PARQUET=1
export PYARROW_WITH_JEMALLOC=1
export PYARROW_BUNDLE_ARROW_CPP=1
# Needed, as otherwise arrow_io is sometimes not linked
export LDFLAGS="-Wl,--no-as-needed"
export ARROW_HOME="/usr"
export ARROW_HOME="/arrow-dist"
export PARQUET_HOME="/usr"

# Ensure the target directory exists
mkdir -p /io/dist
# Temporary directory to store the wheels that should be sent through auditwheel
rm_mkdir unfixed_wheels

PY35_BIN=/opt/python/cp35-cp35m/bin
$PY35_BIN/pip install 'pyelftools<0.24'
$PY35_BIN/pip install 'git+https://github.com/xhochy/auditwheel.git@pyarrow-fixes'

# Override repair_wheelhouse function
function repair_wheelhouse {
local in_dir=$1
local out_dir=$2
for whl in $in_dir/*.whl; do
if [[ $whl == *none-any.whl ]]; then
cp $whl $out_dir
else
# Store libraries directly in . not .libs to fix problems with libpyarrow.so linkage.
$PY35_BIN/auditwheel -v repair -L . $whl -w $out_dir/
fi
done
chmod -R a+rwX $out_dir
}

for PYTHON in ${PYTHON_VERSIONS}; do
PYTHON_INTERPRETER="$(cpython_path $PYTHON)/bin/python"
PIP="$(cpython_path $PYTHON)/bin/pip"
PIPI_IO="$PIP install -f $MANYLINUX_URL"
PATH="$PATH:$(cpython_path $PYTHON)"

echo "=== (${PYTHON}) Installing build dependencies ==="
$PIPI_IO "numpy==1.9.0"
$PIPI_IO "cython==0.24"

PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --with-jemalloc
# Clear output directory
rm -rf dist/
echo "=== (${PYTHON}) Building wheel ==="
PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --with-jemalloc --bundle-arrow-cpp
PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py bdist_wheel

# Test for optional modules
echo "=== (${PYTHON}) Test the existence of optional modules ==="
$PIPI_IO -r requirements.txt
PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER -c "import pyarrow.parquet"
PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER -c "import pyarrow.jemalloc"

repair_wheelhouse dist /io/dist
echo "=== (${PYTHON}) Tag the wheel with manylinux1 ==="
mkdir -p repaired_wheels/
auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/

echo "=== (${PYTHON}) Testing manylinux1 wheel ==="
# Pin the virtualenv version to keep the build reproducible
$PIPI_IO "virtualenv==15.1.0"
rm -rf venv
"$(cpython_path $PYTHON)/bin/virtualenv" -p ${PYTHON_INTERPRETER} --no-download venv
source ./venv/bin/activate
pip install repaired_wheels/*.whl
pip install pytest pandas
py.test venv/lib/*/site-packages/pyarrow
deactivate

mv repaired_wheels/*.whl /io/dist
done

50 changes: 37 additions & 13 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from os.path import join as pjoin

from distutils.command.clean import clean as _clean
from distutils.util import strtobool
from distutils import sysconfig

# Check if we're running 64-bit Python
Expand Down Expand Up @@ -81,15 +82,17 @@ def run(self):
user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'),
('build-type=', None, 'build type (debug or release)'),
('with-parquet', None, 'build the Parquet extension'),
('with-jemalloc', None, 'build the jemalloc extension')] +
('with-jemalloc', None, 'build the jemalloc extension'),
('bundle-arrow-cpp', None, 'bundle the Arrow C++ libraries')] +
_build_ext.user_options)

def initialize_options(self):
_build_ext.initialize_options(self)
self.extra_cmake_args = os.environ.get('PYARROW_CMAKE_OPTIONS', '')
self.build_type = os.environ.get('PYARROW_BUILD_TYPE', 'debug').lower()
self.with_parquet = False
self.with_jemalloc = False
self.with_parquet = strtobool(os.environ.get('PYARROW_WITH_PARQUET', '0'))
self.with_jemalloc = strtobool(os.environ.get('PYARROW_WITH_JEMALLOC', '0'))
self.bundle_arrow_cpp = strtobool(os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0'))

CYTHON_MODULE_NAMES = [
'array',
Expand Down Expand Up @@ -142,6 +145,9 @@ def _run_cmake(self):
if self.with_jemalloc:
cmake_options.append('-DPYARROW_BUILD_JEMALLOC=on')

if self.bundle_arrow_cpp:
cmake_options.append('-DPYARROW_BUNDLE_ARROW_CPP=ON')

if sys.platform != 'win32':
cmake_options.append('-DCMAKE_BUILD_TYPE={0}'
.format(self.build_type))
Expand Down Expand Up @@ -181,17 +187,35 @@ def _run_cmake(self):

# Move the built libpyarrow library to the place expected by the Python
# build
if sys.platform != 'win32':
name, = glob.glob(pjoin(self.build_type, 'libpyarrow.*'))
try:
os.makedirs(pjoin(build_lib, 'pyarrow'))
except OSError:
pass
shutil.move(name,
pjoin(build_lib, 'pyarrow', os.path.split(name)[1]))
shared_library_prefix = 'lib'
if sys.platform == 'darwin':
shared_library_suffix = '.dylib'
elif sys.platform == 'win32':
shared_library_suffix = '.dll'
shared_library_prefix = ''
else:
shutil.move(pjoin(self.build_type, 'pyarrow.dll'),
pjoin(build_lib, 'pyarrow', 'pyarrow.dll'))
shared_library_suffix = '.so'

try:
os.makedirs(pjoin(build_lib, 'pyarrow'))
except OSError:
pass

def move_lib(lib_name):
lib_filename = shared_library_prefix + lib_name + shared_library_suffix
shutil.move(pjoin(self.build_type, lib_filename),
pjoin(build_lib, 'pyarrow', lib_filename))

move_lib("pyarrow")
if self.bundle_arrow_cpp:
move_lib("arrow")
move_lib("arrow_io")
move_lib("arrow_ipc")
if self.with_jemalloc:
move_lib("arrow_jemalloc")
if self.with_parquet:
move_lib("parquet")
move_lib("parquet_arrow")

# Move the built C-extension to the place expected by the Python build
self._found_names = []
Expand Down