From 8c2885ed95044ac74b5470de67e7a85ba8e7f2f6 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 18 Apr 2017 11:26:58 -0400 Subject: [PATCH 1/3] Another pass on conda dev guide, do not require LD_LIBRARY_PATH. Install everything in a single conda environment Change-Id: I4a655e39df1c3ffe08864e6345f390a2dfd04213 --- python/DEVELOPMENT.md | 63 +++++++++++++++++++++---------------------- python/setup.py | 2 +- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md index ca744628da1..475b906bd60 100644 --- a/python/DEVELOPMENT.md +++ b/python/DEVELOPMENT.md @@ -16,36 +16,14 @@ ### Linux and macOS -First, set up your thirdparty C++ toolchain using libraries from conda-forge: +First, let's create a conda environment with all the C++ build and Python +dependencies from conda-forge: ```shell -conda config --add channels conda-forge - -export ARROW_BUILD_TYPE=Release - -export CPP_TOOLCHAIN=$HOME/cpp-toolchain -export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH - -export BOOST_ROOT=$CPP_TOOLCHAIN -export FLATBUFFERS_HOME=$CPP_TOOLCHAIN -export RAPIDJSON_HOME=$CPP_TOOLCHAIN -export THRIFT_HOME=$CPP_TOOLCHAIN -export ZLIB_HOME=$CPP_TOOLCHAIN -export SNAPPY_HOME=$CPP_TOOLCHAIN -export BROTLI_HOME=$CPP_TOOLCHAIN -export JEMALLOC_HOME=$CPP_TOOLCHAIN -export ARROW_HOME=$CPP_TOOLCHAIN -export PARQUET_HOME=$CPP_TOOLCHAIN - -conda create -y -q -p $CPP_TOOLCHAIN \ - flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib brotli jemalloc -``` - -Now, activate a conda environment containing your target Python version and -NumPy installed: - -```shell -conda create -y -q -n pyarrow-dev python=3.6 numpy +conda create -y -q -n pyarrow-dev \ + python=3.6 numpy six setuptools cython pandas pytest \ + flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \ + brotli jemalloc -c conda-forge source activate pyarrow-dev ``` @@ -67,6 +45,26 @@ drwxrwxr-x 12 wesm wesm 4096 Apr 15 19:19 arrow/ drwxrwxr-x 12 wesm wesm 4096 Apr 15 
19:19 parquet-cpp/ ``` +We need to set a number of environment variables to let Arrow's build system +know about our build toolchain: + +``` +export ARROW_BUILD_TYPE=release + +export BOOST_ROOT=$CONDA_PREFIX +export BOOST_LIBRARYDIR=$CONDA_PREFIX/lib + +export FLATBUFFERS_HOME=$CONDA_PREFIX +export RAPIDJSON_HOME=$CONDA_PREFIX +export THRIFT_HOME=$CONDA_PREFIX +export ZLIB_HOME=$CONDA_PREFIX +export SNAPPY_HOME=$CONDA_PREFIX +export BROTLI_HOME=$CONDA_PREFIX +export JEMALLOC_HOME=$CONDA_PREFIX +export ARROW_HOME=$CONDA_PREFIX +export PARQUET_HOME=$CONDA_PREFIX +``` + Now build and install the Arrow C++ libraries: ```shell @@ -74,8 +72,9 @@ mkdir arrow/cpp/build pushd arrow/cpp/build cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ - -DCMAKE_INSTALL_PREFIX=$CPP_TOOLCHAIN \ + -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ -DARROW_PYTHON=on \ + -DARROW_BOOST_USE_SHARED=off \ -DARROW_BUILD_TESTS=OFF \ .. make -j4 @@ -90,7 +89,7 @@ mkdir parquet-cpp/build pushd parquet-cpp/build cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ - -DCMAKE_INSTALL_PREFIX=$CPP_TOOLCHAIN \ + -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ -DPARQUET_BUILD_BENCHMARKS=off \ -DPARQUET_BUILD_EXECUTABLES=off \ -DPARQUET_ZLIB_VENDORED=off \ @@ -102,11 +101,9 @@ make install popd ``` -Now, install requisite build requirements for pyarrow, then build: +Now, build pyarrow: ```shell -conda install -y -q six setuptools cython pandas pytest - cd arrow/python python setup.py build_ext --build-type=$ARROW_BUILD_TYPE --with-parquet --inplace ``` diff --git a/python/setup.py b/python/setup.py index ab71e7858e6..1c466170669 100644 --- a/python/setup.py +++ b/python/setup.py @@ -155,7 +155,7 @@ def _run_cmake(self): cmake_options.append('-DPYARROW_BUNDLE_ARROW_CPP=ON') cmake_options.append('-DCMAKE_BUILD_TYPE={0}' - .format(self.build_type)) + .format(self.build_type.lower())) if sys.platform != 'win32': cmake_command = (['cmake', self.extra_cmake_args] + From 8c957053df5d935c683ea18f6f8149f4d5c71597 Mon Sep 17 00:00:00 2001 From: 
Wes McKinney Date: Tue, 18 Apr 2017 11:48:45 -0400 Subject: [PATCH 2/3] Install cmake in conda env Change-Id: Ib38249c4a85ed414ef4a9c669c3d5aaa488339fc --- python/DEVELOPMENT.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md index 475b906bd60..8f70e335dfe 100644 --- a/python/DEVELOPMENT.md +++ b/python/DEVELOPMENT.md @@ -22,7 +22,7 @@ dependencies from conda-forge: ```shell conda create -y -q -n pyarrow-dev \ python=3.6 numpy six setuptools cython pandas pytest \ - flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \ + cmake flatbuffers rapidjson boost-cpp thrift-cpp snappy zlib \ brotli jemalloc -c conda-forge source activate pyarrow-dev ``` From 881a44da4218ac496a3473c604f7f19cad9bd49f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Tue, 18 Apr 2017 21:03:20 -0400 Subject: [PATCH 3/3] Add system requirements notes about gcc 4.9, use boost shared libs Change-Id: I964df5aab950061326119fb4672da721afff3382 --- python/DEVELOPMENT.md | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/python/DEVELOPMENT.md b/python/DEVELOPMENT.md index 8f70e335dfe..7f08169d613 100644 --- a/python/DEVELOPMENT.md +++ b/python/DEVELOPMENT.md @@ -16,6 +16,33 @@ ### Linux and macOS +#### System Requirements + +On macOS, any modern Xcode (6.4 or higher; the current version is 8.3.1) is +sufficient. + +On Linux, for this guide, we recommend using gcc 4.8 or 4.9, or clang 3.7 or +higher. 
You can check your version by running: + +```shell +$ gcc --version +``` + +On Ubuntu 16.04 and higher, you can obtain gcc 4.9 with: + +```shell +$ sudo apt-get install g++-4.9 +``` + +Finally, set gcc 4.9 as the active compiler using: + +```shell +export CC=gcc-4.9 +export CXX=g++-4.9 +``` + +#### Environment Setup and Build + First, let's create a conda environment with all the C++ build and Python dependencies from conda-forge: @@ -74,7 +101,6 @@ pushd arrow/cpp/build cmake -DCMAKE_BUILD_TYPE=$ARROW_BUILD_TYPE \ -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX \ -DARROW_PYTHON=on \ - -DARROW_BOOST_USE_SHARED=off \ -DARROW_BUILD_TESTS=OFF \ .. make -j4