From 1c9628f154a73f5d77a78da5ef63707493790201 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 14:41:37 -0700 Subject: [PATCH 01/11] try fixing tensorflow crash --- python/pyarrow/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 20254c2a84d..6cda5f56211 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -44,6 +44,17 @@ def parse_version(root): __version__ = None +try: + import ctypes + import os + import site + SITE_PATH, = site.getsitepackages() + ctypes.CDLL(os.path.join(SITE_PATH, "tensorflow", + "libtensorflow_framework.so")) +except: + pass + + from pyarrow.lib import cpu_count, set_cpu_count from pyarrow.lib import (null, bool_, int8, int16, int32, int64, From 7835fba1ad3792a2a206b3941900906ac6c6a709 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 15:57:59 -0700 Subject: [PATCH 02/11] check for linux --- python/pyarrow/__init__.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 6cda5f56211..0367b071afd 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -47,10 +47,12 @@ def parse_version(root): try: import ctypes import os + from sys import platform import site - SITE_PATH, = site.getsitepackages() - ctypes.CDLL(os.path.join(SITE_PATH, "tensorflow", - "libtensorflow_framework.so")) + if platform == "linux" or platform == "linux2": + SITE_PATH, = site.getsitepackages() + ctypes.CDLL(os.path.join(SITE_PATH, "tensorflow", + "libtensorflow_framework.so")) except: pass From 02cb5005d51e62ebd7422169b8282dd26e705674 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 16:07:57 -0700 Subject: [PATCH 03/11] tests if the wheels work with tensorflow --- ci/travis_script_manylinux.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh index 14e6404d3de..f06cd0a75e3 100755 --- a/ci/travis_script_manylinux.sh +++ b/ci/travis_script_manylinux.sh @@ -24,3 +24,11 @@ pushd python/manylinux1 git clone ../../ arrow docker build -t arrow-base-x86_64 -f Dockerfile-x86_64 . docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_64 /io/build_arrow.sh + +# Testing for https://issues.apache.org/jira/browse/ARROW-2657 +# These tests cannot be run inside of the docker container, since TensorFlow +# does not run on manylinux1 + +pip install tensorflow +pip install "dist/`ls dist/ | grep cp36`" +python -c "import pyarrow; import tensorflow" From 57ca5fc2de475f9093a057f8009cec0e1433f5b6 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 17:22:02 -0700 Subject: [PATCH 04/11] install conda to test wheels outside of docker --- ci/travis_script_manylinux.sh | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh index f06cd0a75e3..a6c0d533187 100755 --- a/ci/travis_script_manylinux.sh +++ b/ci/travis_script_manylinux.sh @@ -29,6 +29,16 @@ docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io arrow-base-x86_6 # These tests cannot be run inside of the docker container, since TensorFlow # does not run on manylinux1 +source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh + +source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh + +PYTHON_VERSION=3.6 +CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION + +conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION +source activate $CONDA_ENV_DIR + pip install tensorflow pip install "dist/`ls dist/ | grep cp36`" python -c "import pyarrow; import tensorflow" From 1135b51b090de26afb5dbc0067fa92bb0fde2351 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 17:46:43 -0700 Subject: [PATCH 05/11] silence tensorflow installation --- ci/travis_script_manylinux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh index a6c0d533187..9ea15e7902f 100755 --- a/ci/travis_script_manylinux.sh +++ b/ci/travis_script_manylinux.sh @@ -39,6 +39,6 @@ CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION source activate $CONDA_ENV_DIR -pip install tensorflow +pip install -q tensorflow pip install "dist/`ls dist/ | grep cp36`" python -c "import pyarrow; import tensorflow" From ac38837d1063dc35bfab24c8ec6be7fe40b2145b Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 2 Jul 2018 17:50:27 -0700 Subject: [PATCH 06/11] add clarification comment --- python/pyarrow/__init__.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 0367b071afd..1662cd0d25d 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -45,6 +45,12 @@ def parse_version(root): try: + # We need to do the following to load TensorFlow before + # pyarrow.lib. If we don't do this there are symbol clashes + # between TensorFlow's use of threading and our global + # thread pool, see also + # https://issues.apache.org/jira/browse/ARROW-2657 and + # https://github.com/apache/arrow/pull/2096. import ctypes import os from sys import platform From c18cccb67cfe2ac8c62eceb934308e46932cdbd2 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 3 Jul 2018 15:58:09 -0700 Subject: [PATCH 07/11] address comments --- python/pyarrow/__init__.py | 22 +++++----------------- python/pyarrow/compat.py | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 1662cd0d25d..c8f23ca3fee 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -44,23 +44,11 @@ def parse_version(root): __version__ = None -try: - # We need to do the following to load TensorFlow before - # pyarrow.lib. If we don't do this there are symbol clashes - # between TensorFlow's use of threading and our global - # thread pool, see also - # https://issues.apache.org/jira/browse/ARROW-2657 and - # https://github.com/apache/arrow/pull/2096. - import ctypes - import os - from sys import platform - import site - if platform == "linux" or platform == "linux2": - SITE_PATH, = site.getsitepackages() - ctypes.CDLL(os.path.join(SITE_PATH, "tensorflow", - "libtensorflow_framework.so")) -except: - pass +from pyarrow.compat import import_tensorflow_extension + + +# Workaround for https://issues.apache.org/jira/browse/ARROW-2657 +import_tensorflow_extension() from pyarrow.lib import cpu_count, set_cpu_count diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index 1b19ca0e402..b2e98082253 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -160,6 +160,25 @@ def encode_file_path(path): # will convert utf8 to utf16 return encoded_path +def import_tensorflow_extension(): + """ + Load the TensorFlow extension if it exists. + + This is used to load the TensorFlow extension before + pyarrow.lib. If we don't do this there are symbol clashes + between TensorFlow's use of threading and our global + thread pool, see also + https://issues.apache.org/jira/browse/ARROW-2657 and + https://github.com/apache/arrow/pull/2096. + """ + import os + import site + for site_path in site.getsitepackages(): + ext = os.path.join(site_path, "tensorflow", + "libtensorflow_framework.so") + if os.path.exists(ext): + import ctypes + ctypes.CDLL(ext) integer_types = six.integer_types + (np.integer,) From bbf6cfc3b77247f91b68a1bb0dda891f0e77aca5 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 3 Jul 2018 16:15:54 -0700 Subject: [PATCH 08/11] load TensorFlow for sure if it exists --- python/pyarrow/compat.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index b2e98082253..724ecd899d4 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -173,12 +173,28 @@ def import_tensorflow_extension(): """ import os import site - for site_path in site.getsitepackages(): + tensorflow_loaded = False + + # Try to load the tensorflow extension directly + # (this is much faster than importing tensorflow) + site_paths = site.getsitepackages() + [site.getusersitepackages()] + for site_path in site_paths: ext = os.path.join(site_path, "tensorflow", "libtensorflow_framework.so") if os.path.exists(ext): import ctypes ctypes.CDLL(ext) + tensorflow_loaded = True + break + + # If this failed, try to load tensorflow the normal ways + # (this is more expensive) + if not tensorflow_loaded: + try: + import tensorflow + except: + pass + integer_types = six.integer_types + (np.integer,) From 70f3bcaac66f0418bb210b1fe72690e1ab9b8aff Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 3 Jul 2018 16:43:05 -0700 Subject: [PATCH 09/11] workaround for virtualenv --- python/pyarrow/compat.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index 724ecd899d4..03af73a67f9 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -177,7 +177,11 @@ def import_tensorflow_extension(): # Try to load the tensorflow extension directly # (this is much faster than importing tensorflow) - site_paths = site.getsitepackages() + [site.getusersitepackages()] + try: + site_paths = site.getsitepackages() + [site.getusersitepackages()] + except AttributeError: + # Workaround for https://github.com/pypa/virtualenv/issues/228 + site_paths = [os.path.dirname(site.__file__) + '/site-packages'] for site_path in site_paths: ext = os.path.join(site_path, "tensorflow", "libtensorflow_framework.so") @@ -192,7 +196,7 @@ def import_tensorflow_extension(): if not tensorflow_loaded: try: import tensorflow - except: + except ImportError: pass From 2ca3de9fc7606f2b8483b1edea31aaba6bf3782c Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Tue, 3 Jul 2018 16:51:26 -0700 Subject: [PATCH 10/11] clarify comment --- python/pyarrow/compat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/compat.py b/python/pyarrow/compat.py index 03af73a67f9..1fcaf4c59c4 100644 --- a/python/pyarrow/compat.py +++ b/python/pyarrow/compat.py @@ -176,11 +176,14 @@ def import_tensorflow_extension(): tensorflow_loaded = False # Try to load the tensorflow extension directly - # (this is much faster than importing tensorflow) + # This is a performance optimization, tensorflow will always be + # loaded via the "import tensorflow" statement below if this + # doesn't succeed. try: site_paths = site.getsitepackages() + [site.getusersitepackages()] except AttributeError: - # Workaround for https://github.com/pypa/virtualenv/issues/228 + # Workaround for https://github.com/pypa/virtualenv/issues/228, + # this happends in some configurations of virtualenv site_paths = [os.path.dirname(site.__file__) + '/site-packages'] for site_path in site_paths: ext = os.path.join(site_path, "tensorflow", @@ -191,7 +194,7 @@ def import_tensorflow_extension(): tensorflow_loaded = True break - # If this failed, try to load tensorflow the normal ways + # If the above failed, try to load tensorflow the normal way # (this is more expensive) if not tensorflow_loaded: try: From 92aef7a9b0d4032cc67f3c39e340de743e877e7f Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 4 Jul 2018 10:49:20 -0400 Subject: [PATCH 11/11] Use compat namespace to avoid adding import_tensorflow_extension to pyarrow.* namespace Change-Id: I94bd60d98b64cab6105fbdda793bc82050dc640e --- python/pyarrow/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index c8f23ca3fee..dc045e6eab5 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -44,11 +44,11 @@ def parse_version(root): __version__ = None -from pyarrow.compat import import_tensorflow_extension +import pyarrow.compat as compat # Workaround for https://issues.apache.org/jira/browse/ARROW-2657 -import_tensorflow_extension() +compat.import_tensorflow_extension() from pyarrow.lib import cpu_count, set_cpu_count