From 365f5683f82cedba422ae13d422a9a4988b585dd Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 16 Nov 2022 13:28:13 +0100 Subject: [PATCH 1/7] Copy pyarrow cpp headers even if pyarrow build is not bundled with Arrow cpp --- python/setup.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/python/setup.py b/python/setup.py index bcc427f87df..99d6cfe8d7a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -436,19 +436,28 @@ def copy_libs(dir): else: build_prefix = self.build_type + # Copy PyArrow headers to pyarrow/include + pyarrow_include = pjoin(build_lib, 'pyarrow', 'include') + def copy_pyarrow_cpp_headers(): + pyarrow_cpp_include = pjoin(pyarrow_cpp_home, 'include') + if os.path.exists(pjoin(pyarrow_include, 'arrow', 'python')): + shutil.rmtree(pjoin(pyarrow_include, 'arrow', 'python')) + shutil.copytree(pjoin(pyarrow_cpp_include, 'arrow', 'python'), + pjoin(pyarrow_include, 'arrow', 'python')) + copy_pyarrow_cpp_headers() + + # Move Arrow C++ and Pyarrow C++ headers to pyarrow/include + # if bundled build is specified if self.bundle_arrow_cpp or self.bundle_arrow_cpp_headers: arrow_cpp_include = pjoin(build_prefix, 'include') print('Bundling includes: ' + arrow_cpp_include) - pyarrow_include = pjoin(build_lib, 'pyarrow', 'include') if os.path.exists(pyarrow_include): shutil.rmtree(pyarrow_include) shutil.move(arrow_cpp_include, pyarrow_include) # pyarrow/include file is first deleted in the previous step # so we need to add the PyArrow C++ include folder again - pyarrow_cpp_include = pjoin(pyarrow_cpp_home, 'include') - shutil.move(pjoin(pyarrow_cpp_include, 'arrow', 'python'), - pjoin(pyarrow_include, 'arrow', 'python')) + copy_pyarrow_cpp_headers() # Move the built C-extension to the place expected by the Python # build From d5f953f9c4b9a256de9876057fe285a134acc24d Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 16 Nov 2022 14:13:50 +0100 Subject: [PATCH 2/7] Optimize moving of header files --- python/setup.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/python/setup.py b/python/setup.py index 99d6cfe8d7a..5049f895fc5 100755 --- a/python/setup.py +++ b/python/setup.py @@ -436,18 +436,8 @@ def copy_libs(dir): else: build_prefix = self.build_type - # Copy PyArrow headers to pyarrow/include pyarrow_include = pjoin(build_lib, 'pyarrow', 'include') - def copy_pyarrow_cpp_headers(): - pyarrow_cpp_include = pjoin(pyarrow_cpp_home, 'include') - if os.path.exists(pjoin(pyarrow_include, 'arrow', 'python')): - shutil.rmtree(pjoin(pyarrow_include, 'arrow', 'python')) - shutil.copytree(pjoin(pyarrow_cpp_include, 'arrow', 'python'), - pjoin(pyarrow_include, 'arrow', 'python')) - copy_pyarrow_cpp_headers() - - # Move Arrow C++ and Pyarrow C++ headers to pyarrow/include - # if bundled build is specified + # Move Arrow C++ headers to pyarrow/include if self.bundle_arrow_cpp or self.bundle_arrow_cpp_headers: arrow_cpp_include = pjoin(build_prefix, 'include') print('Bundling includes: ' + arrow_cpp_include) @@ -455,9 +445,14 @@ def copy_pyarrow_cpp_headers(): shutil.rmtree(pyarrow_include) shutil.move(arrow_cpp_include, pyarrow_include) - # pyarrow/include file is first deleted in the previous step - # so we need to add the PyArrow C++ include folder again - copy_pyarrow_cpp_headers() + # Move PyArrow headers to pyarrow/include + pyarrow_cpp_include = pjoin(pyarrow_cpp_home, 'include') + print('Moving PyArrow C++ includes: ' + + pjoin(pyarrow_include, 'arrow', 'python')) + if os.path.exists(pjoin(pyarrow_include, 'arrow', 'python')): + shutil.rmtree(pjoin(pyarrow_include, 'arrow', 'python')) + shutil.move(pjoin(pyarrow_cpp_include, 'arrow', 'python'), + pjoin(pyarrow_include, 'arrow', 'python')) # Move the built C-extension to the place expected by the Python # build From 893bfdfa6a975e49209d653d457931a9f0674ca0 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 17 Nov 2022 10:57:41 +0100 Subject: [PATCH 3/7] Define a variable for pjoin(pyarrow_include, 'arrow', 'python') --- python/setup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/python/setup.py b/python/setup.py index 5049f895fc5..e6ef23afd0a 100755 --- a/python/setup.py +++ b/python/setup.py @@ -447,12 +447,12 @@ def copy_libs(dir): # Move PyArrow headers to pyarrow/include pyarrow_cpp_include = pjoin(pyarrow_cpp_home, 'include') - print('Moving PyArrow C++ includes: ' + - pjoin(pyarrow_include, 'arrow', 'python')) - if os.path.exists(pjoin(pyarrow_include, 'arrow', 'python')): - shutil.rmtree(pjoin(pyarrow_include, 'arrow', 'python')) + pyarrow_include_sub = pjoin(pyarrow_include, 'arrow', 'python') + print('Moving PyArrow C++ includes: ' + pyarrow_include_sub) + if os.path.exists(pyarrow_include_sub): + shutil.rmtree(pyarrow_include_sub) shutil.move(pjoin(pyarrow_cpp_include, 'arrow', 'python'), - pjoin(pyarrow_include, 'arrow', 'python')) + pyarrow_include_sub) # Move the built C-extension to the place expected by the Python # build From 3ea82031c7432db5598d61a10cbdcf074329e7af Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 17 Nov 2022 11:14:09 +0100 Subject: [PATCH 4/7] Add a test for the existance of pyarrow/include folder --- python/pyarrow/tests/test_cpp_internals.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index 1c44bff9b5b..5357b64c617 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -15,6 +15,9 @@ # specific language governing permissions and limitations # under the License. +from pathlib import Path +import os + from pyarrow._pyarrow_cpp_tests import get_cpp_tests @@ -31,3 +34,19 @@ def wrapper(case=case): inject_cpp_tests(globals()) + + +def test_pyarrow_include(): + # We need to make sure that pyarrow/include is always + # created. Either with PyArrow C++ header files or with + # Arrow C++ and PyArrow C++ header files together + + cwd = os.getcwd() + pyarrow_include = os.path.join(cwd, 'pyarrow', 'include') + pyarrow_cpp_include = os.path.join(pyarrow_include, 'arrow', 'python') + + obj_include = Path(pyarrow_include) + obj_python_include = Path(pyarrow_cpp_include) + + assert obj_include.exists() + assert obj_python_include.exists() From 5f35dde4629a96bc42b30ef4b03b2043ad1f5c26 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 17 Nov 2022 11:44:17 +0100 Subject: [PATCH 5/7] Try something different with paths to pass the test on CI --- python/pyarrow/tests/test_cpp_internals.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index 5357b64c617..f1b6179cc8f 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -15,8 +15,8 @@ # specific language governing permissions and limitations # under the License. -from pathlib import Path -import os +import os.path +from os.path import join as pjoin from pyarrow._pyarrow_cpp_tests import get_cpp_tests @@ -41,12 +41,10 @@ def test_pyarrow_include(): # created. Either with PyArrow C++ header files or with # Arrow C++ and PyArrow C++ header files together - cwd = os.getcwd() - pyarrow_include = os.path.join(cwd, 'pyarrow', 'include') + source = os.path.dirname(os.path.abspath(__file__)) + pyarrow_dir = os.path.abspath(os.path.join(source, '..')) + pyarrow_include = os.path.join(pyarrow_dir, 'include') pyarrow_cpp_include = os.path.join(pyarrow_include, 'arrow', 'python') - obj_include = Path(pyarrow_include) - obj_python_include = Path(pyarrow_cpp_include) - - assert obj_include.exists() - assert obj_python_include.exists() + assert os.path.exists(pyarrow_include) + assert os.path.exists(pyarrow_cpp_include) From acf87540fe6e1597549275f784401206cb747dd3 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 17 Nov 2022 11:46:58 +0100 Subject: [PATCH 6/7] Use pjoin instead of os.path.join --- python/pyarrow/tests/test_cpp_internals.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index f1b6179cc8f..17e865299d6 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -42,9 +42,9 @@ def test_pyarrow_include(): # Arrow C++ and PyArrow C++ header files together source = os.path.dirname(os.path.abspath(__file__)) - pyarrow_dir = os.path.abspath(os.path.join(source, '..')) - pyarrow_include = os.path.join(pyarrow_dir, 'include') - pyarrow_cpp_include = os.path.join(pyarrow_include, 'arrow', 'python') + pyarrow_dir = os.path.abspath(pjoin(source, '..')) + pyarrow_include = pjoin(pyarrow_dir, 'include') + pyarrow_cpp_include = pjoin(pyarrow_include, 'arrow', 'python') assert os.path.exists(pyarrow_include) assert os.path.exists(pyarrow_cpp_include) From 4669e940882c67e320da88c587dd14b227cff84a Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Tue, 22 Nov 2022 12:02:48 +0100 Subject: [PATCH 7/7] Update python/pyarrow/tests/test_cpp_internals.py Co-authored-by: Joris Van den Bossche --- python/pyarrow/tests/test_cpp_internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_cpp_internals.py b/python/pyarrow/tests/test_cpp_internals.py index 17e865299d6..83800b77f89 100644 --- a/python/pyarrow/tests/test_cpp_internals.py +++ b/python/pyarrow/tests/test_cpp_internals.py @@ -42,7 +42,7 @@ def test_pyarrow_include(): # Arrow C++ and PyArrow C++ header files together source = os.path.dirname(os.path.abspath(__file__)) - pyarrow_dir = os.path.abspath(pjoin(source, '..')) + pyarrow_dir = pjoin(source, '..') pyarrow_include = pjoin(pyarrow_dir, 'include') pyarrow_cpp_include = pjoin(pyarrow_include, 'arrow', 'python')