diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 4400eeeae66..a18bc5b9133 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -20,6 +20,7 @@ set -ex arrow_dir=${1} +test_dir=${1}/python/build/dist export ARROW_SOURCE_DIR=${arrow_dir} export ARROW_TEST_DATA=${arrow_dir}/testing/data @@ -54,4 +55,14 @@ export PYARROW_TEST_ORC export PYARROW_TEST_PARQUET export PYARROW_TEST_S3 +# Testing PyArrow C++ +if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then + pushd ${test_dir} + ctest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 + popd +fi +# Testing PyArrow pytest -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/ci/scripts/python_wheel_macos_build.sh b/ci/scripts/python_wheel_macos_build.sh index a6e763b6523..6bd6ea22a35 100755 --- a/ci/scripts/python_wheel_macos_build.sh +++ b/ci/scripts/python_wheel_macos_build.sh @@ -156,6 +156,7 @@ export PYARROW_WITH_PLASMA=${ARROW_PLASMA} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} export PYARROW_CMAKE_OPTIONS="-DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL}" +export ARROW_HOME=${build_dir}/install # PyArrow build configuration export PKG_CONFIG_PATH=/usr/lib/pkgconfig:${build_dir}/install/lib/pkgconfig # Set PyArrow version explicitly diff --git a/ci/scripts/python_wheel_manylinux_build.sh b/ci/scripts/python_wheel_manylinux_build.sh index af17606199e..47721e45e3b 100755 --- a/ci/scripts/python_wheel_manylinux_build.sh +++ b/ci/scripts/python_wheel_manylinux_build.sh @@ -151,6 +151,7 @@ export PYARROW_WITH_PARQUET_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION} export PYARROW_WITH_PLASMA=${ARROW_PLASMA} export PYARROW_WITH_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_WITH_S3=${ARROW_S3} +export ARROW_HOME=/tmp/arrow-dist # PyArrow build configuration export PKG_CONFIG_PATH=/usr/lib/pkgconfig:/tmp/arrow-dist/lib/pkgconfig diff --git a/cpp/cmake_modules/FindArrowPython.cmake 
b/cpp/cmake_modules/FindArrowPython.cmake index b503e6a9e02..5acd3dab1d0 100644 --- a/cpp/cmake_modules/FindArrowPython.cmake +++ b/cpp/cmake_modules/FindArrowPython.cmake @@ -47,7 +47,7 @@ find_package(Arrow ${find_package_arguments}) if(ARROW_FOUND) arrow_find_package(ARROW_PYTHON - "${ARROW_HOME}" + "${PYARROW_CPP_HOME}" arrow_python arrow/python/api.h ArrowPython diff --git a/cpp/cmake_modules/FindArrowPythonFlight.cmake b/cpp/cmake_modules/FindArrowPythonFlight.cmake index 3a639928ce5..fb670fa7658 100644 --- a/cpp/cmake_modules/FindArrowPythonFlight.cmake +++ b/cpp/cmake_modules/FindArrowPythonFlight.cmake @@ -50,7 +50,7 @@ find_package(ArrowPython ${find_package_arguments}) if(ARROW_PYTHON_FOUND AND ARROW_FLIGHT_FOUND) arrow_find_package(ARROW_PYTHON_FLIGHT - "${ARROW_HOME}" + "${PYARROW_CPP_HOME}" arrow_python_flight arrow/python/flight.h ArrowPythonFlight diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 5070d22fc55..1e30e50a317 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -805,10 +805,6 @@ if(ARROW_ORC) add_subdirectory(adapters/orc) endif() -if(ARROW_PYTHON) - add_subdirectory(python) -endif() - if(ARROW_TENSORFLOW) add_subdirectory(adapters/tensorflow) endif() diff --git a/cpp/src/arrow/public_api_test.cc b/cpp/src/arrow/public_api_test.cc index 45f3313c67f..a2aa624d092 100644 --- a/cpp/src/arrow/public_api_test.cc +++ b/cpp/src/arrow/public_api_test.cc @@ -50,10 +50,6 @@ #include "arrow/json/api.h" // IWYU pragma: keep #endif -#ifdef ARROW_PYTHON -#include "arrow/python/api.h" // IWYU pragma: keep -#endif - #ifdef DCHECK #error "DCHECK should not be visible from Arrow public headers." 
#endif diff --git a/cpp/src/arrow/python/CMakeLists.txt b/cpp/src/arrow/python/CMakeLists.txt deleted file mode 100644 index c37240a426c..00000000000 --- a/cpp/src/arrow/python/CMakeLists.txt +++ /dev/null @@ -1,208 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -# -# arrow_python -# - -find_package(Python3Alt 3.7 REQUIRED) - -add_custom_target(arrow_python-all) -add_custom_target(arrow_python) -add_custom_target(arrow_python-tests) -add_dependencies(arrow_python-all arrow_python arrow_python-tests) - -set(ARROW_PYTHON_SRCS - arrow_to_pandas.cc - benchmark.cc - common.cc - datetime.cc - decimal.cc - deserialize.cc - extension_type.cc - gdb.cc - helpers.cc - inference.cc - init.cc - io.cc - ipc.cc - numpy_convert.cc - numpy_to_arrow.cc - python_to_arrow.cc - pyarrow.cc - serialize.cc - udf.cc) - -set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON - SKIP_UNITY_BUILD_INCLUSION ON) - -if(ARROW_CSV) - list(APPEND ARROW_PYTHON_SRCS csv.cc) -endif() - -if(ARROW_FILESYSTEM) - list(APPEND ARROW_PYTHON_SRCS filesystem.cc) -endif() - -if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc) -endif() - -set(ARROW_PYTHON_DEPENDENCIES arrow_dependencies) - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set_property(SOURCE pyarrow.cc - APPEND_STRING - PROPERTY COMPILE_FLAGS " -Wno-cast-qual ") -endif() - -set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared) -set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS) -set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS}) - -if(WIN32) - list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES} ${PYTHON_OTHER_LIBS}) -endif() -if(PARQUET_REQUIRE_ENCRYPTION) - list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared) -endif() -if(ARROW_USE_XSIMD) - list(APPEND ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS xsimd) - list(APPEND ARROW_PYTHON_STATIC_LINK_LIBS xsimd) -endif() - -set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) - -add_arrow_lib(arrow_python - CMAKE_PACKAGE_NAME - ArrowPython - PKG_CONFIG_NAME - arrow-python - SOURCES - ${ARROW_PYTHON_SRCS} - PRECOMPILED_HEADERS - "$<$:arrow/python/pch.h>" - OUTPUTS - ARROW_PYTHON_LIBRARIES - DEPENDENCIES - ${ARROW_PYTHON_DEPENDENCIES} - 
SHARED_LINK_FLAGS - ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt - SHARED_LINK_LIBS - ${ARROW_PYTHON_SHARED_LINK_LIBS} - SHARED_PRIVATE_LINK_LIBS - ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS} - STATIC_LINK_LIBS - ${ARROW_PYTHON_STATIC_LINK_LIBS} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}") - -add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES}) - -foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING) -endforeach() - -if(ARROW_BUILD_STATIC AND MSVC) - target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC) -endif() - -if(ARROW_FLIGHT AND ARROW_BUILD_SHARED) - # Must link to shared libarrow_flight: we don't want to link more than one - # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls - # fail with weird errors due to multiple copies of global static state (The - # other solution is to link gRPC shared everywhere instead of statically only - # in Flight) - add_arrow_lib(arrow_python_flight - CMAKE_PACKAGE_NAME - ArrowPythonFlight - PKG_CONFIG_NAME - arrow-python-flight - SOURCES - flight.cc - OUTPUTS - ARROW_PYFLIGHT_LIBRARIES - DEPENDENCIES - flight_grpc_gen - SHARED_LINK_FLAGS - ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt - SHARED_LINK_LIBS - arrow_python_shared - arrow_flight_shared - STATIC_LINK_LIBS - ${PYTHON_OTHER_LIBS} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}" - PRIVATE_INCLUDES - "${Protobuf_INCLUDE_DIRS}") - - add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES}) - - foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES}) - target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING) - endforeach() - - if(ARROW_BUILD_STATIC AND MSVC) - target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC) - endif() -endif() - -if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - # Clang, be quiet. 
Python C API has lots of macros - set_property(SOURCE ${ARROW_PYTHON_SRCS} - APPEND_STRING - PROPERTY COMPILE_FLAGS -Wno-parentheses-equality) -endif() - -arrow_install_all_headers("arrow/python") - -# ---------------------------------------------------------------------- - -if(ARROW_BUILD_TESTS) - add_library(arrow_python_test_main STATIC util/test_main.cc) - - target_link_libraries(arrow_python_test_main GTest::gtest) - target_include_directories(arrow_python_test_main SYSTEM - PUBLIC ${ARROW_PYTHON_INCLUDES}) - - if(APPLE) - target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS}) - set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS - "-undefined dynamic_lookup") - elseif(NOT MSVC) - target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS}) - endif() - - if(ARROW_TEST_LINKAGE STREQUAL shared) - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_shared - arrow_testing_shared arrow_shared) - else() - set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_static - arrow_testing_static arrow_static) - endif() - - add_arrow_test(python_test - STATIC_LINK_LIBS - "${ARROW_PYTHON_TEST_LINK_LIBS}" - EXTRA_LINK_LIBS - ${PYTHON_LIBRARIES} - EXTRA_INCLUDES - "${ARROW_PYTHON_INCLUDES}" - LABELS - "arrow_python-tests" - NO_VALGRIND) -endif() diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index fa00d6290c1..808b48d4c4f 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -197,6 +197,13 @@ def test_version_pre_tag "+set(MLARROW_VERSION \"#{@release_version}\")"], ], }, + { + path: "python/pyarrow/src/CMakeLists.txt", + hunks: [ + ["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_PYTHON_VERSION \"#{@release_version}\")"], + ], + }, { path: "python/setup.py", hunks: [ diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 5bcd5c55ea3..7f7def52800 100644 --- 
a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -144,6 +144,13 @@ def test_version_post_tag "+set(MLARROW_VERSION \"#{@next_snapshot_version}\")"], ], }, + { + path: "python/pyarrow/src/CMakeLists.txt", + hunks: [ + ["-set(ARROW_PYTHON_VERSION \"#{@snapshot_version}\")", + "+set(ARROW_PYTHON_VERSION \"#{@next_snapshot_version}\")"], + ], + }, { path: "python/setup.py", hunks: [ diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index c9d0309425c..752d06fd595 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -98,10 +98,6 @@ dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.doc-base dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.install dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib-doc.links dev/tasks/linux-packages/apache-arrow/debian/libarrow-glib1000.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install -dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install dev/tasks/linux-packages/apache-arrow/debian/libarrow1000.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-dev.install dev/tasks/linux-packages/apache-arrow/debian/libgandiva-glib-dev.install diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 7b8bcf062b1..f6ee589f157 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -121,6 +121,14 @@ update_versions() { git add setup.py popd + pushd "${ARROW_DIR}/python/pyarrow/src" + sed -i.bak -E -e \ + "s/^set\(ARROW_PYTHON_VERSION \".+\"\)/set(ARROW_PYTHON_VERSION \"${version}\")/" \ + CMakeLists.txt + rm -f CMakeLists.txt.bak + git add CMakeLists.txt + popd + pushd "${ARROW_DIR}/r" sed -i.bak -E -e \ "s/^Version: .+/Version: 
${r_version}/" \ diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index 121abdbc6eb..5de5209a55c 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -198,13 +198,6 @@ ruby -r gi -e "p GI.load('ArrowFlightSQL')" echo "::endgroup::" -if [ "${have_python}" = "yes" ]; then - echo "::group::Test libarrow-python" - ${APT_INSTALL} libarrow-python-dev=${package_version} - echo "::endgroup::" -fi - - if [ "${have_plasma}" = "yes" ]; then echo "::group::Test Plasma" ${APT_INSTALL} libplasma-glib-dev=${package_version} diff --git a/dev/release/verify-yum.sh b/dev/release/verify-yum.sh index 93bbcfe1366..6d40bccf2bc 100755 --- a/dev/release/verify-yum.sh +++ b/dev/release/verify-yum.sh @@ -250,12 +250,6 @@ if [ "${have_flight}" = "yes" ]; then echo "::endgroup::" fi -if [ "${have_python}" = "yes" ]; then - echo "::group::Test libarrow-python" - ${install_command} --enablerepo=epel arrow-python-devel-${package_version} - echo "::endgroup::" -fi - echo "::group::Test Plasma" if [ "${have_glib}" = "yes" ]; then ${install_command} --enablerepo=epel plasma-glib-devel-${package_version} diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index dcf1afe4a1a..8f1ba74e3af 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -123,7 +123,6 @@ outputs: - test -f $PREFIX/lib/libarrow.so # [linux] - test -f $PREFIX/lib/libarrow_dataset.so # [linux] - test -f $PREFIX/lib/libarrow_flight.so # [linux] - - test -f $PREFIX/lib/libarrow_python.so # [linux] - test -f $PREFIX/lib/libparquet.so # [linux] - test -f $PREFIX/lib/libgandiva.so # [linux] - test -f $PREFIX/lib/libplasma.so # [linux] @@ -133,14 +132,12 @@ outputs: - if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win] - test -f $PREFIX/lib/libarrow.dylib # [osx] - test -f $PREFIX/lib/libarrow_dataset.dylib # [osx] - - test -f 
$PREFIX/lib/libarrow_python.dylib # [osx] - test -f $PREFIX/lib/libgandiva.dylib # [osx and not arm64] - test -f $PREFIX/lib/libparquet.dylib # [osx] - test -f $PREFIX/lib/libplasma.dylib # [osx] - if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\arrow_dataset.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\arrow_flight.dll exit 1 # [win] - - if not exist %PREFIX%\\Library\\bin\\arrow_python.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win] - if not exist %PREFIX%\\Library\\bin\\gandiva.dll exit 1 # [win] @@ -148,14 +145,12 @@ outputs: - test ! -f $PREFIX/lib/libarrow.a # [unix] - test ! -f $PREFIX/lib/libarrow_dataset.a # [unix] - test ! -f $PREFIX/lib/libarrow_flight.a # [unix] - - test ! -f $PREFIX/lib/libarrow_python.a # [unix] - test ! -f $PREFIX/lib/libplasma.a # [unix] - test ! -f $PREFIX/lib/libparquet.a # [unix] - test ! -f $PREFIX/lib/libgandiva.a # [unix] - if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win] - - if exist %PREFIX%\\Library\\lib\\arrow_python_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win] - if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win] diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile index 152ac08a7ca..0b50d887128 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bookworm/Dockerfile @@ -69,7 +69,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ rapidjson-dev \ tzdata \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile 
b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile index 440a07f09c2..600ecc6962e 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile @@ -69,7 +69,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ rapidjson-dev \ tzdata \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile index 5abee7c2e09..cea9eeb6fac 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile @@ -63,7 +63,6 @@ RUN \ nlohmann-json3-dev \ pkg-config \ python3-dev \ - python3-numpy \ python3-pip \ python3-setuptools \ rapidjson-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile index 7e95c37642a..e305479f721 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile @@ -67,7 +67,6 @@ RUN \ pkg-config \ protobuf-compiler-grpc \ python3-dev \ - python3-numpy \ python3-pip \ python3-setuptools \ rapidjson-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index 7a7aeb8dce4..1e05faf2828 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -32,8 +32,6 @@ Build-Depends: nvidia-cuda-toolkit [!arm64], pkg-config, @USE_SYSTEM_GRPC@ protobuf-compiler-grpc, -@ENABLE_PYTHON@ python3-dev, -@ENABLE_PYTHON@ python3-numpy, valac, tzdata, zlib1g-dev @@ -119,35 +117,6 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ library files for Flight SQL system. 
-@ENABLE_PYTHON@Package: libarrow-python1000 -@ENABLE_PYTHON@Section: libs -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Pre-Depends: ${misc:Pre-Depends} -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ ${shlibs:Depends}, -@ENABLE_PYTHON@ libarrow1000 (= ${binary:Version}), -@ENABLE_PYTHON@ python3, -@ENABLE_PYTHON@ python3-numpy -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ library files for Python support. - -@ENABLE_PYTHON@Package: libarrow-python-flight1000 -@ENABLE_PYTHON@Section: libs -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Pre-Depends: ${misc:Pre-Depends} -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ ${shlibs:Depends}, -@ENABLE_PYTHON@ libarrow-flight1000 (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python1000 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ library files for Flight and Python support. - Package: libarrow-dev Section: libdevel Architecture: any @@ -158,7 +127,7 @@ Depends: libbrotli-dev, libbz2-dev, @USE_SYSTEM_C_ARES@ libc-ares-dev, - libcurl4-openssl-dev, + libcurl4-openssl-dev, @USE_SYSTEM_GRPC@ libgrpc++-dev, liblz4-dev, libre2-dev, @@ -223,31 +192,6 @@ Description: Apache Arrow is a data processing library for analysis . This package provides C++ header files for Flight SQL system. 
-@ENABLE_PYTHON@Package: libarrow-python-dev -@ENABLE_PYTHON@Section: libdevel -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ libarrow-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python1000 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ header files for Python support. - -@ENABLE_PYTHON@Package: libarrow-python-flight-dev -@ENABLE_PYTHON@Section: libdevel -@ENABLE_PYTHON@Architecture: any -@ENABLE_PYTHON@Multi-Arch: same -@ENABLE_PYTHON@Depends: -@ENABLE_PYTHON@ ${misc:Depends}, -@ENABLE_PYTHON@ libarrow-flight-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python-dev (= ${binary:Version}), -@ENABLE_PYTHON@ libarrow-python-flight1000 (= ${binary:Version}) -@ENABLE_PYTHON@Description: Apache Arrow is a data processing library for analysis -@ENABLE_PYTHON@ . -@ENABLE_PYTHON@ This package provides C++ header files for Flight and Python support. 
- Package: libgandiva1000 Section: libs Architecture: any diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install deleted file mode 100644 index 807583f9845..00000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-dev.install +++ /dev/null @@ -1,6 +0,0 @@ -usr/lib/*/cmake/arrow/ArrowPythonConfig*.cmake -usr/lib/*/cmake/arrow/ArrowPythonTargets*.cmake -usr/lib/*/cmake/arrow/FindArrowPython.cmake -usr/lib/*/libarrow_python.a -usr/lib/*/libarrow_python.so -usr/lib/*/pkgconfig/arrow-python.pc diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install deleted file mode 100644 index 6cf96e227e9..00000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight-dev.install +++ /dev/null @@ -1,6 +0,0 @@ -usr/lib/*/cmake/arrow/ArrowPythonFlightConfig*.cmake -usr/lib/*/cmake/arrow/ArrowPythonFlightTargets*.cmake -usr/lib/*/cmake/arrow/FindArrowPythonFlight.cmake -usr/lib/*/libarrow_python_flight.a -usr/lib/*/libarrow_python_flight.so -usr/lib/*/pkgconfig/arrow-python-flight.pc diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install deleted file mode 100644 index b7cbfec1f05..00000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python-flight1000.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow_python_flight.so.* diff --git a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install b/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install deleted file mode 100644 index eef3e664837..00000000000 --- a/dev/tasks/linux-packages/apache-arrow/debian/libarrow-python1000.install +++ /dev/null @@ -1 +0,0 @@ -usr/lib/*/libarrow_python.so.* diff --git 
a/dev/tasks/linux-packages/apache-arrow/debian/rules b/dev/tasks/linux-packages/apache-arrow/debian/rules index 3889ddc42ed..b9664c15a7c 100755 --- a/dev/tasks/linux-packages/apache-arrow/debian/rules +++ b/dev/tasks/linux-packages/apache-arrow/debian/rules @@ -21,11 +21,6 @@ override_dh_auto_configure: ARROW_CUDA=OFF; \ ARROW_PLASMA=OFF; \ fi; \ - if python3 -c 'import numpy' > /dev/null 2>&1; then \ - ARROW_PYTHON=ON; \ - else \ - ARROW_PYTHON=OFF; \ - fi; \ dh_auto_configure \ --sourcedirectory=cpp \ --builddirectory=cpp_build \ @@ -49,7 +44,6 @@ override_dh_auto_configure: -DARROW_PACKAGE_KIND=deb \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=$${ARROW_PLASMA} \ - -DARROW_PYTHON=$${ARROW_PYTHON} \ -DARROW_S3=ON \ -DARROW_USE_CCACHE=OFF \ -DARROW_WITH_BROTLI=ON \ @@ -62,9 +56,7 @@ override_dh_auto_configure: -DCMAKE_UNITY_BUILD=ON \ -DCUDAToolkit_ROOT=/usr \ -DPARQUET_BUILD_EXECUTABLES=ON \ - -DPARQUET_REQUIRE_ENCRYPTION=ON \ - -DPythonInterp_FIND_VERSION=ON \ - -DPythonInterp_FIND_VERSION_MAJOR=3 + -DPARQUET_REQUIRE_ENCRYPTION=ON override_dh_auto_build: dh_auto_build \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile index a75fd022bda..5a5fd903bfc 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile @@ -52,8 +52,6 @@ RUN \ openssl-devel \ pkg-config \ python39 \ - python39-devel \ - python39-numpy \ python39-pip \ re2-devel \ # rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile index df63f7a1a7e..dde7930ff5b 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile @@ -51,8 +51,6 @@ RUN \ openssl-devel \ pkg-config \ python3 \ - python3-devel \ - python3-numpy \ python3-pip \ 
re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 59b62315627..872f461fa4a 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -64,7 +64,6 @@ %define use_gflags (!%{is_amazon_linux}) %define use_glog (%{rhel} <= 8) %define use_mimalloc (%{rhel} >= 8) -%define use_python (%{rhel} >= 8) # TODO: Enable this. This works on local but is fragile on GitHub Actions and # Travis CI. # %%define use_s3 (%%{rhel} >= 8) @@ -115,10 +114,6 @@ BuildRequires: lz4-devel %{lz4_requirement} BuildRequires: ninja-build BuildRequires: openssl-devel BuildRequires: pkgconfig -%if %{use_python} -BuildRequires: python%{python_version}-devel -BuildRequires: python%{python_version}-numpy -%endif %if %{have_rapidjson} BuildRequires: rapidjson-devel %endif @@ -175,9 +170,6 @@ cd cpp -DARROW_PACKAGE_KIND=rpm \ -DARROW_PARQUET=ON \ -DARROW_PLASMA=ON \ -%if %{use_python} - -DARROW_PYTHON=ON \ -%endif %if %{use_s3} -DARROW_S3=ON \ %endif @@ -191,10 +183,6 @@ cd cpp -DCMAKE_BUILD_TYPE=$cpp_build_type \ -DPARQUET_BUILD_EXECUTABLES=ON \ -DPARQUET_REQUIRE_ENCRYPTION=ON \ -%if %{use_python} - -DPythonInterp_FIND_VERSION=ON \ - -DPythonInterp_FIND_VERSION_MAJOR=3 \ -%endif -G"Unix Makefiles" %arrow_cmake_build cd - @@ -288,7 +276,7 @@ Requires: bzip2-devel %if %{use_flight} Requires: c-ares-devel %endif -Requires: curl-devel +Requires: curl-devel %if %{use_gcs} Requires: json-devel %endif @@ -506,86 +494,6 @@ Libraries and header files for Gandiva. 
%{_libdir}/pkgconfig/gandiva.pc %endif -%if %{use_python} -%package -n %{name}%{major_version}-python-libs -Summary: Python integration library for Apache Arrow -License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} -Requires: python%{python_version}-numpy - -%description -n %{name}%{major_version}-python-libs -This package contains the Python integration library for Apache Arrow. - -%files -n %{name}%{major_version}-python-libs -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_libdir}/libarrow_python.so.* - -%package python-devel -Summary: Libraries and header files for Python integration library for Apache Arrow -License: Apache-2.0 -Requires: %{name}%{major_version}-python-libs = %{version}-%{release} -Requires: %{name}-devel = %{version}-%{release} -Requires: python%{python_version}-devel - -%description python-devel -Libraries and header files for Python integration library for Apache Arrow. - -%files python-devel -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_includedir}/arrow/python/ -%exclude %{_includedir}/arrow/python/flight.h -%{_libdir}/cmake/arrow/ArrowPythonConfig*.cmake -%{_libdir}/cmake/arrow/ArrowPythonTargets*.cmake -%{_libdir}/cmake/arrow/FindArrowPython.cmake -%{_libdir}/libarrow_python.a -%{_libdir}/libarrow_python.so -%{_libdir}/pkgconfig/arrow-python.pc - -%if %{use_flight} -%package -n %{name}%{major_version}-python-flight-libs -Summary: Python integration library for Apache Arrow Flight -License: Apache-2.0 -Requires: %{name}%{major_version}-flight-libs = %{version}-%{release} -Requires: %{name}%{major_version}-python-libs = %{version}-%{release} - -%description -n %{name}%{major_version}-python-flight-libs -This package contains the Python integration library for Apache Arrow Flight. 
- -%files -n %{name}%{major_version}-python-flight-libs -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_libdir}/libarrow_python_flight.so.* - -%package python-flight-devel -Summary: Libraries and header files for Python integration library for Apache Arrow Flight. -License: Apache-2.0 -Requires: %{name}%{major_version}-python-flight-libs = %{version}-%{release} -Requires: %{name}-flight-devel = %{version}-%{release} -Requires: %{name}-python-devel = %{version}-%{release} - -%description python-flight-devel -Libraries and header files for Python integration library for -Apache Arrow Flight. - -%files python-flight-devel -%defattr(-,root,root,-) -%doc README.md -%license LICENSE.txt NOTICE.txt -%{_includedir}/arrow/python/flight.h -%{_libdir}/cmake/arrow/ArrowPythonFlightConfig*.cmake -%{_libdir}/cmake/arrow/ArrowPythonFlightTargets*.cmake -%{_libdir}/cmake/arrow/FindArrowPythonFlight.cmake -%{_libdir}/libarrow_python_flight.a -%{_libdir}/libarrow_python_flight.so -%{_libdir}/pkgconfig/arrow-python-flight.pc -%endif -%endif - %package -n plasma%{major_version}-libs Summary: Runtime libraries for Plasma in-memory object store License: Apache-2.0 diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile index 9c93e2f2407..04e74012f95 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -47,8 +47,6 @@ RUN \ openssl-devel \ pkg-config \ python36 \ - python36-devel \ - python36-numpy \ rapidjson-devel \ rpmdevtools \ snappy-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile index b29cc4565bd..5dba632628a 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8-stream/Dockerfile @@ 
-52,8 +52,6 @@ RUN \ openssl-devel \ pkg-config \ python39 \ - python39-devel \ - python39-numpy \ python39-pip \ re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile index 38e6ae4531e..8bcd97fa6fe 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile @@ -51,8 +51,6 @@ RUN \ openssl-devel \ pkg-config \ python3 \ - python3-devel \ - python3-numpy \ python3-pip \ re2-devel \ rapidjson-devel \ diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index fa5cbf969cb..40133f287a4 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -627,14 +627,6 @@ tasks: - libarrow-glib-doc_{no_rc_version}-1_[a-z0-9]+.deb - libarrow-glib1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - libarrow-glib1000_{no_rc_version}-1_[a-z0-9]+.deb - {% if target != "ubuntu-bionic" %} - - libarrow-python-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python-flight-dev_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python-flight1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-python-flight1000_{no_rc_version}-1_[a-z0-9]+.deb - - libarrow-python1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - - libarrow-python1000_{no_rc_version}-1_[a-z0-9]+.deb - {% endif %} - libarrow1000-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb - libarrow1000_{no_rc_version}-1_[a-z0-9]+.deb - libgandiva-dev_{no_rc_version}-1_[a-z0-9]+.deb @@ -748,16 +740,6 @@ tasks: - arrow[0-9]+-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm {% endif %} - arrow[0-9]+-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if not is_rhel7_based %} - - arrow-python-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% if architecture == "amd64" %} - - arrow-python-flight-devel-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - 
arrow[0-9]+-python-flight-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-python-flight-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} - - arrow[0-9]+-python-libs-debuginfo-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - - arrow[0-9]+-python-libs-{no_rc_version}-1.[a-z0-9]+.[a-z0-9_]+.rpm - {% endif %} {% if architecture == "amd64" %} - arrow-{no_rc_version}-1.[a-z0-9]+.src.rpm {% endif %} diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index 98ed93968a9..c30efd2358f 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -131,6 +131,30 @@ for ``.py`` files or for ``.pyx`` and ``.pxi`` files. In this case you will also need to install the `pytest-cython `_ plugin. +Testing PyArrow C++ +------------------- + +Most of the tests for PyArrow are part of the ``pytest``-based test suite mentioned above, +but a few low-level tests are written directly in C++ for historical reasons. +Those tests can be run using ``ctest``, but you first will need to build Arrow C++ +with ``-DARROW_BUILD_TESTS=ON``. + +.. note:: + + Currently, building the PyArrow C++ unit tests does not work with the + googletest package from conda-forge. If you are in this situation, please + add ``-DGTest_SOURCE=BUNDLED`` to the CMake flags + when building Arrow C++. + +After Arrow C++ and PyArrow are built, you can navigate to the ``python/build/dist`` +folder and run ``ctest``: + +.. code-block:: + + $ pushd arrow/python/build/dist + $ ctest + $ popd + Benchmarking ------------ @@ -391,6 +415,13 @@ variable to 1. To set the number of threads used to compile PyArrow's C++/Cython components, set the ``PYARROW_PARALLEL`` environment variable. +.. note:: + + If you used a different directory name for building Arrow C++ (by default it is + named "build"), then you should also set the environment variable + ``ARROW_BUILD_DIR='name_of_build_dir'``. This way + PyArrow can find the Arrow C++ built files. 
+ If you wish to delete stale PyArrow build artifacts before rebuilding, navigate to the ``arrow/python`` folder and run ``git clean -Xfd .``. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a657f56bb2d..66087fb9795 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -604,29 +604,27 @@ foreach(module ${CYTHON_EXTENSIONS}) ${module_output_directory}) endif() - if(PYARROW_BUNDLE_ARROW_CPP) - # In the event that we are bundling the shared libraries (e.g. in a - # manylinux1 wheel), we need to set the RPATH of the extensions to the - # root of the pyarrow/ package so that libarrow/libarrow_python are able - # to be loaded properly - if(APPLE) - set(module_install_rpath "@loader_path/") - else() - set(module_install_rpath "\$ORIGIN") - endif() + # In the event that we are bundling the shared libraries (e.g. in a + # manylinux1 wheel), we need to set the RPATH of the extensions to the + # root of the pyarrow/ package so that libarrow is able to be + # loaded properly + if(APPLE) + set(module_install_rpath "@loader_path/") + else() + set(module_install_rpath "\$ORIGIN") + endif() - # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython - # modules in interior directories. Since all of our C extensions and - # bundled libraries are in the same place, we can skip this part + # XXX(wesm): ARROW-2326 this logic is only needed when we have Cython + # modules in interior directories. 
Since all of our C extensions and + # bundled libraries are in the same place, we can skip this part - # list(LENGTH directories i) - # while(${i} GREATER 0) - # set(module_install_rpath "${module_install_rpath}/..") - # math(EXPR i "${i} - 1" ) - # endwhile(${i} GREATER 0) + # list(LENGTH directories i) + # while(${i} GREATER 0) + # set(module_install_rpath "${module_install_rpath}/..") + # math(EXPR i "${i} - 1" ) + # endwhile(${i} GREATER 0) - set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) - endif() + set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) if(PYARROW_GENERATE_COVERAGE) set_target_properties(${module_name} PROPERTIES COMPILE_DEFINITIONS diff --git a/cpp/src/arrow/python/ArrowPythonConfig.cmake.in b/python/pyarrow/src/ArrowPythonConfig.cmake.in similarity index 100% rename from cpp/src/arrow/python/ArrowPythonConfig.cmake.in rename to python/pyarrow/src/ArrowPythonConfig.cmake.in diff --git a/cpp/src/arrow/python/ArrowPythonFlightConfig.cmake.in b/python/pyarrow/src/ArrowPythonFlightConfig.cmake.in similarity index 100% rename from cpp/src/arrow/python/ArrowPythonFlightConfig.cmake.in rename to python/pyarrow/src/ArrowPythonFlightConfig.cmake.in diff --git a/python/pyarrow/src/CMakeLists.txt b/python/pyarrow/src/CMakeLists.txt new file mode 100644 index 00000000000..178b156a5c0 --- /dev/null +++ b/python/pyarrow/src/CMakeLists.txt @@ -0,0 +1,457 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# arrow_python +# + +cmake_minimum_required(VERSION 3.5) + +# RPATH settings on macOS do not affect install_name. +# https://cmake.org/cmake/help/latest/policy/CMP0068.html +if(POLICY CMP0068) + cmake_policy(SET CMP0068 NEW) +endif() + +# +# Define +# CMAKE_MODULE_PATH: location of cmake_modules in python +# + +get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY) +get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY) +get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY) +set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp") + +# normalize ARROW_HOME path +file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME) +set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules" "${ARROW_HOME}/lib/cmake/arrow") + +# +# Arrow version +# + +set(ARROW_PYTHON_VERSION "10.0.0-SNAPSHOT") +string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION "${ARROW_PYTHON_VERSION}") +# Need to set to ARROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}") + +if(NOT DEFINED CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE Release) +endif() + +# +# Arrow +# + +find_package(Arrow REQUIRED) +include(ArrowOptions) + +# +# Python +# +# Use the first Python installation on PATH, not the newest one +set(Python3_FIND_STRATEGY "LOCATION") +# On Windows, use registry last, not first +set(Python3_FIND_REGISTRY "LAST") +# On macOS, use framework last, not first +set(Python3_FIND_FRAMEWORK "LAST") + +find_package(Python3Alt 3.7 REQUIRED) +include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} ${ARROW_INCLUDE_DIR} src) + +add_custom_target(arrow_python-all) +add_custom_target(arrow_python) +add_custom_target(arrow_python-tests) +add_dependencies(arrow_python-all arrow_python arrow_python-tests) + +set(ARROW_PYTHON_SRCS + arrow_to_pandas.cc + benchmark.cc + common.cc + datetime.cc + decimal.cc + deserialize.cc + extension_type.cc + gdb.cc + helpers.cc + inference.cc + init.cc + io.cc + ipc.cc + numpy_convert.cc + numpy_to_arrow.cc + python_to_arrow.cc + pyarrow.cc + serialize.cc + udf.cc) + +set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON + SKIP_UNITY_BUILD_INCLUSION ON) + +# +# Arrow vs PyArrow cpp options +# + +# Check all the options from Arrow and PyArrow cpp to be in line +if(PYARROW_WITH_DATASET) + find_package(ArrowDataset REQUIRED) +endif() + +if(PYARROW_WITH_PARQUET_ENCRYPTION) + if(PARQUET_REQUIRE_ENCRYPTION) + list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc) + find_package(Parquet REQUIRED) + else() + message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON") + endif() +endif() + +if(PYARROW_WITH_HDFS) + if(NOT ARROW_HDFS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") + endif() +endif() + +# Check for only Arrow C++ options +if(ARROW_CSV) + list(APPEND ARROW_PYTHON_SRCS csv.cc) +endif() + +if(ARROW_FILESYSTEM) + list(APPEND ARROW_PYTHON_SRCS filesystem.cc) +endif() + +# Link 
to arrow dependencies +if(ARROW_BUILD_SHARED) + set(ARROW_PYTHON_DEPENDENCIES arrow_shared) +else() + set(THREADS_PREFER_PTHREAD_FLAG ON) + find_package(Threads REQUIRED) + set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads) +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set_property(SOURCE pyarrow.cc + APPEND_STRING + PROPERTY COMPILE_FLAGS " -Wno-cast-qual ") +endif() + +# +# Compiler stuff +# + +include(GNUInstallDirs) + +# This ensures that things like gnu++11 get passed correctly +if(NOT DEFINED CMAKE_CXX_STANDARD) + set(CMAKE_CXX_STANDARD 11) +endif() + +# We require a C++11 compliant compiler +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Needed compiler flags +include(SetupCxxFlags) + +# +# Shared/static link libs +# + +set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared) +set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS) +set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS}) + +if(WIN32) + list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES} ${PYTHON_OTHER_LIBS}) +endif() + +if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION) + list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared) +endif() + +set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS}) + +# Include macros needed to find and use add_arrow_lib function +include(BuildUtils) +include(CMakePackageConfigHelpers) + +# Set the output directory for cmake module +# (CMAKE_INSTALL_PREFIX = python/build/dist! should be set in setup.py!)
+set(ARROW_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}") + +# Changing ARROW_SOURCE_DIR for sdist build +# In this case cpp/cmake_modules doesn't exist +if(NOT EXISTS "${ARROW_SOURCE_DIR}/cpp/cmake_modules/Find${MODULE}.cmake") + set(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR}) +endif() + +# Build the arrow_python library from ARROW_PYTHON_SRCS. +# pch.h is used as a precompiled header for C++ sources only. +add_arrow_lib(arrow_python + CMAKE_PACKAGE_NAME + ArrowPython + PKG_CONFIG_NAME + arrow-python + SOURCES + ${ARROW_PYTHON_SRCS} + PRECOMPILED_HEADERS + "$<$<COMPILE_LANGUAGE:CXX>:pch.h>" + OUTPUTS + ARROW_PYTHON_LIBRARIES + DEPENDENCIES + ${ARROW_PYTHON_DEPENDENCIES} + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} + SHARED_LINK_LIBS + ${ARROW_PYTHON_SHARED_LINK_LIBS} + SHARED_PRIVATE_LINK_LIBS + ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS} + STATIC_LINK_LIBS + ${ARROW_PYTHON_STATIC_LINK_LIBS} + EXTRA_INCLUDES + "${ARROW_PYTHON_INCLUDES}") + +# Building the arrow_python umbrella target builds every library output above +add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES}) + +foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING) +endforeach() + +if(ARROW_BUILD_STATIC AND MSVC) + target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC) +endif() + +if(ARROW_FLIGHT AND ARROW_BUILD_SHARED) + # Must link to shared libarrow_flight: we don't want to link more than one + # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls + # fail with weird errors due to multiple copies of global static state (The + # other solution is to link gRPC shared everywhere instead of statically only + # in Flight) + find_package(ArrowFlight REQUIRED) + + set(FLIGHT_LINK_LIBS arrow_flight_shared) + + add_arrow_lib(arrow_python_flight + CMAKE_PACKAGE_NAME + ArrowPythonFlight + PKG_CONFIG_NAME + arrow-python-flight + SOURCES + flight.cc + OUTPUTS + ARROW_PYFLIGHT_LIBRARIES + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} + SHARED_LINK_LIBS + arrow_python_shared + arrow_flight_shared + STATIC_LINK_LIBS + ${PYTHON_OTHER_LIBS} + EXTRA_INCLUDES + "${ARROW_PYTHON_INCLUDES}" + PRIVATE_INCLUDES +
"${Protobuf_INCLUDE_DIRS}") + + add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES}) + + foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING) + endforeach() + + if(ARROW_BUILD_STATIC AND MSVC) + target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC) + endif() +endif() + +if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # Clang, be quiet. Python C API has lots of macros + set_property(SOURCE ${ARROW_PYTHON_SRCS} + APPEND_STRING + PROPERTY COMPILE_FLAGS -Wno-parentheses-equality) +endif() + +arrow_install_all_headers("arrow/python") + +# ---------------------------------------------------------------------- + +# +# Tests +# The tests will be moved to Cython and are currently supported for bundled GTest +# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016 +# + +if(ARROW_BUILD_TESTS) + + enable_testing() + set(GTEST_ROOT ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) + + # GTest must be built from source + if(EXISTS ${ARROW_CPP_SOURCE_DIR}/${ARROW_BUILD_DIR}/googletest_ep-prefix) + + # Set necessary paths for cmake to find GTest + set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/include") + set(GTEST_LIBRARY ${GTEST_ROOT}/lib) + set(GTEST_MAIN_LIBRARY ${GTEST_ROOT}/lib) + + # + # Taken from Matlab CMakeLists.txt (enable_gtest and build_gtest) + # + + set(ARROW_GTEST_PREFIX "${GTEST_ROOT}") + set(ARROW_GTEST_MAIN_PREFIX "${GTEST_ROOT}") + + if(WIN32) + set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/bin") + set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/bin") + + set(ARROW_GTEST_LINK_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") + set(ARROW_GTEST_LINK_LIB + "${ARROW_GTEST_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtestd${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) + + set(ARROW_GTEST_MAIN_LINK_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") + set(ARROW_GTEST_MAIN_LINK_LIB + 
"${ARROW_GTEST_MAIN_LINK_LIB_DIR}/${CMAKE_IMPORT_LIBRARY_PREFIX}gtest_maind${CMAKE_IMPORT_LIBRARY_SUFFIX}" + ) + else() + set(ARROW_GTEST_SHARED_LIB_DIR "${ARROW_GTEST_PREFIX}/lib") + set(ARROW_GTEST_MAIN_SHARED_LIB_DIR "${ARROW_GTEST_MAIN_PREFIX}/lib") + endif() + + set(ARROW_GTEST_INCLUDE_DIR "${ARROW_GTEST_PREFIX}/include") + set(ARROW_GTEST_SHARED_LIB + "${ARROW_GTEST_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtestd${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) + + set(ARROW_GTEST_MAIN_INCLUDE_DIR "${ARROW_GTEST_MAIN_PREFIX}/include") + set(ARROW_GTEST_MAIN_SHARED_LIB + "${ARROW_GTEST_MAIN_SHARED_LIB_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}gtest_maind${CMAKE_SHARED_LIBRARY_SUFFIX}" + ) + + file(MAKE_DIRECTORY "${ARROW_GTEST_INCLUDE_DIR}") + + # Create target GTest::gtest + add_library(GTest::gtest SHARED IMPORTED) + set_target_properties(GTest::gtest + PROPERTIES IMPORTED_LOCATION ${ARROW_GTEST_SHARED_LIB} + INTERFACE_INCLUDE_DIRECTORIES + ${ARROW_GTEST_INCLUDE_DIR}) + if(WIN32) + set_target_properties(GTest::gtest PROPERTIES IMPORTED_IMPLIB ${ARROW_GTEST_LINK_LIB}) + endif() + + # ArrowTesting + # needed to be able to use arrow_testing_shared target + find_package(ArrowTesting REQUIRED) + + add_custom_target(all-tests) + + add_library(arrow_python_test_main STATIC util/test_main.cc) + + target_link_libraries(arrow_python_test_main GTest::gtest) + target_include_directories(arrow_python_test_main SYSTEM + PUBLIC ${ARROW_PYTHON_INCLUDES}) + + # Link libraries to avoid include error on Linux + if(ARROW_TEST_LINKAGE STREQUAL shared) + target_link_libraries(arrow_python_test_main arrow_shared) + else() + target_link_libraries(arrow_python_test_main arrow_static) + endif() + + if(APPLE) + target_link_libraries(arrow_python_test_main ${CMAKE_DL_LIBS}) + set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS + "-undefined dynamic_lookup") + elseif(NOT MSVC) + target_link_libraries(arrow_python_test_main pthread ${CMAKE_DL_LIBS}) + endif() + + if(ARROW_TEST_LINKAGE 
STREQUAL shared) + set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_shared + arrow_testing_shared arrow_shared) + else() + set(ARROW_PYTHON_TEST_LINK_LIBS arrow_python_test_main arrow_python_static + arrow_testing_static arrow_static) + endif() + + # + # Add a test case + # + + set(REL_TEST_NAME "python_test") + get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) + set(TEST_NAME "arrow-${TEST_NAME}") + set(SOURCES "${REL_TEST_NAME}.cc") + + # Make sure the executable name contains only hyphens, not underscores + string(REPLACE "_" "-" TEST_NAME ${TEST_NAME}) + + set(TEST_PATH "${CMAKE_BINARY_DIR}/${TEST_NAME}") + add_executable(${TEST_NAME} ${SOURCES}) + + # We need to set the correct RPATH so that dependencies in the pyarrow + # package directory and in the conda prefix can be found at run time + set_target_properties(${TEST_NAME} + PROPERTIES BUILD_WITH_INSTALL_RPATH TRUE + INSTALL_RPATH_USE_LINK_PATH TRUE + INSTALL_RPATH + "${PYTHON_SOURCE_DIR}/pyarrow;$ENV{CONDA_PREFIX}/lib") + + # Customize link libraries + target_link_libraries(${TEST_NAME} PRIVATE "${ARROW_PYTHON_TEST_LINK_LIBS}") + # Extra link libs + target_link_libraries(${TEST_NAME} PRIVATE ${PYTHON_LIBRARIES}) + # Extra includes + target_include_directories(${TEST_NAME} SYSTEM PUBLIC "${ARROW_PYTHON_INCLUDES}") + + # Add the test + if(WIN32) + add_test(${TEST_NAME} ${TEST_PATH}) + else() + add_test(${TEST_NAME} + ${ARROW_CPP_SOURCE_DIR}/build-support/run-test.sh + ${CMAKE_BINARY_DIR} + test + ${TEST_PATH}) + endif() + + # Add test as dependency of relevant targets + add_dependencies(all-tests ${TEST_NAME}) + add_dependencies(arrow_python-tests ${TEST_NAME}) + + set(LABELS) + list(APPEND LABELS "unittest" arrow_python-tests) + + # ensure there is a cmake target which exercises tests with this LABEL + # (no LABEL variable is defined in this file, so the label appended to + # LABELS above is passed to ctest explicitly) + set(LABEL_TEST_NAME "test-arrow_python-tests") + if(NOT TARGET ${LABEL_TEST_NAME}) + add_custom_target(${LABEL_TEST_NAME} + ctest -L "arrow_python-tests" --output-on-failure + USES_TERMINAL) + endif() + # ensure the test is (re)built before the LABEL test runs +
add_dependencies(${LABEL_TEST_NAME} ${TEST_NAME}) + + set_property(TEST ${TEST_NAME} + APPEND + PROPERTY LABELS ${LABELS}) + + else() + message(STATUS "Tests for PyArrow CPP not build") + message(STATUS "Set -DGTest_SOURCE=BUNDLED when building Arrow C++ + to enable building tests for PyArrow CPP") + endif() +endif() \ No newline at end of file diff --git a/cpp/src/arrow/python/api.h b/python/pyarrow/src/api.h similarity index 66% rename from cpp/src/arrow/python/api.h rename to python/pyarrow/src/api.h index a0b13d6d130..28c46c95e94 100644 --- a/cpp/src/arrow/python/api.h +++ b/python/pyarrow/src/api.h @@ -17,14 +17,14 @@ #pragma once -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/deserialize.h" -#include "arrow/python/helpers.h" -#include "arrow/python/inference.h" -#include "arrow/python/io.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_to_arrow.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/serialize.h" +#include "arrow_to_pandas.h" +#include "common.h" +#include "datetime.h" +#include "deserialize.h" +#include "helpers.h" +#include "inference.h" +#include "io.h" +#include "numpy_convert.h" +#include "numpy_to_arrow.h" +#include "python_to_arrow.h" +#include "serialize.h" diff --git a/cpp/src/arrow/python/arrow-python-flight.pc.in b/python/pyarrow/src/arrow-python-flight.pc.in similarity index 100% rename from cpp/src/arrow/python/arrow-python-flight.pc.in rename to python/pyarrow/src/arrow-python-flight.pc.in diff --git a/cpp/src/arrow/python/arrow-python.pc.in b/python/pyarrow/src/arrow-python.pc.in similarity index 100% rename from cpp/src/arrow/python/arrow-python.pc.in rename to python/pyarrow/src/arrow-python.pc.in diff --git a/cpp/src/arrow/python/arrow_to_pandas.cc b/python/pyarrow/src/arrow_to_pandas.cc similarity index 99% rename from cpp/src/arrow/python/arrow_to_pandas.cc rename to 
python/pyarrow/src/arrow_to_pandas.cc index 8f9d1cb45b9..437f0f11925 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.cc +++ b/python/pyarrow/src/arrow_to_pandas.cc @@ -17,8 +17,8 @@ // Functions for pandas conversion via NumPy -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/numpy_interop.h" // IWYU pragma: expand +#include "arrow_to_pandas.h" +#include "numpy_interop.h" // IWYU pragma: expand #include #include @@ -48,16 +48,16 @@ #include "arrow/compute/api.h" -#include "arrow/python/arrow_to_python_internal.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_internal.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/type_traits.h" +#include "arrow_to_python_internal.h" +#include "common.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "numpy_internal.h" +#include "pyarrow.h" +#include "python_to_arrow.h" +#include "type_traits.h" namespace arrow { diff --git a/cpp/src/arrow/python/arrow_to_pandas.h b/python/pyarrow/src/arrow_to_pandas.h similarity index 98% rename from cpp/src/arrow/python/arrow_to_pandas.h rename to python/pyarrow/src/arrow_to_pandas.h index 6570364b8d2..33c08b6fe81 100644 --- a/cpp/src/arrow/python/arrow_to_pandas.h +++ b/python/pyarrow/src/arrow_to_pandas.h @@ -20,14 +20,14 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include "arrow/memory_pool.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/arrow_to_python_internal.h b/python/pyarrow/src/arrow_to_python_internal.h similarity index 97% rename from cpp/src/arrow/python/arrow_to_python_internal.h rename to python/pyarrow/src/arrow_to_python_internal.h index 
514cda32001..251c2a38ca0 100644 --- a/cpp/src/arrow/python/arrow_to_python_internal.h +++ b/python/pyarrow/src/arrow_to_python_internal.h @@ -18,7 +18,7 @@ #pragma once #include "arrow/array.h" -#include "arrow/python/platform.h" +#include "platform.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/benchmark.cc b/python/pyarrow/src/benchmark.cc similarity index 94% rename from cpp/src/arrow/python/benchmark.cc rename to python/pyarrow/src/benchmark.cc index 2d29f69d25b..1e56552ed80 100644 --- a/cpp/src/arrow/python/benchmark.cc +++ b/python/pyarrow/src/benchmark.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include -#include +#include "benchmark.h" +#include "helpers.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/benchmark.h b/python/pyarrow/src/benchmark.h similarity index 93% rename from cpp/src/arrow/python/benchmark.h rename to python/pyarrow/src/benchmark.h index 8060dd33722..883f02feceb 100644 --- a/cpp/src/arrow/python/benchmark.h +++ b/python/pyarrow/src/benchmark.h @@ -17,9 +17,9 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/common.cc b/python/pyarrow/src/common.cc similarity index 98% rename from cpp/src/arrow/python/common.cc rename to python/pyarrow/src/common.cc index 6fe2ed4dae3..09dde3e4489 100644 --- a/cpp/src/arrow/python/common.cc +++ b/python/pyarrow/src/common.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/common.h" +#include "common.h" #include #include @@ -26,7 +26,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" -#include "arrow/python/helpers.h" +#include "helpers.h" namespace arrow { diff --git a/cpp/src/arrow/python/common.h b/python/pyarrow/src/common.h similarity index 99% rename from cpp/src/arrow/python/common.h rename to python/pyarrow/src/common.h index 5c16106730b..768ff8dce44 100644 --- a/cpp/src/arrow/python/common.h +++ b/python/pyarrow/src/common.h @@ -21,10 +21,10 @@ #include #include "arrow/buffer.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/visibility.h" #include "arrow/result.h" #include "arrow/util/macros.h" +#include "pyarrow.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/csv.cc b/python/pyarrow/src/csv.cc similarity index 96% rename from cpp/src/arrow/python/csv.cc rename to python/pyarrow/src/csv.cc index d96c9400e2b..61ff23a22ed 100644 --- a/cpp/src/arrow/python/csv.cc +++ b/python/pyarrow/src/csv.cc @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/csv.h" +#include "csv.h" #include -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/csv.h b/python/pyarrow/src/csv.h similarity index 97% rename from cpp/src/arrow/python/csv.h rename to python/pyarrow/src/csv.h index 34302e93667..e6e53af0f85 100644 --- a/cpp/src/arrow/python/csv.h +++ b/python/pyarrow/src/csv.h @@ -23,8 +23,8 @@ #include #include "arrow/csv/options.h" -#include "arrow/python/common.h" #include "arrow/util/macros.h" +#include "common.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/datetime.cc b/python/pyarrow/src/datetime.cc similarity index 99% rename from cpp/src/arrow/python/datetime.cc rename to python/pyarrow/src/datetime.cc index 848b0a6bf10..9604b529753 100644 --- a/cpp/src/arrow/python/datetime.cc +++ b/python/pyarrow/src/datetime.cc @@ -14,22 +14,22 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/datetime.h" +#include "datetime.h" #include #include #include #include "arrow/array.h" -#include "arrow/python/arrow_to_python_internal.h" -#include "arrow/python/common.h" -#include "arrow/python/helpers.h" -#include "arrow/python/platform.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" +#include "arrow_to_python_internal.h" +#include "common.h" +#include "helpers.h" +#include "platform.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/datetime.h b/python/pyarrow/src/datetime.h similarity index 99% rename from cpp/src/arrow/python/datetime.h rename to python/pyarrow/src/datetime.h index dd07710aaf6..6f9bfbe2dbf 100644 --- a/cpp/src/arrow/python/datetime.h +++ b/python/pyarrow/src/datetime.h @@ -20,8 +20,8 @@ #include #include -#include "arrow/python/platform.h" -#include "arrow/python/visibility.h" +#include "platform.h" +#include "visibility.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/python/decimal.cc b/python/pyarrow/src/decimal.cc similarity index 98% rename from cpp/src/arrow/python/decimal.cc rename to python/pyarrow/src/decimal.cc index 0c00fcfaa8e..a7244f9dcc1 100644 --- a/cpp/src/arrow/python/decimal.cc +++ b/python/pyarrow/src/decimal.cc @@ -18,9 +18,9 @@ #include #include -#include "arrow/python/common.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" +#include "common.h" +#include "decimal.h" +#include "helpers.h" #include "arrow/type_fwd.h" #include "arrow/util/decimal.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/decimal.h b/python/pyarrow/src/decimal.h similarity index 99% rename from cpp/src/arrow/python/decimal.h rename to python/pyarrow/src/decimal.h index 1187037aed2..5c4a17dcd48 100644 --- a/cpp/src/arrow/python/decimal.h +++ b/python/pyarrow/src/decimal.h @@ -19,7 +19,7 @@ #include -#include 
"arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" namespace arrow { diff --git a/cpp/src/arrow/python/deserialize.cc b/python/pyarrow/src/deserialize.cc similarity index 98% rename from cpp/src/arrow/python/deserialize.cc rename to python/pyarrow/src/deserialize.cc index 961a1686e0a..ad28874460a 100644 --- a/cpp/src/arrow/python/deserialize.cc +++ b/python/pyarrow/src/deserialize.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/deserialize.h" +#include "deserialize.h" -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include #include @@ -40,12 +40,12 @@ #include "arrow/util/logging.h" #include "arrow/util/value_parsing.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/serialize.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "pyarrow.h" +#include "serialize.h" namespace arrow { diff --git a/cpp/src/arrow/python/deserialize.h b/python/pyarrow/src/deserialize.h similarity index 98% rename from cpp/src/arrow/python/deserialize.h rename to python/pyarrow/src/deserialize.h index 41b6a13a388..08d0972048d 100644 --- a/cpp/src/arrow/python/deserialize.h +++ b/python/pyarrow/src/deserialize.h @@ -21,8 +21,8 @@ #include #include -#include "arrow/python/serialize.h" -#include "arrow/python/visibility.h" +#include "serialize.h" +#include "visibility.h" #include "arrow/status.h" namespace arrow { diff --git a/cpp/src/arrow/python/extension_type.cc b/python/pyarrow/src/extension_type.cc similarity index 98% rename from cpp/src/arrow/python/extension_type.cc rename to python/pyarrow/src/extension_type.cc index 3ccc171c871..9fd2f2e1466 100644 --- a/cpp/src/arrow/python/extension_type.cc +++ b/python/pyarrow/src/extension_type.cc @@ -19,9 +19,9 @@ 
#include #include -#include "arrow/python/extension_type.h" -#include "arrow/python/helpers.h" -#include "arrow/python/pyarrow.h" +#include "extension_type.h" +#include "helpers.h" +#include "pyarrow.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/extension_type.h b/python/pyarrow/src/extension_type.h similarity index 97% rename from cpp/src/arrow/python/extension_type.h rename to python/pyarrow/src/extension_type.h index e433d9aca70..76ddb327733 100644 --- a/cpp/src/arrow/python/extension_type.h +++ b/python/pyarrow/src/extension_type.h @@ -21,8 +21,8 @@ #include #include "arrow/extension_type.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" namespace arrow { diff --git a/cpp/src/arrow/python/filesystem.cc b/python/pyarrow/src/filesystem.cc similarity index 99% rename from cpp/src/arrow/python/filesystem.cc rename to python/pyarrow/src/filesystem.cc index 5e9b500a4f7..17ca732e073 100644 --- a/cpp/src/arrow/python/filesystem.cc +++ b/python/pyarrow/src/filesystem.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/filesystem.h" +#include "filesystem.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/python/filesystem.h b/python/pyarrow/src/filesystem.h similarity index 98% rename from cpp/src/arrow/python/filesystem.h rename to python/pyarrow/src/filesystem.h index 003fd5cb805..993145b5327 100644 --- a/cpp/src/arrow/python/filesystem.h +++ b/python/pyarrow/src/filesystem.h @@ -22,8 +22,8 @@ #include #include "arrow/filesystem/filesystem.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" namespace arrow { diff --git a/cpp/src/arrow/python/flight.cc b/python/pyarrow/src/flight.cc similarity index 99% rename from cpp/src/arrow/python/flight.cc rename to python/pyarrow/src/flight.cc index bf7af27ac72..79b8db074a5 100644 --- a/cpp/src/arrow/python/flight.cc +++ b/python/pyarrow/src/flight.cc @@ -18,7 +18,7 @@ #include #include -#include "arrow/python/flight.h" +#include "flight.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/flight.h b/python/pyarrow/src/flight.h similarity index 99% rename from cpp/src/arrow/python/flight.h rename to python/pyarrow/src/flight.h index 5713b2e4b70..7e0136fcc3e 100644 --- a/cpp/src/arrow/python/flight.h +++ b/python/pyarrow/src/flight.h @@ -23,7 +23,7 @@ #include "arrow/flight/api.h" #include "arrow/ipc/dictionary.h" -#include "arrow/python/common.h" +#include "common.h" #if defined(_WIN32) || defined(__CYGWIN__) // Windows #if defined(_MSC_VER) diff --git a/cpp/src/arrow/python/gdb.cc b/python/pyarrow/src/gdb.cc similarity index 99% rename from cpp/src/arrow/python/gdb.cc rename to python/pyarrow/src/gdb.cc index 944e1e96d71..297bc6dbffc 100644 --- a/cpp/src/arrow/python/gdb.cc +++ b/python/pyarrow/src/gdb.cc @@ -24,7 +24,7 @@ #include "arrow/datum.h" #include "arrow/extension_type.h" #include "arrow/ipc/json_simple.h" -#include 
"arrow/python/gdb.h" +#include "gdb.h" #include "arrow/record_batch.h" #include "arrow/scalar.h" #include "arrow/table.h" diff --git a/cpp/src/arrow/python/gdb.h b/python/pyarrow/src/gdb.h similarity index 96% rename from cpp/src/arrow/python/gdb.h rename to python/pyarrow/src/gdb.h index 1ddcbb51f6e..b4296abe6dd 100644 --- a/cpp/src/arrow/python/gdb.h +++ b/python/pyarrow/src/gdb.h @@ -17,7 +17,7 @@ #pragma once -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { namespace gdb { diff --git a/cpp/src/arrow/python/helpers.cc b/python/pyarrow/src/helpers.cc similarity index 99% rename from cpp/src/arrow/python/helpers.cc rename to python/pyarrow/src/helpers.cc index c266abc169d..73d7cd8dcfc 100644 --- a/cpp/src/arrow/python/helpers.cc +++ b/python/pyarrow/src/helpers.cc @@ -16,17 +16,17 @@ // under the License. // helpers.h includes a NumPy header, so we include this first -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" -#include "arrow/python/helpers.h" +#include "helpers.h" #include #include #include #include -#include "arrow/python/common.h" -#include "arrow/python/decimal.h" +#include "common.h" +#include "decimal.h" #include "arrow/type_fwd.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/helpers.h b/python/pyarrow/src/helpers.h similarity index 97% rename from cpp/src/arrow/python/helpers.h rename to python/pyarrow/src/helpers.h index a8e5f80b606..089d1225dd6 100644 --- a/cpp/src/arrow/python/helpers.h +++ b/python/pyarrow/src/helpers.h @@ -17,18 +17,18 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/python/inference.cc b/python/pyarrow/src/inference.cc similarity index 98% 
rename from cpp/src/arrow/python/inference.cc rename to python/pyarrow/src/inference.cc index db5f0896a95..513b0bfdbbb 100644 --- a/cpp/src/arrow/python/inference.cc +++ b/python/pyarrow/src/inference.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/inference.h" -#include "arrow/python/numpy_interop.h" +#include "inference.h" +#include "numpy_interop.h" #include @@ -31,11 +31,11 @@ #include "arrow/util/decimal.h" #include "arrow/util/logging.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/inference.h b/python/pyarrow/src/inference.h similarity index 94% rename from cpp/src/arrow/python/inference.h rename to python/pyarrow/src/inference.h index eff18362934..24005dd96f1 100644 --- a/cpp/src/arrow/python/inference.h +++ b/python/pyarrow/src/inference.h @@ -20,15 +20,15 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/init.cc b/python/pyarrow/src/init.cc similarity index 93% rename from cpp/src/arrow/python/init.cc rename to python/pyarrow/src/init.cc index dba293bbe23..f09c5cd12a5 100644 --- a/cpp/src/arrow/python/init.cc +++ b/python/pyarrow/src/init.cc @@ -18,7 +18,7 @@ // Trigger the array import (inversion of NO_IMPORT_ARRAY) #define NUMPY_IMPORT_ARRAY -#include "arrow/python/init.h" -#include "arrow/python/numpy_interop.h" +#include "init.h" +#include "numpy_interop.h" int arrow_init_numpy() { return 
arrow::py::import_numpy(); } diff --git a/cpp/src/arrow/python/init.h b/python/pyarrow/src/init.h similarity index 92% rename from cpp/src/arrow/python/init.h rename to python/pyarrow/src/init.h index 2e6c954862b..eab467b631e 100644 --- a/cpp/src/arrow/python/init.h +++ b/python/pyarrow/src/init.h @@ -17,8 +17,8 @@ #pragma once -#include "arrow/python/platform.h" -#include "arrow/python/visibility.h" +#include "platform.h" +#include "visibility.h" extern "C" { ARROW_PYTHON_EXPORT diff --git a/cpp/src/arrow/python/io.cc b/python/pyarrow/src/io.cc similarity index 99% rename from cpp/src/arrow/python/io.cc rename to python/pyarrow/src/io.cc index 73525feed38..173d84ff567 100644 --- a/cpp/src/arrow/python/io.cc +++ b/python/pyarrow/src/io.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/io.h" +#include "io.h" #include #include @@ -28,8 +28,8 @@ #include "arrow/status.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" +#include "common.h" +#include "pyarrow.h" namespace arrow { diff --git a/cpp/src/arrow/python/io.h b/python/pyarrow/src/io.h similarity index 98% rename from cpp/src/arrow/python/io.h rename to python/pyarrow/src/io.h index a38d0ca332c..53b15434ea6 100644 --- a/cpp/src/arrow/python/io.h +++ b/python/pyarrow/src/io.h @@ -22,8 +22,8 @@ #include "arrow/io/interfaces.h" #include "arrow/io/transform.h" -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/ipc.cc b/python/pyarrow/src/ipc.cc similarity index 96% rename from cpp/src/arrow/python/ipc.cc rename to python/pyarrow/src/ipc.cc index 2e6c9d91275..bed3da2d1ac 100644 --- a/cpp/src/arrow/python/ipc.cc +++ b/python/pyarrow/src/ipc.cc @@ -15,11 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/ipc.h" +#include "ipc.h" #include -#include "arrow/python/pyarrow.h" +#include "pyarrow.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/ipc.h b/python/pyarrow/src/ipc.h similarity index 95% rename from cpp/src/arrow/python/ipc.h rename to python/pyarrow/src/ipc.h index 92232ed8300..38839af82fd 100644 --- a/cpp/src/arrow/python/ipc.h +++ b/python/pyarrow/src/ipc.h @@ -19,8 +19,8 @@ #include -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/record_batch.h" #include "arrow/result.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/python/iterators.h b/python/pyarrow/src/iterators.h similarity index 98% rename from cpp/src/arrow/python/iterators.h rename to python/pyarrow/src/iterators.h index 7b31962dac5..d581adf52c9 100644 --- a/cpp/src/arrow/python/iterators.h +++ b/python/pyarrow/src/iterators.h @@ -21,8 +21,8 @@ #include "arrow/array/array_primitive.h" -#include "arrow/python/common.h" -#include "arrow/python/numpy_internal.h" +#include "common.h" +#include "numpy_internal.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/numpy_convert.cc b/python/pyarrow/src/numpy_convert.cc similarity index 99% rename from cpp/src/arrow/python/numpy_convert.cc rename to python/pyarrow/src/numpy_convert.cc index 49706807644..d8e10605daa 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/python/pyarrow/src/numpy_convert.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" -#include "arrow/python/numpy_convert.h" +#include "numpy_convert.h" #include #include @@ -30,9 +30,9 @@ #include "arrow/type.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/type_traits.h" +#include "common.h" +#include "pyarrow.h" +#include "type_traits.h" namespace arrow { namespace py { diff --git a/cpp/src/arrow/python/numpy_convert.h b/python/pyarrow/src/numpy_convert.h similarity index 98% rename from cpp/src/arrow/python/numpy_convert.h rename to python/pyarrow/src/numpy_convert.h index 10451077a22..d2772dcdcdf 100644 --- a/cpp/src/arrow/python/numpy_convert.h +++ b/python/pyarrow/src/numpy_convert.h @@ -20,14 +20,14 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include #include #include "arrow/buffer.h" -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" namespace arrow { diff --git a/cpp/src/arrow/python/numpy_internal.h b/python/pyarrow/src/numpy_internal.h similarity index 98% rename from cpp/src/arrow/python/numpy_internal.h rename to python/pyarrow/src/numpy_internal.h index b9b632f9f9a..d408e908442 100644 --- a/cpp/src/arrow/python/numpy_internal.h +++ b/python/pyarrow/src/numpy_internal.h @@ -19,11 +19,11 @@ #pragma once -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include "arrow/status.h" -#include "arrow/python/platform.h" +#include "platform.h" #include #include diff --git a/cpp/src/arrow/python/numpy_interop.h b/python/pyarrow/src/numpy_interop.h similarity index 98% rename from cpp/src/arrow/python/numpy_interop.h rename to python/pyarrow/src/numpy_interop.h index ce7baed259f..d212e014ec6 100644 --- a/cpp/src/arrow/python/numpy_interop.h +++ b/python/pyarrow/src/numpy_interop.h @@ -17,7 +17,7 @@ #pragma once -#include "arrow/python/platform.h" // IWYU pragma: export +#include "platform.h" // 
IWYU pragma: export #include // IWYU pragma: export diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/python/pyarrow/src/numpy_to_arrow.cc similarity index 98% rename from cpp/src/arrow/python/numpy_to_arrow.cc rename to python/pyarrow/src/numpy_to_arrow.cc index 2727ce32f44..cd01577d71d 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.cc +++ b/python/pyarrow/src/numpy_to_arrow.cc @@ -17,8 +17,8 @@ // Functions for pandas conversion via NumPy -#include "arrow/python/numpy_to_arrow.h" -#include "arrow/python/numpy_interop.h" +#include "numpy_to_arrow.h" +#include "numpy_interop.h" #include #include @@ -49,14 +49,14 @@ #include "arrow/compute/api_scalar.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_internal.h" -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/type_traits.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "numpy_internal.h" +#include "python_to_arrow.h" +#include "type_traits.h" namespace arrow { diff --git a/cpp/src/arrow/python/numpy_to_arrow.h b/python/pyarrow/src/numpy_to_arrow.h similarity index 97% rename from cpp/src/arrow/python/numpy_to_arrow.h rename to python/pyarrow/src/numpy_to_arrow.h index b6cd093e554..13924b1ac1c 100644 --- a/cpp/src/arrow/python/numpy_to_arrow.h +++ b/python/pyarrow/src/numpy_to_arrow.h @@ -19,12 +19,12 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include "arrow/compute/api.h" -#include "arrow/python/visibility.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/parquet_encryption.cc b/python/pyarrow/src/parquet_encryption.cc similarity index 98% rename from cpp/src/arrow/python/parquet_encryption.cc rename to python/pyarrow/src/parquet_encryption.cc index a5f924bce78..b64c77f2b9f 
100644 --- a/cpp/src/arrow/python/parquet_encryption.cc +++ b/python/pyarrow/src/parquet_encryption.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/parquet_encryption.h" +#include "parquet_encryption.h" #include "parquet/exception.h" namespace arrow { diff --git a/cpp/src/arrow/python/parquet_encryption.h b/python/pyarrow/src/parquet_encryption.h similarity index 98% rename from cpp/src/arrow/python/parquet_encryption.h rename to python/pyarrow/src/parquet_encryption.h index 23ee478348e..0079b6eed56 100644 --- a/cpp/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/parquet_encryption.h @@ -19,8 +19,8 @@ #include -#include "arrow/python/common.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "visibility.h" #include "arrow/util/macros.h" #include "parquet/encryption/crypto_factory.h" #include "parquet/encryption/kms_client.h" diff --git a/cpp/src/arrow/python/pch.h b/python/pyarrow/src/pch.h similarity index 96% rename from cpp/src/arrow/python/pch.h rename to python/pyarrow/src/pch.h index d1d688b4f17..0afcf7938df 100644 --- a/cpp/src/arrow/python/pch.h +++ b/python/pyarrow/src/pch.h @@ -21,4 +21,4 @@ // may incur a slowdown, since it makes the precompiled header heavier to load. #include "arrow/pch.h" -#include "arrow/python/platform.h" +#include "platform.h" diff --git a/cpp/src/arrow/python/platform.h b/python/pyarrow/src/platform.h similarity index 100% rename from cpp/src/arrow/python/platform.h rename to python/pyarrow/src/platform.h diff --git a/cpp/src/arrow/python/pyarrow.cc b/python/pyarrow/src/pyarrow.cc similarity index 95% rename from cpp/src/arrow/python/pyarrow.cc rename to python/pyarrow/src/pyarrow.cc index c3244b74bf5..b567a68df3e 100644 --- a/cpp/src/arrow/python/pyarrow.cc +++ b/python/pyarrow/src/pyarrow.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/pyarrow.h" +#include "pyarrow.h" #include #include @@ -25,10 +25,10 @@ #include "arrow/tensor.h" #include "arrow/type.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" +#include "common.h" +#include "datetime.h" namespace { -#include "arrow/python/pyarrow_api.h" +#include "pyarrow_api.h" } namespace arrow { diff --git a/cpp/src/arrow/python/pyarrow.h b/python/pyarrow/src/pyarrow.h similarity index 97% rename from cpp/src/arrow/python/pyarrow.h rename to python/pyarrow/src/pyarrow.h index 4c365081d70..a63e29a33fb 100644 --- a/cpp/src/arrow/python/pyarrow.h +++ b/python/pyarrow/src/pyarrow.h @@ -17,11 +17,11 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" diff --git a/cpp/src/arrow/python/pyarrow_api.h b/python/pyarrow/src/pyarrow_api.h similarity index 100% rename from cpp/src/arrow/python/pyarrow_api.h rename to python/pyarrow/src/pyarrow_api.h diff --git a/cpp/src/arrow/python/pyarrow_lib.h b/python/pyarrow/src/pyarrow_lib.h similarity index 100% rename from cpp/src/arrow/python/pyarrow_lib.h rename to python/pyarrow/src/pyarrow_lib.h diff --git a/cpp/src/arrow/python/python_test.cc b/python/pyarrow/src/python_test.cc similarity index 98% rename from cpp/src/arrow/python/python_test.cc rename to python/pyarrow/src/python_test.cc index c465fabc680..54086faa7ca 100644 --- a/cpp/src/arrow/python/python_test.cc +++ b/python/pyarrow/src/python_test.cc @@ -21,7 +21,7 @@ #include #include -#include "arrow/python/platform.h" +#include "platform.h" #include "arrow/array.h" #include "arrow/array/builder_binary.h" @@ -30,12 +30,12 @@ #include "arrow/util/decimal.h" #include "arrow/util/optional.h" -#include "arrow/python/arrow_to_pandas.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/numpy_interop.h" -#include 
"arrow/python/python_to_arrow.h" +#include "arrow_to_pandas.h" +#include "decimal.h" +#include "helpers.h" +#include "numpy_convert.h" +#include "numpy_interop.h" +#include "python_to_arrow.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/python_to_arrow.cc similarity index 99% rename from cpp/src/arrow/python/python_to_arrow.cc rename to python/pyarrow/src/python_to_arrow.cc index 7a94407d2d9..4ca19049a73 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/python_to_arrow.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/python_to_arrow.h" -#include "arrow/python/numpy_interop.h" +#include "python_to_arrow.h" +#include "numpy_interop.h" #include @@ -44,13 +44,13 @@ #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/python/datetime.h" -#include "arrow/python/decimal.h" -#include "arrow/python/helpers.h" -#include "arrow/python/inference.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/type_traits.h" +#include "datetime.h" +#include "decimal.h" +#include "helpers.h" +#include "inference.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "type_traits.h" #include "arrow/visit_type_inline.h" namespace arrow { diff --git a/cpp/src/arrow/python/python_to_arrow.h b/python/pyarrow/src/python_to_arrow.h similarity index 95% rename from cpp/src/arrow/python/python_to_arrow.h rename to python/pyarrow/src/python_to_arrow.h index d167996ba8d..ca246a87401 100644 --- a/cpp/src/arrow/python/python_to_arrow.h +++ b/python/pyarrow/src/python_to_arrow.h @@ -20,16 +20,16 @@ #pragma once -#include "arrow/python/platform.h" +#include "platform.h" #include #include -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" 
-#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/serialize.cc b/python/pyarrow/src/serialize.cc similarity index 99% rename from cpp/src/arrow/python/serialize.cc rename to python/pyarrow/src/serialize.cc index ad079cbd9c7..c7c925fa198 100644 --- a/cpp/src/arrow/python/serialize.cc +++ b/python/pyarrow/src/serialize.cc @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/serialize.h" -#include "arrow/python/numpy_interop.h" +#include "serialize.h" +#include "numpy_interop.h" #include #include @@ -42,13 +42,13 @@ #include "arrow/tensor.h" #include "arrow/util/logging.h" -#include "arrow/python/common.h" -#include "arrow/python/datetime.h" -#include "arrow/python/helpers.h" -#include "arrow/python/iterators.h" -#include "arrow/python/numpy_convert.h" -#include "arrow/python/platform.h" -#include "arrow/python/pyarrow.h" +#include "common.h" +#include "datetime.h" +#include "helpers.h" +#include "iterators.h" +#include "numpy_convert.h" +#include "platform.h" +#include "pyarrow.h" constexpr int32_t kMaxRecursionDepth = 100; diff --git a/cpp/src/arrow/python/serialize.h b/python/pyarrow/src/serialize.h similarity index 99% rename from cpp/src/arrow/python/serialize.h rename to python/pyarrow/src/serialize.h index fd207d3e069..fd78c9a309a 100644 --- a/cpp/src/arrow/python/serialize.h +++ b/python/pyarrow/src/serialize.h @@ -21,7 +21,7 @@ #include #include "arrow/ipc/options.h" -#include "arrow/python/visibility.h" +#include "visibility.h" #include "arrow/sparse_tensor.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/python/type_traits.h b/python/pyarrow/src/type_traits.h similarity index 99% rename from cpp/src/arrow/python/type_traits.h rename to python/pyarrow/src/type_traits.h index a941577f765..4cdfe9d8d62 100644 --- a/cpp/src/arrow/python/type_traits.h +++ b/python/pyarrow/src/type_traits.h @@ -19,12 +19,12 @@ #pragma once -#include 
"arrow/python/platform.h" +#include "platform.h" #include #include -#include "arrow/python/numpy_interop.h" +#include "numpy_interop.h" #include diff --git a/cpp/src/arrow/python/udf.cc b/python/pyarrow/src/udf.cc similarity index 98% rename from cpp/src/arrow/python/udf.cc rename to python/pyarrow/src/udf.cc index 81bf47c0ade..51cea5e6c64 100644 --- a/cpp/src/arrow/python/udf.cc +++ b/python/pyarrow/src/udf.cc @@ -15,9 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/python/udf.h" +#include "udf.h" #include "arrow/compute/function.h" -#include "arrow/python/common.h" +#include "common.h" namespace arrow { diff --git a/cpp/src/arrow/python/udf.h b/python/pyarrow/src/udf.h similarity index 93% rename from cpp/src/arrow/python/udf.h rename to python/pyarrow/src/udf.h index 4ab3e7cc72b..52f22b4cb4f 100644 --- a/cpp/src/arrow/python/udf.h +++ b/python/pyarrow/src/udf.h @@ -20,11 +20,11 @@ #include "arrow/compute/exec.h" #include "arrow/compute/function.h" #include "arrow/compute/registry.h" -#include "arrow/python/platform.h" +#include "platform.h" -#include "arrow/python/common.h" -#include "arrow/python/pyarrow.h" -#include "arrow/python/visibility.h" +#include "common.h" +#include "pyarrow.h" +#include "visibility.h" namespace arrow { diff --git a/cpp/src/arrow/python/util/CMakeLists.txt b/python/pyarrow/src/util/CMakeLists.txt similarity index 100% rename from cpp/src/arrow/python/util/CMakeLists.txt rename to python/pyarrow/src/util/CMakeLists.txt diff --git a/cpp/src/arrow/python/util/test_main.cc b/python/pyarrow/src/util/test_main.cc similarity index 89% rename from cpp/src/arrow/python/util/test_main.cc rename to python/pyarrow/src/util/test_main.cc index dd7f379bdd4..3ee1657e644 100644 --- a/cpp/src/arrow/python/util/test_main.cc +++ b/python/pyarrow/src/util/test_main.cc @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/python/platform.h" +#include "../platform.h" #include -#include "arrow/python/datetime.h" -#include "arrow/python/init.h" -#include "arrow/python/pyarrow.h" +#include "../datetime.h" +#include "../init.h" +#include "../pyarrow.h" int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/cpp/src/arrow/python/visibility.h b/python/pyarrow/src/visibility.h similarity index 100% rename from cpp/src/arrow/python/visibility.h rename to python/pyarrow/src/visibility.h diff --git a/python/pyarrow/tensorflow/plasma_op.cc b/python/pyarrow/tensorflow/plasma_op.cc index bf4eec78912..6445ba75e5e 100644 --- a/python/pyarrow/tensorflow/plasma_op.cc +++ b/python/pyarrow/tensorflow/plasma_op.cc @@ -36,8 +36,8 @@ #include "arrow/util/logging.h" // These headers do not include Python.h -#include "arrow/python/deserialize.h" -#include "arrow/python/serialize.h" +#include "deserialize.h" +#include "serialize.h" #include "plasma/client.h" diff --git a/python/pyarrow/tests/test_cython.py b/python/pyarrow/tests/test_cython.py index 89fca4190ec..85d65383d8b 100644 --- a/python/pyarrow/tests/test_cython.py +++ b/python/pyarrow/tests/test_cython.py @@ -81,6 +81,8 @@ def check_cython_example_module(mod): mod.cast_scalar(scal, pa.list_(pa.int64())) +@pytest.mark.skipif(sys.platform == "win32", + reason="ARROW-17172: currently fails on windows") @pytest.mark.cython def test_cython_api(tmpdir): """ @@ -138,7 +140,6 @@ def test_cython_api(tmpdir): subprocess_env[var] = delim.join( pa.get_library_dirs() + [subprocess_env.get(var, '')] ) - subprocess.check_call([sys.executable, '-c', code], stdout=subprocess.PIPE, env=subprocess_env) diff --git a/python/pyarrow/tests/test_gdb.py b/python/pyarrow/tests/test_gdb.py index 89c42648d24..1990198d9f1 100644 --- a/python/pyarrow/tests/test_gdb.py +++ b/python/pyarrow/tests/test_gdb.py @@ -154,7 +154,7 @@ def select_frame(self, func_name): # but it's not available on old GDB versions (such as 8.1.1), # so 
instead parse the stack trace for a matching frame number. out = self.run_command("info stack") - pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " " + pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + r"\b" m = re.search(pat, out) if m is None: pytest.fail(f"Could not select frame for function {func_name}") diff --git a/python/setup.py b/python/setup.py index 9fd7c480088..a02d101002c 100755 --- a/python/setup.py +++ b/python/setup.py @@ -93,6 +93,7 @@ def build_extensions(self): _build_ext.build_extensions(self) def run(self): + self._run_cmake_pyarrow_cpp() self._run_cmake() _build_ext.run(self) @@ -227,6 +228,126 @@ def initialize_options(self): '_hdfsio', 'gandiva'] + def _run_cmake_pyarrow_cpp(self): + # check if build_type is correctly passed / set + if self.build_type.lower() not in ('release', 'debug'): + raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to " + "be 'release' or 'debug'") + + # The directory containing this setup.py + source = os.path.dirname(os.path.abspath(__file__)) + # The directory containing this PyArrow cpp CMakeLists.txt + source_pyarrow_cpp = pjoin(source, "pyarrow/src") + + # The directory for the module being built + build_cmd = self.get_finalized_command('build') + saved_cwd = os.getcwd() + build_dir = pjoin(saved_cwd, 'build', 'dist') + build_include = pjoin(saved_cwd, 'build', 'dist', 'include') + build_lib = pjoin(os.getcwd(), build_cmd.build_lib) + + # The directory containing Arrow C++ build + arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build') + if self.inplace: + # a bit hacky + build_lib = saved_cwd + if not os.path.isdir(build_dir): + self.mkpath(build_dir) + if not os.path.isdir(build_lib): + self.mkpath(build_lib) + if not os.path.isdir(build_include): + self.mkpath(build_include) + + # Change to the build directory + with changed_dir(build_dir): + # cmake args + cmake_options = [ + '-DCMAKE_INSTALL_PREFIX=' + str(build_dir), + '-DCMAKE_BUILD_TYPE=' + str(self.build_type.lower()), + 
'-DARROW_BUILD_DIR=' + str(arrow_build_dir), + '-DPYTHON_EXECUTABLE=' + str(sys.executable), + '-DPython3_EXECUTABLE=' + str(sys.executable), + ] + + # Check for specific options + def append_cmake_bool(value, varname): + cmake_options.append('-D{0}={1}'.format( + varname, 'on' if value else 'off')) + + append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET') + append_cmake_bool(self.with_parquet_encryption, + 'PYARROW_WITH_PARQUET_ENCRYPTION') + append_cmake_bool(self.with_hdfs, + 'PYARROW_WITH_HDFS') + + # Windows + if self.cmake_generator: + cmake_options += ['-G', self.cmake_generator] + + # build args + build_tool_args = [] + if os.environ.get('PYARROW_PARALLEL'): + build_tool_args.append('--') + build_tool_args.append( + '-j{0}'.format(os.environ['PYARROW_PARALLEL'])) + + # run cmake + print("-- Running cmake for pyarrow cpp") + self.spawn(['cmake'] + cmake_options + [source_pyarrow_cpp]) + print("-- Finished cmake for pyarrow cpp") + # run make & install + print("-- Running cmake build and install for pyarrow cpp") + self.spawn(['cmake', '--build', '.', '--config', + self.build_type, '--target', 'install'] + + build_tool_args) + print("-- Finished cmake build and install for pyarrow cpp") + + # Move the libraries to the place expected by the Python build + try: + os.makedirs(pjoin(build_lib, 'pyarrow')) + except OSError: + pass + + # helper function + def copy_libs(folder_name): + for libname in os.listdir(pjoin(build_dir, folder_name)): + if "python" in libname: + libname_path = pjoin(build_lib, "pyarrow", libname) + if os.path.exists(libname_path): + os.remove(libname_path) + print( + f"Copying {pjoin(build_dir, folder_name, libname)}" + f" to {pjoin(build_lib, 'pyarrow', libname)}") + shutil.copy(pjoin(build_dir, folder_name, libname), + pjoin(build_lib, "pyarrow")) + + # Move libraries to python/pyarrow + # For windows builds, move dll from bin + try: + copy_libs("bin") + except OSError: + pass + try: + folder_name, = (name for name in ["lib", 
"lib64"] + if os.path.exists(pjoin(build_dir, name))) + copy_libs(folder_name) + except ValueError: + print("There are multiple or none libraries for PyArrow cpp ") + print("installed in the python/build/dist folder. Check the ") + print("installation process and be sure there is exactly one ") + print("library folder created") + + # Copy headers to python/pyarrow/include + pyarrow_cpp_include = pjoin(build_include, "arrow", "python") + pyarrow_include = pjoin( + build_lib, "pyarrow", "include", "arrow", "python") + if os.path.exists(pyarrow_include): + shutil.rmtree(pyarrow_include) + print( + f"Copying include folder: {pyarrow_cpp_include}" + f" to {pyarrow_include}") + shutil.copytree(pyarrow_cpp_include, pyarrow_include) + def _run_cmake(self): # check if build_type is correctly passed / set if self.build_type.lower() not in ('release', 'debug'): @@ -245,6 +366,10 @@ def _run_cmake(self): if not os.path.isdir(build_temp): self.mkpath(build_temp) + if self.inplace: + # a bit hacky + build_lib = saved_cwd + # Change to the build directory with changed_dir(build_temp): # Detect if we built elsewhere @@ -266,6 +391,7 @@ def _run_cmake(self): cmake_options = [ '-DPYTHON_EXECUTABLE=%s' % sys.executable, '-DPython3_EXECUTABLE=%s' % sys.executable, + '-DPYARROW_CPP_HOME=' + str(pjoin(build_lib, "pyarrow")), static_lib_option, ] @@ -332,10 +458,6 @@ def append_cmake_bool(value, varname): build_tool_args) print("-- Finished cmake --build for pyarrow") - if self.inplace: - # a bit hacky - build_lib = saved_cwd - # Move the libraries to the place expected by the Python build try: os.makedirs(pjoin(build_lib, 'pyarrow')) @@ -354,6 +476,16 @@ def append_cmake_bool(value, varname): shutil.move(pjoin(build_prefix, 'include'), pjoin(build_lib, 'pyarrow')) + # pyarrow/include file is first deleted in the previous step + # so we need to add the PyArrow cpp include folder again + build_pyarrow_cpp_include = pjoin( + saved_cwd, 'build/dist/include') + shutil.move(pjoin( + 
build_pyarrow_cpp_include, "arrow", "python"), + pjoin( + build_lib, "pyarrow", "include", + "arrow", "python")) + # Move the built C-extension to the place expected by the Python # build self._found_names = [] @@ -408,8 +540,6 @@ def _bundle_arrow_cpp(self, build_prefix, build_lib): move_shared_libs(build_prefix, build_lib, "arrow_substrait") if self.with_flight: move_shared_libs(build_prefix, build_lib, "arrow_flight") - move_shared_libs(build_prefix, build_lib, - "arrow_python_flight") if self.with_dataset: move_shared_libs(build_prefix, build_lib, "arrow_dataset") if self.with_plasma: