diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 6f4b8e9a090..df11209e7c4 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -23,7 +23,6 @@ export MINICONDA=$HOME/miniconda export PATH="$MINICONDA/bin:$PATH" export ARROW_HOME=$ARROW_CPP_INSTALL -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ARROW_CPP_INSTALL/lib pushd $PYTHON_DIR export PARQUET_HOME=$TRAVIS_BUILD_DIR/parquet-env @@ -70,11 +69,31 @@ build_parquet_cpp() { build_parquet_cpp -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$PARQUET_HOME/lib +function build_arrow_libraries() { + CPP_BUILD_DIR=$1 + CPP_DIR=$TRAVIS_BUILD_DIR/cpp + + mkdir $CPP_BUILD_DIR + pushd $CPP_BUILD_DIR + + cmake -DARROW_BUILD_TESTS=off \ + -DARROW_PYTHON=on \ + -DCMAKE_INSTALL_PREFIX=$2 \ + $CPP_DIR + + make -j4 + make install + + popd +} python_version_tests() { PYTHON_VERSION=$1 CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION + + export ARROW_HOME=$TRAVIS_BUILD_DIR/arrow-install-$PYTHON_VERSION + export LD_LIBRARY_PATH=$ARROW_HOME/lib:$PARQUET_HOME/lib + conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION source activate $CONDA_ENV_DIR @@ -87,6 +106,9 @@ python_version_tests() { # Expensive dependencies install from Continuum package repo conda install -y pip numpy pandas cython + # Build C++ libraries + build_arrow_libraries arrow-build-$PYTHON_VERSION $ARROW_HOME + # Other stuff pip install pip install -r requirements.txt diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c04afe47030..c77cf601cbd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -106,6 +106,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Rely on boost shared libraries where relevant" ON) + option(ARROW_PYTHON + "Build the Arrow CPython extensions" + OFF) + option(ARROW_SSE3 "Build Arrow with SSE3" ON) @@ -133,6 +137,7 @@ if(NOT ARROW_BUILD_BENCHMARKS) set(NO_BENCHMARKS 1) endif() +include(BuildUtils) ############################################################ # 
Compiler flags @@ -303,6 +308,14 @@ endfunction() # -# Arguments after the test name will be passed to set_tests_properties(). +# An optional STATIC_LINK_LIBS list replaces ARROW_TEST_LINK_LIBS when linking the test executable; any other extra argument is reported as an error. function(ADD_ARROW_TEST REL_TEST_NAME) + set(options) + set(one_value_args) + set(multi_value_args STATIC_LINK_LIBS) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + if(NO_TESTS OR NOT ARROW_BUILD_STATIC) return() endif() @@ -312,7 +325,13 @@ function(ADD_ARROW_TEST REL_TEST_NAME) # This test has a corresponding .cc file, set it up as an executable. set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}") add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc") - target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS}) + + if (ARG_STATIC_LINK_LIBS) + # Customize link libraries + target_link_libraries(${TEST_NAME} ${ARG_STATIC_LINK_LIBS}) + else() + target_link_libraries(${TEST_NAME} ${ARROW_TEST_LINK_LIBS}) + endif() add_dependencies(unittest ${TEST_NAME}) else() # No executable, just invoke the test (probably a script) directly. @@ -332,10 +351,6 @@ function(ADD_ARROW_TEST REL_TEST_NAME) ${BUILD_SUPPORT_DIR}/run-test.sh ${CMAKE_BINARY_DIR} test ${TEST_PATH}) endif() set_tests_properties(${TEST_NAME} PROPERTIES LABELS "unittest") - - if(ARGN) - set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN}) - endif() endfunction() # A wrapper for add_dependencies() that is compatible with NO_TESTS. 
@@ -363,72 +378,6 @@ enable_testing() ############################################################ # Dependencies ############################################################ -function(ADD_THIRDPARTY_LIB LIB_NAME) - set(options) - set(one_value_args SHARED_LIB STATIC_LIB) - set(multi_value_args DEPS) - cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - if(ARG_UNPARSED_ARGUMENTS) - message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") - endif() - - if(ARG_STATIC_LIB AND ARG_SHARED_LIB) - if(NOT ARG_STATIC_LIB) - message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") - endif() - - SET(AUG_LIB_NAME "${LIB_NAME}_static") - add_library(${AUG_LIB_NAME} STATIC IMPORTED) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") - - SET(AUG_LIB_NAME "${LIB_NAME}_shared") - add_library(${AUG_LIB_NAME} SHARED IMPORTED) - - if(MSVC) - # Mark the ”.lib” location as part of a Windows DLL - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") - else() - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - endif() - message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") - elseif(ARG_STATIC_LIB) - add_library(${LIB_NAME} STATIC IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - SET(AUG_LIB_NAME "${LIB_NAME}_static") - add_library(${AUG_LIB_NAME} STATIC IMPORTED) - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") - elseif(ARG_SHARED_LIB) - add_library(${LIB_NAME} SHARED IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - SET(AUG_LIB_NAME "${LIB_NAME}_shared") - add_library(${AUG_LIB_NAME} 
SHARED IMPORTED) - - if(MSVC) - # Mark the ”.lib” location as part of a Windows DLL - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") - else() - set_target_properties(${AUG_LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - endif() - message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") - else() - message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") - endif() - - if(ARG_DEPS) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() -endfunction() # ---------------------------------------------------------------------- # Add Boost dependencies (code adapted from Apache Kudu (incubating)) @@ -798,8 +747,7 @@ if (${CLANG_FORMAT_FOUND}) add_custom_target(format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 1 `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h | sed -e '/_generated/g' | - sed -e '/windows_compatibility.h/g'` - `find ${CMAKE_CURRENT_SOURCE_DIR}/../python -name \\*.cc -or -name \\*.h`) + sed -e '/windows_compatibility.h/g'`) # runs clang format and exits with a non-zero exit code if any files need to be reformatted add_custom_target(check-format ${BUILD_SUPPORT_DIR}/run-clang-format.sh ${CMAKE_CURRENT_SOURCE_DIR} ${CLANG_FORMAT_BIN} 0 @@ -857,11 +805,9 @@ if(NOT APPLE) set(ARROW_SHARED_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/src/arrow/symbols.map") endif() -include(BuildUtils) - ADD_ARROW_LIB(arrow - SOURCES ${ARROW_SRCS} - SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS} + SOURCES ${ARROW_SRCS} + SHARED_LINK_FLAGS ${ARROW_SHARED_LINK_FLAGS} ) add_subdirectory(src/arrow) @@ -875,6 +821,10 @@ endif() #---------------------------------------------------------------------- # IPC library +if(ARROW_PYTHON) + set(ARROW_IPC on) +endif() + ## Flatbuffers if(ARROW_IPC) if("$ENV{FLATBUFFERS_HOME}" STREQUAL "") @@ -908,3 +858,14 @@ if(ARROW_IPC) 
add_subdirectory(src/arrow/ipc) endif() + +if(ARROW_PYTHON) + find_package(PythonLibsNew REQUIRED) + find_package(NumPy REQUIRED) + + include_directories(SYSTEM + ${NUMPY_INCLUDE_DIRS} + ${PYTHON_INCLUDE_DIRS}) + + add_subdirectory(src/arrow/python) +endif() diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 78b514c2295..c9930418185 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -15,6 +15,73 @@ # specific language governing permissions and limitations # under the License. +function(ADD_THIRDPARTY_LIB LIB_NAME) + set(options) + set(one_value_args SHARED_LIB STATIC_LIB) + set(multi_value_args DEPS) + cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) + if(ARG_UNPARSED_ARGUMENTS) + message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") + endif() + + if(ARG_STATIC_LIB AND ARG_SHARED_LIB) + if(NOT ARG_STATIC_LIB) + message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") + endif() + + SET(AUG_LIB_NAME "${LIB_NAME}_static") + add_library(${AUG_LIB_NAME} STATIC IMPORTED) + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") + + SET(AUG_LIB_NAME "${LIB_NAME}_shared") + add_library(${AUG_LIB_NAME} SHARED IMPORTED) + + if(MSVC) + # Mark the ”.lib” location as part of a Windows DLL + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") + else() + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") + endif() + message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") + elseif(ARG_STATIC_LIB) + add_library(${LIB_NAME} STATIC IMPORTED) + set_target_properties(${LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + SET(AUG_LIB_NAME "${LIB_NAME}_static") + add_library(${AUG_LIB_NAME} STATIC IMPORTED) + 
set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") + elseif(ARG_SHARED_LIB) + add_library(${LIB_NAME} SHARED IMPORTED) + set_target_properties(${LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") + SET(AUG_LIB_NAME "${LIB_NAME}_shared") + add_library(${AUG_LIB_NAME} SHARED IMPORTED) + + if(MSVC) + # Mark the ”.lib” location as part of a Windows DLL + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_IMPLIB "${ARG_SHARED_LIB}") + else() + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") + endif() + message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") + else() + message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") + endif() + + if(ARG_DEPS) + set_target_properties(${LIB_NAME} + PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") + endif() +endfunction() + function(ADD_ARROW_LIB LIB_NAME) set(options) set(one_value_args SHARED_LINK_FLAGS) @@ -45,9 +112,16 @@ function(ADD_ARROW_LIB LIB_NAME) if (ARROW_BUILD_SHARED) add_library(${LIB_NAME}_shared SHARED $) + if(APPLE) - set_target_properties(${LIB_NAME}_shared PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + # On OS X, you can avoid linking at library load time and instead + # expecting that the symbols have been loaded separately. 
This happens + # with libpython* where there can be conflicts between system Python and + # the Python from a thirdparty distribution + set(ARG_SHARED_LINK_FLAGS + "-undefined dynamic_lookup ${ARG_SHARED_LINK_FLAGS}") endif() + set_target_properties(${LIB_NAME}_shared PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}" @@ -55,6 +129,7 @@ function(ADD_ARROW_LIB LIB_NAME) OUTPUT_NAME ${LIB_NAME} VERSION "${ARROW_ABI_VERSION}" SOVERSION "${ARROW_SO_VERSION}") + target_link_libraries(${LIB_NAME}_shared LINK_PUBLIC ${ARG_SHARED_LINK_LIBS} LINK_PRIVATE ${ARG_SHARED_PRIVATE_LINK_LIBS}) @@ -68,28 +143,28 @@ function(ADD_ARROW_LIB LIB_NAME) set_target_properties(${LIB_NAME}_shared PROPERTIES INSTALL_RPATH ${_lib_install_rpath}) endif() - + install(TARGETS ${LIB_NAME}_shared LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() - + if (ARROW_BUILD_STATIC) add_library(${LIB_NAME}_static STATIC $) set_target_properties(${LIB_NAME}_static PROPERTIES LIBRARY_OUTPUT_DIRECTORY "${BUILD_OUTPUT_ROOT_DIRECTORY}" OUTPUT_NAME ${LIB_NAME}) - + target_link_libraries(${LIB_NAME}_static LINK_PUBLIC ${ARG_STATIC_LINK_LIBS} LINK_PRIVATE ${ARG_STATIC_PRIVATE_LINK_LIBS}) - + install(TARGETS ${LIB_NAME}_static LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) endif() - + if (APPLE) set_target_properties(${LIB_NAME}_shared PROPERTIES @@ -98,4 +173,3 @@ function(ADD_ARROW_LIB LIB_NAME) endif() endfunction() - diff --git a/python/cmake_modules/FindNumPy.cmake b/cpp/cmake_modules/FindNumPy.cmake similarity index 100% rename from python/cmake_modules/FindNumPy.cmake rename to cpp/cmake_modules/FindNumPy.cmake diff --git a/python/cmake_modules/FindPythonLibsNew.cmake b/cpp/cmake_modules/FindPythonLibsNew.cmake similarity index 100% rename from python/cmake_modules/FindPythonLibsNew.cmake rename to cpp/cmake_modules/FindPythonLibsNew.cmake diff --git a/cpp/src/arrow/python/CMakeLists.txt 
b/cpp/src/arrow/python/CMakeLists.txt new file mode 100644 index 00000000000..03f5afc624b --- /dev/null +++ b/cpp/src/arrow/python/CMakeLists.txt @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# + +####################################### +# arrow_python +####################################### + +if (ARROW_BUILD_TESTS) + add_library(arrow_python_test_main STATIC + util/test_main.cc) + + if (APPLE) + target_link_libraries(arrow_python_test_main + gtest + dl) + set_target_properties(arrow_python_test_main + PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + else() + target_link_libraries(arrow_python_test_main + gtest + pthread + dl + ) + endif() +endif() + +set(ARROW_PYTHON_MIN_TEST_LIBS + arrow_python_test_main + arrow_python_static + arrow_ipc_static + arrow_io_static + arrow_static) + +if(NOT APPLE AND ARROW_BUILD_TESTS) + ADD_THIRDPARTY_LIB(python + SHARED_LIB "${PYTHON_LIBRARIES}") + list(APPEND ARROW_PYTHON_MIN_TEST_LIBS python) +endif() + +set(ARROW_PYTHON_TEST_LINK_LIBS ${ARROW_PYTHON_MIN_TEST_LIBS}) + +# ---------------------------------------------------------------------- + +set(ARROW_PYTHON_SRCS + builtin_convert.cc + common.cc + config.cc + helpers.cc + io.cc + pandas_convert.cc +) + +set(ARROW_PYTHON_SHARED_LINK_LIBS + arrow_io_shared + arrow_ipc_shared + arrow_shared +) + +ADD_ARROW_LIB(arrow_python + SOURCES ${ARROW_PYTHON_SRCS} + SHARED_LINK_FLAGS "" + SHARED_LINK_LIBS ${ARROW_PYTHON_SHARED_LINK_LIBS} + STATIC_LINK_LIBS ${ARROW_IO_SHARED_PRIVATE_LINK_LIBS} +) + +install(FILES + api.h + builtin_convert.h + common.h + 
config.h + do_import_numpy.h + helpers.h + io.h + numpy_interop.h + pandas_convert.h + type_traits.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/python") + +# set_target_properties(arrow_python_shared PROPERTIES +# INSTALL_RPATH "\$ORIGIN") + +if (ARROW_BUILD_TESTS) + ADD_ARROW_TEST(pandas-test + STATIC_LINK_LIBS "${ARROW_PYTHON_TEST_LINK_LIBS}") +endif() diff --git a/python/src/pyarrow/api.h b/cpp/src/arrow/python/api.h similarity index 75% rename from python/src/pyarrow/api.h rename to cpp/src/arrow/python/api.h index f65cc097f54..f4f1c0cf9a5 100644 --- a/python/src/pyarrow/api.h +++ b/cpp/src/arrow/python/api.h @@ -15,12 +15,13 @@ // specific language governing permissions and limitations // under the License. -#ifndef PYARROW_API_H -#define PYARROW_API_H +#ifndef ARROW_PYTHON_API_H +#define ARROW_PYTHON_API_H -#include "pyarrow/helpers.h" +#include "arrow/python/builtin_convert.h" +#include "arrow/python/common.h" +#include "arrow/python/helpers.h" +#include "arrow/python/io.h" +#include "arrow/python/pandas_convert.h" -#include "pyarrow/adapters/builtin.h" -#include "pyarrow/adapters/pandas.h" - -#endif // PYARROW_API_H +#endif // ARROW_PYTHON_API_H diff --git a/python/src/pyarrow/adapters/builtin.cc b/cpp/src/arrow/python/builtin_convert.cc similarity index 99% rename from python/src/pyarrow/adapters/builtin.cc rename to cpp/src/arrow/python/builtin_convert.cc index 06e098a8036..9acccc14966 100644 --- a/python/src/pyarrow/adapters/builtin.cc +++ b/cpp/src/arrow/python/builtin_convert.cc @@ -19,13 +19,13 @@ #include #include -#include "pyarrow/adapters/builtin.h" +#include "arrow/python/builtin_convert.h" #include "arrow/api.h" #include "arrow/status.h" -#include "pyarrow/helpers.h" -#include "pyarrow/util/datetime.h" +#include "arrow/python/helpers.h" +#include "arrow/python/util/datetime.h" namespace arrow { namespace py { diff --git a/python/src/pyarrow/adapters/builtin.h b/cpp/src/arrow/python/builtin_convert.h similarity index 90% rename from 
python/src/pyarrow/adapters/builtin.h rename to cpp/src/arrow/python/builtin_convert.h index 2d45e670628..7b50990dd55 100644 --- a/python/src/pyarrow/adapters/builtin.h +++ b/cpp/src/arrow/python/builtin_convert.h @@ -18,8 +18,8 @@ // Functions for converting between CPython built-in data structures and Arrow // data structures -#ifndef PYARROW_ADAPTERS_BUILTIN_H -#define PYARROW_ADAPTERS_BUILTIN_H +#ifndef ARROW_PYTHON_ADAPTERS_BUILTIN_H +#define ARROW_PYTHON_ADAPTERS_BUILTIN_H #include @@ -29,7 +29,7 @@ #include "arrow/util/visibility.h" -#include "pyarrow/common.h" +#include "arrow/python/common.h" namespace arrow { @@ -51,4 +51,4 @@ Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr } // namespace py } // namespace arrow -#endif // PYARROW_ADAPTERS_BUILTIN_H +#endif // ARROW_PYTHON_ADAPTERS_BUILTIN_H diff --git a/python/src/pyarrow/common.cc b/cpp/src/arrow/python/common.cc similarity index 69% rename from python/src/pyarrow/common.cc rename to cpp/src/arrow/python/common.cc index 792aa4775d4..a5aea308844 100644 --- a/python/src/pyarrow/common.cc +++ b/cpp/src/arrow/python/common.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "pyarrow/common.h" +#include "arrow/python/common.h" #include #include @@ -28,17 +28,17 @@ namespace arrow { namespace py { static std::mutex memory_pool_mutex; -static MemoryPool* default_pyarrow_pool = nullptr; +static MemoryPool* default_python_pool = nullptr; void set_default_memory_pool(MemoryPool* pool) { std::lock_guard guard(memory_pool_mutex); - default_pyarrow_pool = pool; + default_python_pool = pool; } MemoryPool* get_memory_pool() { std::lock_guard guard(memory_pool_mutex); - if (default_pyarrow_pool) { - return default_pyarrow_pool; + if (default_python_pool) { + return default_python_pool; } else { return default_memory_pool(); } @@ -47,22 +47,21 @@ MemoryPool* get_memory_pool() { // ---------------------------------------------------------------------- // PyBuffer -PyBuffer::PyBuffer(PyObject* obj) - : Buffer(nullptr, 0) { - if (PyObject_CheckBuffer(obj)) { - obj_ = PyMemoryView_FromObject(obj); - Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_); - data_ = reinterpret_cast(buffer->buf); - size_ = buffer->len; - capacity_ = buffer->len; - is_mutable_ = false; - Py_INCREF(obj_); - } +PyBuffer::PyBuffer(PyObject* obj) : Buffer(nullptr, 0) { + if (PyObject_CheckBuffer(obj)) { + obj_ = PyMemoryView_FromObject(obj); + Py_buffer* buffer = PyMemoryView_GET_BUFFER(obj_); + data_ = reinterpret_cast(buffer->buf); + size_ = buffer->len; + capacity_ = buffer->len; + is_mutable_ = false; + Py_INCREF(obj_); + } } PyBuffer::~PyBuffer() { - PyAcquireGIL lock; - Py_DECREF(obj_); + PyAcquireGIL lock; + Py_DECREF(obj_); } } // namespace py diff --git a/python/src/pyarrow/common.h b/cpp/src/arrow/python/common.h similarity index 90% rename from python/src/pyarrow/common.h rename to cpp/src/arrow/python/common.h index b4e4ea6d2b9..f1be471cd3a 100644 --- a/python/src/pyarrow/common.h +++ b/cpp/src/arrow/python/common.h @@ -15,10 +15,12 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef PYARROW_COMMON_H -#define PYARROW_COMMON_H +#ifndef ARROW_PYTHON_COMMON_H +#define ARROW_PYTHON_COMMON_H -#include "pyarrow/config.h" +#include + +#include "arrow/python/config.h" #include "arrow/buffer.h" #include "arrow/util/macros.h" @@ -47,7 +49,7 @@ class OwnedRef { public: OwnedRef() : obj_(nullptr) {} - OwnedRef(PyObject* obj) : obj_(obj) {} + explicit OwnedRef(PyObject* obj) : obj_(obj) {} ~OwnedRef() { PyAcquireGIL lock; @@ -71,7 +73,7 @@ struct PyObjectStringify { OwnedRef tmp_obj; const char* bytes; - PyObjectStringify(PyObject* obj) { + explicit PyObjectStringify(PyObject* obj) { PyObject* bytes_obj; if (PyUnicode_Check(obj)) { bytes_obj = PyUnicode_AsUTF8String(obj); @@ -103,7 +105,7 @@ ARROW_EXPORT MemoryPool* get_memory_pool(); class ARROW_EXPORT NumPyBuffer : public Buffer { public: - NumPyBuffer(PyArrayObject* arr) : Buffer(nullptr, 0) { + explicit NumPyBuffer(PyArrayObject* arr) : Buffer(nullptr, 0) { arr_ = arr; Py_INCREF(arr); @@ -124,7 +126,7 @@ class ARROW_EXPORT PyBuffer : public Buffer { /// /// While memoryview objects support multi-demensional buffers, PyBuffer only supports /// one-dimensional byte buffers. 
- PyBuffer(PyObject* obj); + explicit PyBuffer(PyObject* obj); ~PyBuffer(); private: @@ -134,4 +136,4 @@ class ARROW_EXPORT PyBuffer : public Buffer { } // namespace py } // namespace arrow -#endif // PYARROW_COMMON_H +#endif // ARROW_PYTHON_COMMON_H diff --git a/python/src/pyarrow/config.cc b/cpp/src/arrow/python/config.cc similarity index 91% rename from python/src/pyarrow/config.cc rename to cpp/src/arrow/python/config.cc index 0be6d962b55..2abc4dda6ee 100644 --- a/python/src/pyarrow/config.cc +++ b/cpp/src/arrow/python/config.cc @@ -17,16 +17,16 @@ #include -#include "pyarrow/config.h" +#include "arrow/python/config.h" namespace arrow { namespace py { -void pyarrow_init() {} +void Init() {} PyObject* numpy_nan = nullptr; -void pyarrow_set_numpy_nan(PyObject* obj) { +void set_numpy_nan(PyObject* obj) { Py_INCREF(obj); numpy_nan = obj; } diff --git a/python/src/pyarrow/config.h b/cpp/src/arrow/python/config.h similarity index 85% rename from python/src/pyarrow/config.h rename to cpp/src/arrow/python/config.h index 87fc5c2b290..dd554e05b93 100644 --- a/python/src/pyarrow/config.h +++ b/cpp/src/arrow/python/config.h @@ -15,15 +15,14 @@ // specific language governing permissions and limitations // under the License. 
-#ifndef PYARROW_CONFIG_H -#define PYARROW_CONFIG_H +#ifndef ARROW_PYTHON_CONFIG_H +#define ARROW_PYTHON_CONFIG_H #include +#include "arrow/python/numpy_interop.h" #include "arrow/util/visibility.h" -#include "pyarrow/numpy_interop.h" - #if PY_MAJOR_VERSION >= 3 #define PyString_Check PyUnicode_Check #endif @@ -35,12 +34,12 @@ ARROW_EXPORT extern PyObject* numpy_nan; ARROW_EXPORT -void pyarrow_init(); +void Init(); ARROW_EXPORT -void pyarrow_set_numpy_nan(PyObject* obj); +void set_numpy_nan(PyObject* obj); } // namespace py } // namespace arrow -#endif // PYARROW_CONFIG_H +#endif // ARROW_PYTHON_CONFIG_H diff --git a/python/src/pyarrow/do_import_numpy.h b/cpp/src/arrow/python/do_import_numpy.h similarity index 100% rename from python/src/pyarrow/do_import_numpy.h rename to cpp/src/arrow/python/do_import_numpy.h diff --git a/python/src/pyarrow/helpers.cc b/cpp/src/arrow/python/helpers.cc similarity index 98% rename from python/src/pyarrow/helpers.cc rename to cpp/src/arrow/python/helpers.cc index 43edf8af17f..add2d9a222a 100644 --- a/python/src/pyarrow/helpers.cc +++ b/cpp/src/arrow/python/helpers.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "pyarrow/helpers.h" +#include "arrow/python/helpers.h" #include diff --git a/python/src/pyarrow/helpers.h b/cpp/src/arrow/python/helpers.h similarity index 100% rename from python/src/pyarrow/helpers.h rename to cpp/src/arrow/python/helpers.h diff --git a/python/src/pyarrow/io.cc b/cpp/src/arrow/python/io.cc similarity index 98% rename from python/src/pyarrow/io.cc rename to cpp/src/arrow/python/io.cc index c66155b946a..ba82a45411c 100644 --- a/python/src/pyarrow/io.cc +++ b/cpp/src/arrow/python/io.cc @@ -15,16 +15,17 @@ // specific language governing permissions and limitations // under the License. 
-#include "pyarrow/io.h" +#include "arrow/python/io.h" #include #include +#include #include "arrow/io/memory.h" #include "arrow/memory_pool.h" #include "arrow/status.h" -#include "pyarrow/common.h" +#include "arrow/python/common.h" namespace arrow { namespace py { @@ -166,7 +167,7 @@ Status PyReadableFile::GetSize(int64_t* size) { PyAcquireGIL lock; int64_t current_position; - ; + ARROW_RETURN_NOT_OK(file_->Tell(¤t_position)); ARROW_RETURN_NOT_OK(file_->Seek(0, 2)); diff --git a/python/src/pyarrow/io.h b/cpp/src/arrow/python/io.h similarity index 96% rename from python/src/pyarrow/io.h rename to cpp/src/arrow/python/io.h index 89af60926ad..905bd6c7a6a 100644 --- a/python/src/pyarrow/io.h +++ b/cpp/src/arrow/python/io.h @@ -22,9 +22,9 @@ #include "arrow/io/memory.h" #include "arrow/util/visibility.h" -#include "pyarrow/config.h" +#include "arrow/python/config.h" -#include "pyarrow/common.h" +#include "arrow/python/common.h" namespace arrow { @@ -36,7 +36,7 @@ namespace py { // calling any methods class PythonFile { public: - PythonFile(PyObject* file); + explicit PythonFile(PyObject* file); ~PythonFile(); Status Close(); diff --git a/python/src/pyarrow/numpy_interop.h b/cpp/src/arrow/python/numpy_interop.h similarity index 97% rename from python/src/pyarrow/numpy_interop.h rename to cpp/src/arrow/python/numpy_interop.h index 57f3328e870..0a4b425e734 100644 --- a/python/src/pyarrow/numpy_interop.h +++ b/cpp/src/arrow/python/numpy_interop.h @@ -34,7 +34,7 @@ // This is required to be able to access the NumPy C API properly in C++ files // other than this main one -#define PY_ARRAY_UNIQUE_SYMBOL pyarrow_ARRAY_API +#define PY_ARRAY_UNIQUE_SYMBOL arrow_ARRAY_API #ifndef NUMPY_IMPORT_ARRAY #define NO_IMPORT_ARRAY #endif diff --git a/python/src/pyarrow/adapters/pandas-test.cc b/cpp/src/arrow/python/pandas-test.cc similarity index 95% rename from python/src/pyarrow/adapters/pandas-test.cc rename to cpp/src/arrow/python/pandas-test.cc index e694e790a38..ae2527e19c0 100644 
--- a/python/src/pyarrow/adapters/pandas-test.cc +++ b/cpp/src/arrow/python/pandas-test.cc @@ -24,17 +24,17 @@ #include "arrow/array.h" #include "arrow/builder.h" +#include "arrow/python/pandas_convert.h" #include "arrow/schema.h" #include "arrow/table.h" #include "arrow/test-util.h" #include "arrow/type.h" -#include "pyarrow/adapters/pandas.h" namespace arrow { namespace py { TEST(PandasConversionTest, TestObjectBlockWriteFails) { - StringBuilder builder; + StringBuilder builder(default_memory_pool()); const char value[] = {'\xf1', '\0'}; for (int i = 0; i < 1000; ++i) { diff --git a/python/src/pyarrow/adapters/pandas.cc b/cpp/src/arrow/python/pandas_convert.cc similarity index 99% rename from python/src/pyarrow/adapters/pandas.cc rename to cpp/src/arrow/python/pandas_convert.cc index a7386cefcdb..f2c2415ed27 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/cpp/src/arrow/python/pandas_convert.cc @@ -19,8 +19,8 @@ #include -#include "pyarrow/adapters/pandas.h" -#include "pyarrow/numpy_interop.h" +#include "arrow/python/numpy_interop.h" +#include "arrow/python/pandas_convert.h" #include #include @@ -32,10 +32,16 @@ #include #include #include +#include #include "arrow/array.h" #include "arrow/column.h" #include "arrow/loader.h" +#include "arrow/python/builtin_convert.h" +#include "arrow/python/common.h" +#include "arrow/python/config.h" +#include "arrow/python/type_traits.h" +#include "arrow/python/util/datetime.h" #include "arrow/status.h" #include "arrow/table.h" #include "arrow/type_fwd.h" @@ -43,12 +49,6 @@ #include "arrow/util/bit-util.h" #include "arrow/util/macros.h" -#include "pyarrow/adapters/builtin.h" -#include "pyarrow/common.h" -#include "pyarrow/config.h" -#include "pyarrow/type_traits.h" -#include "pyarrow/util/datetime.h" - namespace arrow { namespace py { @@ -125,7 +125,7 @@ static int64_t ValuesToValidBytes( // TODO(wesm): striding for (int i = 0; i < length; ++i) { - valid_bytes[i] = not traits::isnull(values[i]); + valid_bytes[i] = 
!traits::isnull(values[i]); if (traits::isnull(values[i])) null_count++; } @@ -226,7 +226,7 @@ class PandasConverter : public TypeVisitor { type_(type), arr_(reinterpret_cast(ao)), mask_(nullptr) { - if (mo != nullptr and mo != Py_None) { mask_ = reinterpret_cast(mo); } + if (mo != nullptr && mo != Py_None) { mask_ = reinterpret_cast(mo); } length_ = PyArray_SIZE(arr_); } @@ -820,6 +820,7 @@ class PandasBlock { OwnedRef placement_arr_; int64_t* placement_data_; + private: DISALLOW_COPY_AND_ASSIGN(PandasBlock); }; @@ -947,7 +948,6 @@ inline Status ConvertListsLike( for (int c = 0; c < data.num_chunks(); c++) { auto arr = std::static_pointer_cast(data.chunk(c)); - const uint8_t* data_ptr; const bool has_nulls = data.null_count() > 0; for (int64_t i = 0; i < arr->length(); ++i) { if (has_nulls && arr->IsNull(i)) { @@ -1304,7 +1304,7 @@ class DatetimeTZBlock : public DatetimeBlock { template class CategoricalBlock : public PandasBlock { public: - CategoricalBlock(int64_t num_rows) : PandasBlock(num_rows, 1) {} + explicit CategoricalBlock(int64_t num_rows) : PandasBlock(num_rows, 1) {} Status Allocate() override { constexpr int npy_type = arrow_traits::npy_type; @@ -1432,7 +1432,7 @@ using BlockMap = std::unordered_map>; // * placement arrays as we go class DataFrameBlockCreator { public: - DataFrameBlockCreator(const std::shared_ptr& table) : table_(table) {} + explicit DataFrameBlockCreator(const std::shared_ptr
& table) : table_(table) {} Status Convert(int nthreads, PyObject** output) { column_types_.resize(table_->num_columns()); diff --git a/python/src/pyarrow/adapters/pandas.h b/cpp/src/arrow/python/pandas_convert.h similarity index 95% rename from python/src/pyarrow/adapters/pandas.h rename to cpp/src/arrow/python/pandas_convert.h index 6862339d89b..a33741efaa4 100644 --- a/python/src/pyarrow/adapters/pandas.h +++ b/cpp/src/arrow/python/pandas_convert.h @@ -18,8 +18,8 @@ // Functions for converting between pandas's NumPy-based data representation // and Arrow data structures -#ifndef PYARROW_ADAPTERS_PANDAS_H -#define PYARROW_ADAPTERS_PANDAS_H +#ifndef ARROW_PYTHON_ADAPTERS_PANDAS_H +#define ARROW_PYTHON_ADAPTERS_PANDAS_H #include @@ -76,4 +76,4 @@ Status PandasObjectsToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, } // namespace py } // namespace arrow -#endif // PYARROW_ADAPTERS_PANDAS_H +#endif // ARROW_PYTHON_ADAPTERS_PANDAS_H diff --git a/python/src/pyarrow/type_traits.h b/cpp/src/arrow/python/type_traits.h similarity index 99% rename from python/src/pyarrow/type_traits.h rename to cpp/src/arrow/python/type_traits.h index cc65d5ceed9..f78dc360095 100644 --- a/python/src/pyarrow/type_traits.h +++ b/cpp/src/arrow/python/type_traits.h @@ -18,8 +18,9 @@ #include #include +#include -#include "pyarrow/numpy_interop.h" +#include "arrow/python/numpy_interop.h" #include "arrow/builder.h" #include "arrow/type.h" diff --git a/python/src/pyarrow/util/CMakeLists.txt b/cpp/src/arrow/python/util/CMakeLists.txt similarity index 83% rename from python/src/pyarrow/util/CMakeLists.txt rename to cpp/src/arrow/python/util/CMakeLists.txt index 6cd49cb75a4..4cc20f6f4b4 100644 --- a/python/src/pyarrow/util/CMakeLists.txt +++ b/cpp/src/arrow/python/util/CMakeLists.txt @@ -16,21 +16,21 @@ # under the License. 
####################################### -# pyarrow_test_main +# arrow_python_test_main ####################################### if (PYARROW_BUILD_TESTS) - add_library(pyarrow_test_main STATIC + add_library(arrow_python_test_main STATIC test_main.cc) if (APPLE) - target_link_libraries(pyarrow_test_main + target_link_libraries(arrow_python_test_main gtest dl) - set_target_properties(pyarrow_test_main + set_target_properties(arrow_python_test_main PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") else() - target_link_libraries(pyarrow_test_main + target_link_libraries(arrow_python_test_main gtest pthread dl diff --git a/python/src/pyarrow/util/datetime.h b/cpp/src/arrow/python/util/datetime.h similarity index 100% rename from python/src/pyarrow/util/datetime.h rename to cpp/src/arrow/python/util/datetime.h diff --git a/python/src/pyarrow/util/test_main.cc b/cpp/src/arrow/python/util/test_main.cc similarity index 92% rename from python/src/pyarrow/util/test_main.cc rename to cpp/src/arrow/python/util/test_main.cc index d8d1d030f8f..c83514d0dbd 100644 --- a/python/src/pyarrow/util/test_main.cc +++ b/cpp/src/arrow/python/util/test_main.cc @@ -19,8 +19,8 @@ #include -#include "pyarrow/do_import_numpy.h" -#include "pyarrow/numpy_interop.h" +#include "arrow/python/do_import_numpy.h" +#include "arrow/python/numpy_interop.h" int main(int argc, char** argv) { ::testing::InitGoogleTest(&argc, argv); diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ef874e3d079..35a1a89ef31 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -47,9 +47,6 @@ endif() # Top level cmake dir if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") - option(PYARROW_BUILD_TESTS - "Build the PyArrow C++ googletest unit tests" - OFF) option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) @@ -57,7 +54,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Build the PyArrow jemalloc integration" OFF) option(PYARROW_BUNDLE_ARROW_CPP - 
"Bundle the Arrow C++ libraries" + "Bundle the Arrow C++ libraries" OFF) endif() @@ -75,6 +72,8 @@ endif(CCACHE_FOUND) # Compiler flags ############################################################ +include(BuildUtils) +include(CompilerInfo) include(SetupCxxFlags) # Add common flags @@ -86,8 +85,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") # Suppress Cython warnings set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") -# Determine compiler version -include(CompilerInfo) if ("${COMPILER_FAMILY}" STREQUAL "clang") # Using Clang with ccache causes a bunch of spurious warnings that are @@ -215,116 +212,9 @@ include_directories(SYSTEM ${PYTHON_INCLUDE_DIRS} src) -############################################################ -# Testing -############################################################ - -# Add a new test case, with or without an executable that should be built. -# -# REL_TEST_NAME is the name of the test. It may be a single component -# (e.g. monotime-test) or contain additional components (e.g. -# net/net_util-test). Either way, the last component must be a globally -# unique name. -# -# Arguments after the test name will be passed to set_tests_properties(). -function(ADD_PYARROW_TEST REL_TEST_NAME) - if(NO_TESTS) - return() - endif() - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc) - # This test has a corresponding .cc file, set it up as an executable. - set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}") - add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc") - target_link_libraries(${TEST_NAME} ${PYARROW_TEST_LINK_LIBS}) - else() - # No executable, just invoke the test (probably a script) directly. 
- set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}) - endif() - - add_test(${TEST_NAME} - ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH}) - if(ARGN) - set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN}) - endif() -endfunction() - -# A wrapper for add_dependencies() that is compatible with NO_TESTS. -function(ADD_PYARROW_TEST_DEPENDENCIES REL_TEST_NAME) - if(NO_TESTS) - return() - endif() - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - add_dependencies(${TEST_NAME} ${ARGN}) -endfunction() - -enable_testing() - ############################################################ # Dependencies ############################################################ -function(ADD_THIRDPARTY_LIB LIB_NAME) - set(options) - set(one_value_args SHARED_LIB STATIC_LIB) - set(multi_value_args DEPS) - cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - if(ARG_UNPARSED_ARGUMENTS) - message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") - endif() - - if(("${PYARROW_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB)) - if(NOT ARG_STATIC_LIB) - message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") - endif() - add_library(${LIB_NAME} STATIC IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - message(STATUS "Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") - else() - add_library(${LIB_NAME} SHARED IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - message(STATUS "Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") - endif() - - if(ARG_DEPS) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() - - # Set up an "exported variant" for this thirdparty library (see "Visibility" - # above). It's the same as the real target, just with an "_exported" suffix. 
- # We prefer the static archive if it exists (as it's akin to an "internal" - # library), but we'll settle for the shared object if we must. - # - # A shared object exported variant will force any "leaf" library that - # transitively depends on it to also depend on it at runtime; this is - # desirable for some libraries (e.g. cyrus_sasl). - set(LIB_NAME_EXPORTED ${LIB_NAME}_exported) - if(ARG_STATIC_LIB) - add_library(${LIB_NAME_EXPORTED} STATIC IMPORTED) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - else() - add_library(${LIB_NAME_EXPORTED} SHARED IMPORTED) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - endif() - if(ARG_DEPS) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() -endfunction() - -## GMock -if (PYARROW_BUILD_TESTS) - find_package(GTest REQUIRED) - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(gtest - STATIC_LIB ${GTEST_STATIC_LIB}) -endif() ## Parquet find_package(Parquet) @@ -352,6 +242,8 @@ if (PYARROW_BUNDLE_ARROW_CPP) COPYONLY) SET(ARROW_IPC_SHARED_LIB ${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_ipc${CMAKE_SHARED_LIBRARY_SUFFIX}) + SET(ARROW_PYTHON_SHARED_LIB + ${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() ADD_THIRDPARTY_LIB(arrow @@ -360,66 +252,8 @@ ADD_THIRDPARTY_LIB(arrow_io SHARED_LIB ${ARROW_IO_SHARED_LIB}) ADD_THIRDPARTY_LIB(arrow_ipc SHARED_LIB ${ARROW_IPC_SHARED_LIB}) - -############################################################ -# Linker setup -############################################################ - -set(PYARROW_MIN_TEST_LIBS - pyarrow_test_main - pyarrow) - -set(PYARROW_MIN_TEST_LIBS - pyarrow_test_main - pyarrow - ${PYARROW_BASE_LIBS}) - -if(NOT APPLE AND PYARROW_BUILD_TESTS) - ADD_THIRDPARTY_LIB(python - SHARED_LIB "${PYTHON_LIBRARIES}") - list(APPEND PYARROW_MIN_TEST_LIBS python) -endif() - 
-set(PYARROW_TEST_LINK_LIBS ${PYARROW_MIN_TEST_LIBS}) - -############################################################ -# "make ctags" target -############################################################ -if (UNIX) - add_custom_target(ctags ctags -R --languages=c++,c --exclude=thirdparty/installed) -endif (UNIX) - -############################################################ -# "make etags" target -############################################################ -if (UNIX) - add_custom_target(tags etags --members --declarations - `find ${CMAKE_CURRENT_SOURCE_DIR}/src - -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or -name \\*.h -or -name \\*.c -or - -name \\*.f`) - add_custom_target(etags DEPENDS tags) -endif (UNIX) - -############################################################ -# "make cscope" target -############################################################ -if (UNIX) - add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR} - ( -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or - -name \\*.h -or -name \\*.c -or -name \\*.f ) - -exec echo \"{}\" \; > cscope.files && cscope -q -b VERBATIM) -endif (UNIX) - -############################################################ -# "make lint" target -############################################################ -if (UNIX) - # Full lint - add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py - --verbose=2 - --filter=-whitespace/comments,-readability/todo,-build/header_guard - `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`) -endif (UNIX) +ADD_THIRDPARTY_LIB(arrow_python + SHARED_LIB ${ARROW_PYTHON_SHARED_LIB}) ############################################################ # Subdirectories @@ -429,9 +263,6 @@ if (UNIX) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) endif() -add_subdirectory(src/pyarrow) -add_subdirectory(src/pyarrow/util) - set(CYTHON_EXTENSIONS array config @@ -444,19 +275,11 @@ set(CYTHON_EXTENSIONS table ) -set(PYARROW_SRCS - src/pyarrow/common.cc - src/pyarrow/config.cc - 
src/pyarrow/helpers.cc - src/pyarrow/io.cc - src/pyarrow/adapters/builtin.cc - src/pyarrow/adapters/pandas.cc -) - set(LINK_LIBS - arrow - arrow_io - arrow_ipc + arrow_shared + arrow_io_shared + arrow_ipc_shared + arrow_python_shared ) if (PYARROW_BUILD_PARQUET) @@ -497,24 +320,12 @@ if (PYARROW_BUILD_JEMALLOC) SHARED_LIB ${ARROW_JEMALLOC_SHARED_LIB}) set(LINK_LIBS ${LINK_LIBS} - arrow_jemalloc) + arrow_jemalloc_shared) set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} jemalloc) endif() -add_library(pyarrow SHARED - ${PYARROW_SRCS}) -if (PYARROW_BUNDLE_ARROW_CPP) - set_target_properties(pyarrow PROPERTIES - INSTALL_RPATH "\$ORIGIN") -endif() -target_link_libraries(pyarrow ${LINK_LIBS}) - -if(APPLE) - set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") -endif() - ############################################################ # Setup and build Cython modules ############################################################ @@ -555,5 +366,5 @@ foreach(module ${CYTHON_EXTENSIONS}) set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) - target_link_libraries(${module_name} pyarrow) + target_link_libraries(${module_name} ${LINK_LIBS}) endforeach(module) diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake index 5d0207d7c77..5030c9c8ce9 100644 --- a/python/cmake_modules/FindArrow.cmake +++ b/python/cmake_modules/FindArrow.cmake @@ -57,12 +57,18 @@ find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc ${ARROW_SEARCH_LIB_PATH} NO_DEFAULT_PATH) +find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python + PATHS + ${ARROW_SEARCH_LIB_PATH} + NO_DEFAULT_PATH) + if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH) set(ARROW_FOUND TRUE) set(ARROW_LIB_NAME libarrow) set(ARROW_IO_LIB_NAME libarrow_io) set(ARROW_IPC_LIB_NAME libarrow_ipc) set(ARROW_JEMALLOC_LIB_NAME libarrow_jemalloc) + set(ARROW_PYTHON_LIB_NAME libarrow_python) set(ARROW_LIBS ${ARROW_SEARCH_LIB_PATH}) set(ARROW_STATIC_LIB 
${ARROW_SEARCH_LIB_PATH}/${ARROW_LIB_NAME}.a) @@ -77,6 +83,9 @@ if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH) set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_JEMALLOC_LIB_NAME}.a) set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/${ARROW_JEMALLOC_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_PYTHON_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PYTHON_LIB_NAME}.a) + set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + if (NOT Arrow_FIND_QUIETLY) message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}") message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}") diff --git a/python/pyarrow/config.pyx b/python/pyarrow/config.pyx index 5ad7cf53261..536f27839ae 100644 --- a/python/pyarrow/config.pyx +++ b/python/pyarrow/config.pyx @@ -14,21 +14,21 @@ # distutils: language = c++ # cython: embedsignature = True -cdef extern from 'pyarrow/do_import_numpy.h': +cdef extern from 'arrow/python/do_import_numpy.h': pass -cdef extern from 'pyarrow/numpy_interop.h' namespace 'arrow::py': +cdef extern from 'arrow/python/numpy_interop.h' namespace 'arrow::py': int import_numpy() -cdef extern from 'pyarrow/config.h' namespace 'arrow::py': - void pyarrow_init() - void pyarrow_set_numpy_nan(object o) +cdef extern from 'arrow/python/config.h' namespace 'arrow::py': + void Init() + void set_numpy_nan(object o) import_numpy() -pyarrow_init() +Init() import numpy as np -pyarrow_set_numpy_nan(np.nan) +set_numpy_nan(np.nan) import multiprocessing import os diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd index 3fdbebc9293..c3fdf4b070e 100644 --- a/python/pyarrow/includes/pyarrow.pxd +++ b/python/pyarrow/includes/pyarrow.pxd @@ -25,7 +25,7 @@ from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, cimport pyarrow.includes.libarrow_io as arrow_io -cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil: +cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: 
shared_ptr[CDataType] GetPrimitiveType(Type type) shared_ptr[CDataType] GetTimestampType(TimeUnit unit) CStatus ConvertPySequence(object obj, CMemoryPool* pool, @@ -53,13 +53,9 @@ cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil: void set_default_memory_pool(CMemoryPool* pool) CMemoryPool* get_memory_pool() - -cdef extern from "pyarrow/common.h" namespace "arrow::py" nogil: cdef cppclass PyBuffer(CBuffer): PyBuffer(object o) - -cdef extern from "pyarrow/io.h" namespace "arrow::py" nogil: cdef cppclass PyReadableFile(arrow_io.RandomAccessFile): PyReadableFile(object fo) diff --git a/python/setup.py b/python/setup.py index 9abf9854af2..dae6cb2f078 100644 --- a/python/setup.py +++ b/python/setup.py @@ -186,7 +186,7 @@ def _run_cmake(self): # a bit hacky build_lib = saved_cwd - # Move the built libpyarrow library to the place expected by the Python + # Move the libraries to the place expected by the Python # build shared_library_prefix = 'lib' if sys.platform == 'darwin': @@ -203,15 +203,16 @@ def _run_cmake(self): pass def move_lib(lib_name): - lib_filename = shared_library_prefix + lib_name + shared_library_suffix + lib_filename = (shared_library_prefix + lib_name + + shared_library_suffix) shutil.move(pjoin(self.build_type, lib_filename), pjoin(build_lib, 'pyarrow', lib_filename)) - move_lib("pyarrow") if self.bundle_arrow_cpp: move_lib("arrow") move_lib("arrow_io") move_lib("arrow_ipc") + move_lib("arrow_python") if self.with_jemalloc: move_lib("arrow_jemalloc") if self.with_parquet: @@ -227,14 +228,14 @@ def move_lib(lib_name): if self._failure_permitted(name): print('Cython module {0} failure permitted'.format(name)) continue - raise RuntimeError('libpyarrow C-extension failed to build:', + raise RuntimeError('pyarrow C-extension failed to build:', os.path.abspath(built_path)) ext_path = pjoin(build_lib, self._get_cmake_ext_path(name)) if os.path.exists(ext_path): os.remove(ext_path) self.mkpath(os.path.dirname(ext_path)) - print('Moving built 
libpyarrow C-extension', built_path, + print('Moving built C-extension', built_path, 'to build path', ext_path) shutil.move(self.get_ext_built(name), ext_path) self._found_names.append(name) diff --git a/python/src/pyarrow/CMakeLists.txt b/python/src/pyarrow/CMakeLists.txt deleted file mode 100644 index 9e69718dfa7..00000000000 --- a/python/src/pyarrow/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -####################################### -# Unit tests -####################################### - -ADD_PYARROW_TEST(adapters/pandas-test)