diff --git a/.travis.yml b/.travis.yml index 60c44462f6c..7458ab6ed87 100644 --- a/.travis.yml +++ b/.travis.yml @@ -225,6 +225,8 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 - name: "[manylinux1] Python" language: cpp + env: + - PYTHON_VERSIONS="2.7,32 3.6,16" before_script: - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1; fi script: diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh index 450dc870249..5bf6094ae2a 100755 --- a/ci/docker_build_cpp.sh +++ b/ci/docker_build_cpp.sh @@ -34,6 +34,8 @@ cmake -GNinja \ -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_HDFS=${ARROW_HDFS:-OFF} \ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ + -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ + -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ diff --git a/ci/travis_script_gandiva_java.sh b/ci/travis_script_gandiva_java.sh index 387be9a092b..9d5871f69fc 100755 --- a/ci/travis_script_gandiva_java.sh +++ b/ci/travis_script_gandiva_java.sh @@ -22,6 +22,8 @@ set -e source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh JAVA_DIR=${TRAVIS_BUILD_DIR}/java +export MAVEN_OPTS="-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" + pushd $JAVA_DIR # build with gandiva profile diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh index 8e8086aa32d..fa02c82e1bf 100755 --- a/ci/travis_script_manylinux.sh +++ b/ci/travis_script_manylinux.sh @@ -20,23 +20,58 @@ set -ex -pushd python/manylinux1 -docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1 /io/build_arrow.sh - # Testing for https://issues.apache.org/jira/browse/ARROW-2657 # These tests cannot be run inside of the docker container, since TensorFlow # does not run on manylinux1 source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh - source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh -PYTHON_VERSION=3.6 -CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION +pushd python/manylinux1 + +cat << EOF > check_imports.py +import sys +import pyarrow +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +import tensorflow + +if sys.version_info.major > 2: + import pyarrow.gandiva +EOF + +for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do + IFS="," read PYTHON_VERSION UNICODE_WIDTH <<< $PYTHON_TUPLE + + # cleanup the artifact directory, docker writes it as root + sudo rm -rf dist + + # build the wheels + docker run --shm-size=2g --rm \ + -e PYARROW_PARALLEL=3 \ + -e PYTHON_VERSION=$PYTHON_VERSION \ + -e UNICODE_WIDTH=$UNICODE_WIDTH \ + -v $PWD:/io \ + -v $PWD/../../:/arrow \ + quay.io/xhochy/arrow_manylinux1_x86_64_base:latest \ + /io/build_arrow.sh + + # create a testing conda environment + CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION + conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION + conda activate $CONDA_ENV_DIR + + # install the produced wheels + pip install tensorflow + pip install dist/*.whl + + # Test optional dependencies and the presence of tensorflow + python check_imports.py -conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION -conda activate $CONDA_ENV_DIR + # Install test dependencies and run pyarrow tests + pip install -r ../requirements-test.txt + pytest --pyargs pyarrow +done -pip install -q tensorflow -pip install "dist/`ls dist/ | grep cp36`" -python -c "import pyarrow; import tensorflow" +popd diff --git a/cpp/Dockerfile b/cpp/Dockerfile index 17d332d22be..b570bf679e7 100644 --- a/cpp/Dockerfile +++ b/cpp/Dockerfile @@ -48,6 +48,7 @@ RUN arrow/ci/docker_install_conda.sh && \ ENV CC=gcc \ CXX=g++ \ + ARROW_GANDIVA=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX \ ARROW_HOME=$CONDA_PREFIX \ diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 068a21ee1d6..9d03ef13225 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -27,17 +27,14 @@ add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) find_package(LLVM) -# Set the path where the byte-code files will be installed. -set(GANDIVA_BC_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/gandiva) +# Set the path where the bitcode file generated, see precompiled/CMakeLists.txt +set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc") +set(GANDIVA_PRECOMPILED_CC_PATH "${CMAKE_CURRENT_BINARY_DIR}/precompiled_bitcode.cc") +set(GANDIVA_PRECOMPILED_CC_IN_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/precompiled_bitcode.cc.in") -set(GANDIVA_BC_FILE_NAME irhelpers.bc) -set(GANDIVA_BC_INSTALL_PATH "${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}") -set(GANDIVA_BC_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}") -install(FILES ${GANDIVA_BC_OUTPUT_PATH} DESTINATION ${GANDIVA_BC_INSTALL_DIR}) - -set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc") -configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC}) -add_definitions(-DGANDIVA_BYTE_COMPILE_FILE_PATH="${GANDIVA_BC_OUTPUT_PATH}") +# add_arrow_lib will look for this not yet existing file, so flag as generated +set_source_files_properties(${GANDIVA_PRECOMPILED_CC_PATH} PROPERTIES GENERATED TRUE) set(SRC_FILES annotator.cc @@ -73,7 +70,7 @@ set(SRC_FILES selection_vector.cc tree_expr_builder.cc to_date_holder.cc - ${BC_FILE_PATH_CC}) + ${GANDIVA_PRECOMPILED_CC_PATH}) set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE ${RE2_LIBRARY}) diff --git a/cpp/src/gandiva/configuration.cc b/cpp/src/gandiva/configuration.cc index df1a13d570c..0c6a33fc00e 100644 --- a/cpp/src/gandiva/configuration.cc +++ b/cpp/src/gandiva/configuration.cc @@ -25,12 +25,11 @@ const std::shared_ptr ConfigurationBuilder::default_configuration InitDefaultConfig(); std::size_t Configuration::Hash() const { - boost::hash string_hash; - return string_hash(byte_code_file_path_); + return 0; // dummy for now, no configuration properties yet } bool Configuration::operator==(const Configuration& other) const { - return other.byte_code_file_path() == byte_code_file_path(); + return true; // always true, no configuration properties yet } bool Configuration::operator!=(const Configuration& other) const { diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index 480a95e9274..7fb0d63cf57 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -26,9 +26,6 @@ namespace gandiva { -GANDIVA_EXPORT -extern const char kByteCodeFilePath[]; - class ConfigurationBuilder; /// \brief runtime config for gandiva /// @@ -38,17 +35,9 @@ class GANDIVA_EXPORT Configuration { public: friend class ConfigurationBuilder; - const std::string& byte_code_file_path() const { return byte_code_file_path_; } - std::size_t Hash() const; bool operator==(const Configuration& other) const; bool operator!=(const Configuration& other) const; - - private: - explicit Configuration(const std::string& byte_code_file_path) - : byte_code_file_path_(byte_code_file_path) {} - - const std::string byte_code_file_path_; }; /// \brief configuration builder for gandiva @@ -57,15 +46,8 @@ class GANDIVA_EXPORT Configuration { /// to override specific values and build a custom instance class GANDIVA_EXPORT ConfigurationBuilder { public: - ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {} - - ConfigurationBuilder& set_byte_code_file_path(const std::string& byte_code_file_path) { - byte_code_file_path_ = byte_code_file_path; - return *this; - } - std::shared_ptr build() { - std::shared_ptr configuration(new Configuration(byte_code_file_path_)); + std::shared_ptr configuration(new Configuration()); return configuration; } @@ -74,10 +56,8 @@ class GANDIVA_EXPORT ConfigurationBuilder { } private: - std::string byte_code_file_path_; - static std::shared_ptr InitDefaultConfig() { - std::shared_ptr configuration(new Configuration(kByteCodeFilePath)); + std::shared_ptr configuration(new Configuration()); return configuration; } diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index c816ec76ff4..22805bb80f6 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -66,6 +66,9 @@ namespace gandiva { +extern const char kPrecompiledBitcode[]; +extern const size_t kPrecompiledBitcodeSize; + std::once_flag init_once_flag; bool Engine::init_once_done_ = false; @@ -114,7 +117,7 @@ Status Engine::Make(std::shared_ptr config, // Add mappings for functions that can be accessed from LLVM/IR module. engine_obj->AddGlobalMappings(); - auto status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path()); + auto status = engine_obj->LoadPreCompiledIR(); ARROW_RETURN_NOT_OK(status); // Add decimal functions @@ -126,14 +129,16 @@ Status Engine::Make(std::shared_ptr config, } // Handling for pre-compiled IR libraries. -Status Engine::LoadPreCompiledIRFiles(const std::string& byte_code_file_path) { +Status Engine::LoadPreCompiledIR() { + auto bitcode = llvm::StringRef(kPrecompiledBitcode, kPrecompiledBitcodeSize); + /// Read from file into memory buffer. llvm::ErrorOr> buffer_or_error = - llvm::MemoryBuffer::getFile(byte_code_file_path); - ARROW_RETURN_IF( - !buffer_or_error, - Status::CodeGenError("Could not load module from IR ", byte_code_file_path, ": ", - buffer_or_error.getError().message())); + llvm::MemoryBuffer::getMemBuffer(bitcode, "precompiled", false); + + ARROW_RETURN_IF(!buffer_or_error, + Status::CodeGenError("Could not load module from IR: ", + buffer_or_error.getError().message())); std::unique_ptr buffer = move(buffer_or_error.get()); diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 7a976d5f926..3b46f62f5ef 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -79,8 +79,9 @@ class GANDIVA_EXPORT Engine { llvm::ExecutionEngine& execution_engine() { return *execution_engine_.get(); } - /// load pre-compiled IR modules and merge them into the main module. - Status LoadPreCompiledIRFiles(const std::string& byte_code_file_path); + /// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into + /// the main module. + Status LoadPreCompiledIR(); // Create and add mappings for cpp functions that can be accessed from LLVM. void AddGlobalMappings(); diff --git a/cpp/src/gandiva/jni/config_builder.cc b/cpp/src/gandiva/jni/config_builder.cc index b224279c1a6..f2694bbee6d 100644 --- a/cpp/src/gandiva/jni/config_builder.cc +++ b/cpp/src/gandiva/jni/config_builder.cc @@ -34,16 +34,8 @@ using gandiva::ConfigurationBuilder; JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_ConfigurationBuilder_buildConfigInstance( JNIEnv* env, jobject configuration) { - jstring byte_code_file_path = - (jstring)env->CallObjectMethod(configuration, byte_code_accessor_method_id_, 0); ConfigurationBuilder configuration_builder; - if (byte_code_file_path != nullptr) { - const char* byte_code_file_path_cpp = env->GetStringUTFChars(byte_code_file_path, 0); - configuration_builder.set_byte_code_file_path(byte_code_file_path_cpp); - env->ReleaseStringUTFChars(byte_code_file_path, byte_code_file_path_cpp); - } std::shared_ptr config = configuration_builder.build(); - env->DeleteLocalRef(byte_code_file_path); return ConfigHolder::MapInsert(config); } diff --git a/cpp/src/gandiva/jni/env_helper.h b/cpp/src/gandiva/jni/env_helper.h index c1bfbaa82a5..1b56304f967 100644 --- a/cpp/src/gandiva/jni/env_helper.h +++ b/cpp/src/gandiva/jni/env_helper.h @@ -23,7 +23,4 @@ // class references extern jclass configuration_builder_class_; -// method references -extern jmethodID byte_code_accessor_method_id_; - #endif // ENV_HELPER_H diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc index 339b0cd4e5a..1ba3aad16cd 100644 --- a/cpp/src/gandiva/jni/jni_common.cc +++ b/cpp/src/gandiva/jni/jni_common.cc @@ -68,7 +68,6 @@ static jint JNI_VERSION = JNI_VERSION_1_6; // extern refs - initialized for other modules. jclass configuration_builder_class_; -jmethodID byte_code_accessor_method_id_; // refs for self. static jclass gandiva_exception_; @@ -93,11 +92,6 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { gandiva_exception_ = (jclass)env->NewGlobalRef(localExceptionClass); env->DeleteLocalRef(localExceptionClass); - const char method_name[] = "getByteCodeFilePath"; - const char return_type[] = "()Ljava/lang/String;"; - byte_code_accessor_method_id_ = - env->GetMethodID(configuration_builder_class_, method_name, return_type); - env->ExceptionDescribe(); return JNI_VERSION; diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index 8e2f4989676..12490008259 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -67,12 +67,32 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS}) endforeach() # link all of the bitcode files into a single bitcode file. -add_custom_command(OUTPUT ${GANDIVA_BC_OUTPUT_PATH} - COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_BC_OUTPUT_PATH} +add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH} + COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_PRECOMPILED_BC_PATH} ${BC_FILES} DEPENDS ${BC_FILES}) -add_custom_target(precompiled ALL DEPENDS ${GANDIVA_BC_OUTPUT_PATH}) +# write a cmake script to replace precompiled bitcode file's content into a .cc file +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake" "\ + file(READ \"${GANDIVA_PRECOMPILED_BC_PATH}\" HEXCONTENT HEX) + string(REGEX REPLACE + \"([0-9a-f][0-9a-f])\" + \"'\\\\\\\\x\\\\1',\" + GANDIVA_PRECOMPILED_BITCODE_CHARS + \"\${HEXCONTENT}\") + configure_file(${GANDIVA_PRECOMPILED_CC_IN_PATH} + ${GANDIVA_PRECOMPILED_CC_PATH}) +") + +# add the previous command to the execution chain +add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH} + COMMAND ${CMAKE_COMMAND} -P + "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake" + DEPENDS ${GANDIVA_PRECOMPILED_CC_IN_PATH} + ${GANDIVA_PRECOMPILED_BC_PATH}) + +add_custom_target(precompiled ALL + DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}) function(add_precompiled_unit_test REL_TEST_NAME) get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) diff --git a/cpp/src/gandiva/bc_file_path.cc.in b/cpp/src/gandiva/precompiled_bitcode.cc.in similarity index 79% rename from cpp/src/gandiva/bc_file_path.cc.in rename to cpp/src/gandiva/precompiled_bitcode.cc.in index 54e81ca2bfa..1d5985aa48d 100644 --- a/cpp/src/gandiva/bc_file_path.cc.in +++ b/cpp/src/gandiva/precompiled_bitcode.cc.in @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. +#include + namespace gandiva { -// Path to the byte-code file. -extern const char kByteCodeFilePath[] = "${GANDIVA_BC_INSTALL_PATH}"; +// Content of precompiled bitcode file. +extern const char kPrecompiledBitcode[] = { ${GANDIVA_PRECOMPILED_BITCODE_CHARS} }; +extern const size_t kPrecompiledBitcodeSize = sizeof(kPrecompiledBitcode); } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index ba0e63292f4..8c0b9ddccb0 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -84,21 +84,6 @@ TEST_F(TestProjector, TestProjectCache) { &cached_projector); ASSERT_OK(status); EXPECT_EQ(cached_projector, projector); - - // if configuration is different, should return a new projector. - - // build a new path by replacing the first '/' with '//' - std::string alt_path(GANDIVA_BYTE_COMPILE_FILE_PATH); - auto pos = alt_path.find('/', 0); - EXPECT_NE(pos, std::string::npos); - alt_path.replace(pos, 1, "//"); - auto other_configuration = - ConfigurationBuilder().set_byte_code_file_path(alt_path).build(); - std::shared_ptr should_be_new_projector2; - status = Projector::Make(schema, {sum_expr, sub_expr}, other_configuration, - &should_be_new_projector2); - ASSERT_OK(status); - EXPECT_NE(projector, should_be_new_projector2); } TEST_F(TestProjector, TestProjectCacheFieldNames) { diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index af164125886..e841e6a0ebf 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -89,8 +89,7 @@ static ArrayPtr MakeArrowTypeArray(const std::shared_ptr& type, std::shared_ptr TestConfiguration() { auto builder = ConfigurationBuilder(); - builder.set_byte_code_file_path(GANDIVA_BYTE_COMPILE_FILE_PATH); - return builder.build(); + return builder.DefaultConfiguration(); } } // namespace gandiva diff --git a/dev/tasks/python-wheels/linux-test.sh b/dev/tasks/python-wheels/linux-test.sh index 45efdb0c91e..dcd77eb56f5 100755 --- a/dev/tasks/python-wheels/linux-test.sh +++ b/dev/tasks/python-wheels/linux-test.sh @@ -24,17 +24,17 @@ pip install /arrow/python/manylinux1/dist/*.whl python --version # Test optional dependencies -command=" +python -c " import sys import pyarrow import pyarrow.orc import pyarrow.parquet import pyarrow.plasma +import tensorflow if sys.version_info.major > 2: import pyarrow.gandiva " -python -c "$command" # Run pyarrow tests pip install -r /arrow/python/requirements-test.txt diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml index b5cbc65bc7e..240c523e1cf 100644 --- a/dev/tasks/python-wheels/travis.linux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -41,10 +41,12 @@ script: - pushd arrow/python/manylinux1 - docker run --shm-size=2g -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} - -e PYTHON_VERSIONS="{{ python_version }},{{ unicode_width }}" + -e PYTHON_VERSION="{{ python_version }}" + -e UNICODE_WIDTH="{{ unicode_width }}" -v $PWD:/io -v $PWD/../../:/arrow - quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh + quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1 + /io/build_arrow.sh - popd # test on multiple distributions diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index a0901530a61..3469c9f0030 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -29,7 +29,7 @@ 2.5.0 18.0 true - ../../cpp/debug/debug + ../../cpp/build/debug @@ -126,18 +126,6 @@ **/libgandiva_jni.* - - ${gandiva.cpp.build.dir} - - **/libgandiva_helpers.* - - - - ${gandiva.cpp.build.dir}/../src/gandiva - - irhelpers.bc - - diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java index 46deee95fa7..f0e8c0d826a 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java @@ -22,18 +22,8 @@ */ public class ConfigurationBuilder { - private String byteCodeFilePath = ""; - - public ConfigurationBuilder withByteCodeFilePath(final String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; - return this; - } - - public String getByteCodeFilePath() { - return byteCodeFilePath; - } - public native long buildConfigInstance(); public native void releaseConfigInstance(long configId); + } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java index ccb53070494..b6174d723d0 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java @@ -32,16 +32,13 @@ */ class JniLoader { private static final String LIBRARY_NAME = "gandiva_jni"; - private static final String IRHELPERS_BC = "irhelpers.bc"; private static volatile JniLoader INSTANCE; private static volatile long defaultConfiguration = 0L; - private final String byteCodeFilePath; private final JniWrapper wrapper; - private JniLoader(String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; + private JniLoader() { this.wrapper = new JniWrapper(); } @@ -60,8 +57,7 @@ private static JniLoader setupInstance() throws GandivaException { try { String tempDir = System.getProperty("java.io.tmpdir"); loadGandivaLibraryFromJar(tempDir); - File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); - return new JniLoader(byteCodeFile.getAbsolutePath()); + return new JniLoader(); } catch (IOException ioException) { throw new GandivaException("unable to create native instance", ioException); } @@ -107,13 +103,6 @@ private static File setupFile(String tmpDir, String libraryToLoad) return temp; } - /** - * Returns the byte code file path extracted from jar. - */ - public String getByteCodeFilePath() { - return byteCodeFilePath; - } - /** * Returns the jni wrapper. */ @@ -130,11 +119,9 @@ static long getDefaultConfiguration() throws GandivaException { if (defaultConfiguration == 0L) { synchronized (ConfigurationBuilder.class) { if (defaultConfiguration == 0L) { - String defaultByteCodeFilePath = JniLoader.getInstance().getByteCodeFilePath(); - + JniLoader.getInstance(); // setup defaultConfiguration = new ConfigurationBuilder() - .withByteCodeFilePath(defaultByteCodeFilePath) - .buildConfigInstance(); + .buildConfigInstance(); } } } diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 75537a64e73..e0475bb6c48 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -23,9 +23,6 @@ # Build upon the scripts in https://github.com/matthew-brett/manylinux-builds # * Copyright (c) 2013-2016, Matt Terry and Matthew Brett (BSD 2-clause) -# Build different python versions with various unicode widths -PYTHON_VERSIONS="${PYTHON_VERSIONS:-2.7,16 2.7,32 3.5,16 3.6,16 3.7,16}" - source /multibuild/manylinux_utils.sh # Quit on failure @@ -48,95 +45,89 @@ export PYARROW_CMAKE_OPTIONS='-DTHRIFT_HOME=/usr -DBoost_NAMESPACE=arrow_boost - # Ensure the target directory exists mkdir -p /io/dist -for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do - IFS="," - set -- $PYTHON_TUPLE; - PYTHON=$1 - U_WIDTH=$2 - CPYTHON_PATH="$(cpython_path $PYTHON ${U_WIDTH})" - PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python" - PIP="${CPYTHON_PATH}/bin/pip" - PATH="$PATH:${CPYTHON_PATH}" - - if [ $PYTHON != "2.7" ]; then - # Gandiva is not supported on Python 2.7 - export PYARROW_WITH_GANDIVA=1 - export BUILD_ARROW_GANDIVA=ON - else - export PYARROW_WITH_GANDIVA=0 - export BUILD_ARROW_GANDIVA=OFF - fi - - echo "=== (${PYTHON}) Building Arrow C++ libraries ===" - ARROW_BUILD_DIR=/tmp/build-PY${PYTHON}-${U_WIDTH} - mkdir -p "${ARROW_BUILD_DIR}" - pushd "${ARROW_BUILD_DIR}" - PATH="${CPYTHON_PATH}/bin:$PATH" cmake -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/arrow-dist \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DARROW_BUILD_TESTS=OFF \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BOOST_USE_SHARED=ON \ - -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \ - -DARROW_JEMALLOC=ON \ - -DARROW_RPATH_ORIGIN=ON \ - -DARROW_PYTHON=ON \ - -DARROW_PARQUET=ON \ - -DPythonInterp_FIND_VERSION=${PYTHON} \ - -DARROW_PLASMA=ON \ - -DARROW_TENSORFLOW=ON \ - -DARROW_ORC=ON \ - -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \ - -DARROW_GANDIVA_JAVA=OFF \ - -DBoost_NAMESPACE=arrow_boost \ - -DBOOST_ROOT=/arrow_boost_dist \ - -GNinja /arrow/cpp - ninja install - popd - - # Check that we don't expose any unwanted symbols - /io/scripts/check_arrow_visibility.sh - - echo "=== (${PYTHON}) Install the wheel build dependencies ===" - $PIP install -r requirements-wheel.txt - - # Clear output directory - rm -rf dist/ - echo "=== (${PYTHON}) Building wheel ===" - # Remove build directory to ensure CMake gets a clean run - rm -rf build/ - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \ - --inplace \ - --bundle-arrow-cpp \ - --bundle-boost \ - --boost-namespace=arrow_boost - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist - - echo "=== (${PYTHON}) Tag the wheel with manylinux1 ===" - mkdir -p repaired_wheels/ - auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/ - - echo "=== (${PYTHON}) Testing manylinux1 wheel ===" - source /venv-test-${PYTHON}-${U_WIDTH}/bin/activate - pip install repaired_wheels/*.whl - - if [ $PYTHON != "2.7" ]; then - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.gandiva" - fi - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.orc" - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.parquet" - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.plasma" - - echo "=== (${PYTHON}) Install modules required for testing ===" - pip install -r requirements-test.txt - - # The TensorFlow test will be skipped here, since TensorFlow is not - # manylinux1 compatible; however, the wheels will support TensorFlow on - # a TensorFlow compatible system - py.test -v -r sxX --durations=15 --parquet ${VIRTUAL_ENV}/lib/*/site-packages/pyarrow - deactivate - - mv repaired_wheels/*.whl /io/dist - mv dist/*.tar.gz /io/dist -done +# Must pass PYTHON_VERSION and UNICODE_WIDTH env variables +# possible values are: 2.7,16 2.7,32 3.5,16 3.6,16 3.7,16 + +CPYTHON_PATH="$(cpython_path ${PYTHON_VERSION} ${UNICODE_WIDTH})" +PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python" +PIP="${CPYTHON_PATH}/bin/pip" +PATH="${PATH}:${CPYTHON_PATH}" + +if [ ${PYTHON_VERSION} != "2.7" ]; then + # Gandiva is not supported on Python 2.7 + export PYARROW_WITH_GANDIVA=1 + export BUILD_ARROW_GANDIVA=ON +else + export PYARROW_WITH_GANDIVA=0 + export BUILD_ARROW_GANDIVA=OFF +fi + +echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" +ARROW_BUILD_DIR=/tmp/build-PY${PYTHON_VERSION}-${UNICODE_WIDTH} +mkdir -p "${ARROW_BUILD_DIR}" +pushd "${ARROW_BUILD_DIR}" +PATH="${CPYTHON_PATH}/bin:${PATH}" cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/arrow-dist \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_BUILD_TESTS=OFF \ + -DARROW_BUILD_SHARED=ON \ + -DARROW_BOOST_USE_SHARED=ON \ + -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \ + -DARROW_JEMALLOC=ON \ + -DARROW_RPATH_ORIGIN=ON \ + -DARROW_PYTHON=ON \ + -DARROW_PARQUET=ON \ + -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \ + -DARROW_PLASMA=ON \ + -DARROW_TENSORFLOW=ON \ + -DARROW_ORC=ON \ + -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \ + -DARROW_GANDIVA_JAVA=OFF \ + -DBoost_NAMESPACE=arrow_boost \ + -DBOOST_ROOT=/arrow_boost_dist \ + -GNinja /arrow/cpp +ninja install +popd + +# Check that we don't expose any unwanted symbols +/io/scripts/check_arrow_visibility.sh + +echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" +$PIP install -r requirements-wheel.txt + +# Clear output directory +rm -rf dist/ +echo "=== (${PYTHON_VERSION}) Building wheel ===" +# Remove build directory to ensure CMake gets a clean run +rm -rf build/ +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \ + --inplace \ + --bundle-arrow-cpp \ + --bundle-boost \ + --boost-namespace=arrow_boost +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist + +echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ===" +mkdir -p repaired_wheels/ +auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/ + +# Install the built wheels +$PIP install repaired_wheels/*.whl + +# Test that the modules are importable +$PYTHON_INTERPRETER -c " +import sys +import pyarrow +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma + +if sys.version_info.major > 2: + import pyarrow.gandiva +" + +# More thorough testing happens outsite of the build to prevent +# packaging issues like ARROW-4372 +mv dist/*.tar.gz /io/dist +mv repaired_wheels/*.whl /io/dist