From cb6406b29f8953e9c9efa46de522c43369f01612 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 10:38:47 +0900 Subject: [PATCH 01/24] ARROW-17081: [Java][Datasets] Move JNI build configuration from cpp/ to java/ --- ci/docker/java-jni-manylinux-201x.dockerfile | 1 + ci/scripts/java_jni_build.sh | 34 ++++++++-- ci/scripts/java_jni_macos_build.sh | 8 ++- ci/scripts/java_jni_manylinux_build.sh | 5 +- ci/vcpkg/vcpkg.json | 1 + cpp/CMakeLists.txt | 3 - cpp/src/arrow/filesystem/s3_test_util.cc | 2 + java/CMakeLists.txt | 12 ++++ java/c/CMakeLists.txt | 11 ++-- java/dataset/CMakeLists.txt | 36 ++++++----- java/dataset/src/main/cpp/CMakeLists.txt | 65 -------------------- 11 files changed, 79 insertions(+), 99 deletions(-) delete mode 100644 java/dataset/src/main/cpp/CMakeLists.txt diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index de953fd5ae0..a834ae0bb51 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -24,6 +24,7 @@ RUN vcpkg install \ --clean-after-build \ --x-install-root=${VCPKG_ROOT}/installed \ --x-manifest-root=/arrow/ci/vcpkg \ + --x-feature=dev \ --x-feature=flight \ --x-feature=gcs \ --x-feature=json \ diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 0f19e614133..e986301b6e2 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -20,9 +20,10 @@ set -ex arrow_dir=${1} -build_dir=${2}/java_jni +arrow_install_dir=${2} +build_dir=${3}/java_jni # The directory where the final binaries will be stored when scripts finish -dist_dir=${3} +dist_dir=${4} echo "=== Clear output directories and leftovers ===" # Clear output directories and leftovers @@ -32,11 +33,34 @@ echo "=== Building Arrow Java C Data Interface native library ===" mkdir -p "${build_dir}" pushd "${build_dir}" +case "$(uname)" in + Linux) + n_jobs=$(nproc) + ;; + Darwin) + n_jobs=$(sysctl -n hw.ncpu) + ;; + *) + n_jobs=${NPROC:-1} + ;; +esac + +: ${CMKAE_BUILD_TYPE:=Release} +# TODO: Remove the last "/arrow" from -DCMAKE_INSTALL_PREFIX when +# we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . cmake \ - -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-release} \ - -DCMAKE_INSTALL_PREFIX=${dist_dir} \ + -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_PREFIX_PATH=${dist_dir} \ + -DCMAKE_INSTALL_PREFIX=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ ${JAVA_JNI_CMAKE_ARGS:-} \ ${arrow_dir}/java -cmake --build . --target install --config ${ARROW_BUILD_TYPE:-release} +export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} +cmake --build . --config ${CMAKE_BUILD_TYPE} +ctest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 +cmake --build . --config ${CMAKE_BUILD_TYPE} --target install popd diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 5418daaf011..8b756a24e91 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -30,7 +30,7 @@ rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" install_dir=${build_dir}/cpp-install -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_BUILD_TESTS:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_FILESYSTEM:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -99,7 +99,10 @@ cmake \ cmake --build . --target install if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then - ctest + ctest \ + --output-on-failure \ + --parallel $(sysctl -n hw.ncpu) \ + --timeout 300 fi popd @@ -107,6 +110,7 @@ popd ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${arrow_dir} \ + ${install_dir} \ ${build_dir} \ ${dist_dir} diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 331d74b34a1..4a371f612b8 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -24,6 +24,8 @@ build_dir=${2} # The directory where the final binaries will be stored when scripts finish dist_dir=${3} +INSTALL_DIR=${build_dir}/cpp-install + echo "=== Clear output directories and leftovers ===" # Clear output directories and leftovers rm -rf ${build_dir} @@ -32,7 +34,7 @@ echo "=== Building Arrow C++ libraries ===" devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ grep -o "^[0-9]*") devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" -: ${ARROW_BUILD_TESTS:=OFF} +: ${ARROW_BUILD_TESTS:=ON} : ${ARROW_DATASET:=ON} : ${ARROW_GANDIVA:=ON} : ${ARROW_GANDIVA_JAVA:=ON} @@ -130,6 +132,7 @@ JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGE export JAVA_JNI_CMAKE_ARGS ${arrow_dir}/ci/scripts/java_jni_build.sh \ ${arrow_dir} \ + ${ARROW_HOME} \ ${build_dir} \ ${dist_dir} diff --git a/ci/vcpkg/vcpkg.json b/ci/vcpkg/vcpkg.json index d9d074e99b0..71c23165e61 100644 --- a/ci/vcpkg/vcpkg.json +++ b/ci/vcpkg/vcpkg.json @@ -43,6 +43,7 @@ "description": "Development dependencies", "dependencies": [ "benchmark", + "boost-process", "gtest" ] }, diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4c0d8f1e91b..081ebc92d69 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -977,9 +977,6 @@ if(ARROW_JNI) if(ARROW_ORC) add_subdirectory(../java/adapter/orc/src/main/cpp ./java/orc/jni) endif() - if(ARROW_DATASET) - add_subdirectory(../java/dataset/src/main/cpp ./java/dataset/jni) - endif() endif() if(ARROW_GANDIVA) diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc index 1aafb5ec66c..f5a054a8efa 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -31,7 +31,9 @@ // includes windows.h. boost/process/args.hpp is included before // boost/process/async.h that includes // boost/asio/detail/socket_types.hpp implicitly is included. +#ifdef __MINGW32__ #include +#endif // We need BOOST_USE_WINDOWS_H definition with MinGW when we use // boost/process.hpp. See BOOST_USE_WINDOWS_H=1 in // cpp/cmake_modules/ThirdpartyToolchain.cmake for details. diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 43818e7a9f3..91483ebc05e 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -28,6 +28,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # Components option(ARROW_JAVA_JNI_ENABLE_DEFAULT "Whether enable components by default or not" ON) option(ARROW_JAVA_JNI_ENABLE_C "Enable C data interface" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) +option(ARROW_JAVA_JNI_ENABLE_DATASET "Enable dataset" ${ARROW_JAVA_JNI_ENABLE_DEFAULT}) # ccache option(ARROW_JAVA_JNI_USE_CCACHE "Use ccache when compiling (if available)" ON) @@ -54,6 +55,17 @@ include(UseJava) add_library(jni INTERFACE IMPORTED) set_target_properties(jni PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${JNI_INCLUDE_DIRS}") +include(CTest) +if(BUILD_TESTING) + find_package(ArrowTesting REQUIRED) + find_package(GTest REQUIRED) + add_library(arrow_java_test INTERFACE IMPORTED) + target_link_libraries(arrow_java_test INTERFACE arrow_testing_static GTest::gtest_main) +endif() + if(ARROW_JAVA_JNI_ENABLE_C) add_subdirectory(c) endif() +if(ARROW_JAVA_JNI_ENABLE_DATASET) + add_subdirectory(dataset) +endif() diff --git a/java/c/CMakeLists.txt b/java/c/CMakeLists.txt index f3b3117eacf..2bfa33cf027 100644 --- a/java/c/CMakeLists.txt +++ b/java/c/CMakeLists.txt @@ -18,16 +18,15 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) -add_jar(arrow_cdata_jar +add_jar(arrow_java_jni_cdata_jar src/main/java/org/apache/arrow/c/jni/CDataJniException.java src/main/java/org/apache/arrow/c/jni/JniLoader.java src/main/java/org/apache/arrow/c/jni/JniWrapper.java src/main/java/org/apache/arrow/c/jni/PrivateData.java GENERATE_NATIVE_HEADERS - arrow_cdata_jni_headers) + arrow_java_jni_cdata_headers) -set(ARROW_CDATA_JNI_SOURCES src/main/cpp/jni_wrapper.cc) -add_library(arrow_cdata_jni SHARED ${ARROW_CDATA_JNI_SOURCES}) -target_link_libraries(arrow_cdata_jni arrow_cdata_jni_headers jni) +add_library(arrow_java_jni_cdata SHARED src/main/cpp/jni_wrapper.cc) +target_link_libraries(arrow_java_jni_cdata arrow_java_jni_cdata_headers jni) -install(TARGETS arrow_cdata_jni DESTINATION ${CMAKE_INSTALL_PREFIX}) +install(TARGETS arrow_java_jni_cdata DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt index 5b6e4a9ce24..49ca631d114 100644 --- a/java/dataset/CMakeLists.txt +++ b/java/dataset/CMakeLists.txt @@ -15,28 +15,30 @@ # specific language governing permissions and limitations # under the License. -# -# arrow_dataset_java -# - -# Headers: top level - -project(arrow_dataset_java) +find_package(ArrowDataset REQUIRED) -# Find java/jni -include(FindJava) -include(UseJava) -include(FindJNI) +include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR} + ${JNI_INCLUDE_DIRS} ${JNI_HEADERS_DIR}) -message("generating headers to ${JNI_HEADERS_DIR}") - -add_jar(arrow_dataset_java +add_jar(arrow_java_jni_dataset_jar src/main/java/org/apache/arrow/dataset/jni/JniLoader.java src/main/java/org/apache/arrow/dataset/jni/JniWrapper.java src/main/java/org/apache/arrow/dataset/file/JniWrapper.java src/main/java/org/apache/arrow/dataset/jni/NativeMemoryPool.java src/main/java/org/apache/arrow/dataset/jni/ReservationListener.java GENERATE_NATIVE_HEADERS - arrow_dataset_java-native - DESTINATION - ${JNI_HEADERS_DIR}) + arrow_java_jni_dataset_headers) + +add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc + src/main/cpp/jni_util.cc) +target_link_libraries(arrow_java_jni_dataset arrow_java_jni_dataset_headers jni + arrow_dataset_static) + +if(BUILD_TESTING) + add_executable(arrow-java-jni-dataset-test src/main/cpp/jni_util_test.cc + src/main/cpp/jni_util.cc) + target_link_libraries(arrow-java-jni-dataset-test arrow_java_test) + add_test(NAME arrow-java-jni-dataset-test COMMAND arrow-dataset-jni-test) +endif() + +install(TARGETS arrow_java_jni_dataset DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/java/dataset/src/main/cpp/CMakeLists.txt b/java/dataset/src/main/cpp/CMakeLists.txt deleted file mode 100644 index 6a0be9b7f58..00000000000 --- a/java/dataset/src/main/cpp/CMakeLists.txt +++ /dev/null @@ -1,65 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitationsn -# under the License. - -# -# arrow_dataset_jni -# - -project(arrow_dataset_jni) - -cmake_minimum_required(VERSION 3.11) - -find_package(JNI REQUIRED) - -add_custom_target(arrow_dataset_jni) - -set(JNI_HEADERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") - -add_subdirectory(../../../../dataset ./java) - -set(ARROW_BUILD_STATIC OFF) - -set(ARROW_DATASET_JNI_LIBS arrow_dataset_static) - -set(ARROW_DATASET_JNI_SOURCES jni_wrapper.cc jni_util.cc) - -add_arrow_lib(arrow_dataset_jni - BUILD_SHARED - SOURCES - ${ARROW_DATASET_JNI_SOURCES} - OUTPUTS - ARROW_DATASET_JNI_LIBRARIES - SHARED_PRIVATE_LINK_LIBS - ${ARROW_DATASET_JNI_LIBS} - STATIC_LINK_LIBS - ${ARROW_DATASET_JNI_LIBS} - EXTRA_INCLUDES - ${JNI_HEADERS_DIR} - PRIVATE_INCLUDES - ${JNI_INCLUDE_DIRS} - DEPENDENCIES - arrow_static - arrow_dataset_java) - -add_dependencies(arrow_dataset_jni ${ARROW_DATASET_JNI_LIBRARIES}) - -add_arrow_test(dataset_jni_test - SOURCES - jni_util_test.cc - jni_util.cc - EXTRA_INCLUDES - ${JNI_INCLUDE_DIRS}) From a39203bd647691dc25620d800fee8f878fb75bf5 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 12:57:51 +0900 Subject: [PATCH 02/24] Fix build errors --- ci/scripts/java_jni_build.sh | 1 + ci/scripts/java_jni_macos_build.sh | 20 ++++------ ci/scripts/java_jni_manylinux_build.sh | 30 ++++++-------- cpp/CMakeLists.txt | 9 +++++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 44 +-------------------- cpp/src/arrow/dataset/api.h | 6 +++ cpp/src/arrow/filesystem/s3_internal.h | 4 +- cpp/src/arrow/util/config.h.cmake | 2 + java/CMakeLists.txt | 3 ++ java/c/CMakeLists.txt | 1 + java/dataset/CMakeLists.txt | 6 ++- 11 files changed, 50 insertions(+), 76 deletions(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index e986301b6e2..4475058705c 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -54,6 +54,7 @@ cmake \ -DCMAKE_PREFIX_PATH=${dist_dir} \ -DCMAKE_INSTALL_PREFIX=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -GNinja \ ${JAVA_JNI_CMAKE_ARGS:-} \ ${arrow_dir}/java export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 8b756a24e91..8fc7f3cf9d5 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -58,33 +58,25 @@ mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ + -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ -DARROW_BUILD_UTILITIES=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_CSV=${ARROW_DATASET} \ -DARROW_DATASET=${ARROW_DATASET} \ + -DARROW_DEPENDENCY_SOURCE=SYSTEM \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \ - -DARROW_GFLAGS_USE_SHARED=OFF \ - -DARROW_GRPC_USE_SHARED=OFF \ -DARROW_JNI=ON \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA=${ARROW_PLASMA} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_S3=${ARROW_S3} \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ @@ -99,7 +91,11 @@ cmake \ cmake --build . --target install if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then + # MinIO is required + exclude_tests="arrow-s3-test" ctest \ + --exclude-regex "${exclude_tests}" \ + --label-regex unittest \ --output-on-failure \ --parallel $(sysctl -n hw.ncpu) \ --timeout 300 diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 4a371f612b8..2f3459a4928 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -48,7 +48,7 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} : ${ARROW_USE_CCACHE:=OFF} -: ${CMAKE_BUILD_TYPE:=Release} +: ${CMAKE_BUILD_TYPE:=release} : ${CMAKE_UNITY_BUILD:=ON} : ${VCPKG_ROOT:=/opt/vcpkg} : ${VCPKG_FEATURE_FLAGS:=-manifests} @@ -68,36 +68,27 @@ mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ + -DARROW_BUILD_SHARED=OFF \ + -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ + -DARROW_CSV=${ARROW_DATASET} \ -DARROW_DATASET=${ARROW_DATASET} \ -DARROW_DEPENDENCY_SOURCE="VCPKG" \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA} \ -DARROW_GANDIVA_PC_CXX_FLAGS=${GANDIVA_CXX_FLAGS} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ - -DARROW_GRPC_USE_SHARED=OFF \ -DARROW_JEMALLOC=${ARROW_JEMALLOC} \ -DARROW_JNI=ON \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ -DARROW_ORC=${ARROW_ORC} \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \ -DARROW_S3=${ARROW_S3} \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_INSTALL_LIBDIR=lib \ -DCMAKE_INSTALL_PREFIX=${ARROW_HOME} \ @@ -107,16 +98,19 @@ cmake \ -DPARQUET_BUILD_EXAMPLES=OFF \ -DPARQUET_BUILD_EXECUTABLES=OFF \ -DPARQUET_REQUIRE_ENCRYPTION=OFF \ - -DPythonInterp_FIND_VERSION_MAJOR=3 \ - -DPythonInterp_FIND_VERSION=ON \ -DVCPKG_MANIFEST_MODE=OFF \ -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET} \ -GNinja \ ${arrow_dir}/cpp ninja install -if [ $ARROW_BUILD_TESTS = "ON" ]; then +if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then + # MinIO is required + exclude_tests="arrow-s3fs-test" + # strptime + exclude_tests="${exclude_tests}|arrow-utility-test" ctest \ + --exclude-regex "${exclude_tests}" \ --label-regex unittest \ --output-on-failure \ --parallel $(nproc) \ @@ -127,7 +121,7 @@ popd JAVA_JNI_CMAKE_ARGS="" -JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_MANIFEST_MODE=OFF" +JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=/opt/vcpkg/scripts/buildsystems/vcpkg.cmake" JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}" export JAVA_JNI_CMAKE_ARGS ${arrow_dir}/ci/scripts/java_jni_build.sh \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 081ebc92d69..63add810c8c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -786,6 +786,15 @@ endif() if(ARROW_S3) list(APPEND ARROW_SHARED_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) list(APPEND ARROW_STATIC_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) + if(AWSSDK_SOURCE STREQUAL "SYSTEM") + list(APPEND + ARROW_STATIC_INSTALL_INTERFACE_LIBS + aws-cpp-sdk-identity-management + aws-cpp-sdk-sts + aws-cpp-sdk-cognito-identity + aws-cpp-sdk-s3 + aws-cpp-sdk-core) + endif() endif() if(ARROW_WITH_OPENTELEMETRY) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 7c3e3a53322..d9e7e0405f9 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4746,49 +4746,7 @@ macro(build_awssdk) endmacro() if(ARROW_S3) - # See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/ - - # Workaround to force AWS CMake configuration to look for shared libraries - if(DEFINED ENV{CONDA_PREFIX}) - if(DEFINED BUILD_SHARED_LIBS) - set(BUILD_SHARED_LIBS_WAS_SET TRUE) - set(BUILD_SHARED_LIBS_VALUE ${BUILD_SHARED_LIBS}) - else() - set(BUILD_SHARED_LIBS_WAS_SET FALSE) - endif() - set(BUILD_SHARED_LIBS "ON") - endif() - - # Need to customize the find_package() call, so cannot call resolve_dependency() - if(AWSSDK_SOURCE STREQUAL "AUTO") - find_package(AWSSDK - COMPONENTS config - s3 - transfer - identity-management - sts) - if(NOT AWSSDK_FOUND) - build_awssdk() - endif() - elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") - build_awssdk() - elseif(AWSSDK_SOURCE STREQUAL "SYSTEM") - find_package(AWSSDK REQUIRED - COMPONENTS config - s3 - transfer - identity-management - sts) - endif() - - # Restore previous value of BUILD_SHARED_LIBS - if(DEFINED ENV{CONDA_PREFIX}) - if(BUILD_SHARED_LIBS_WAS_SET) - set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_VALUE}) - else() - unset(BUILD_SHARED_LIBS) - endif() - endif() + resolve_dependency(AWSSDK HAVE_ALT TRUE) message(STATUS "Found AWS SDK headers: ${AWSSDK_INCLUDE_DIR}") message(STATUS "Found AWS SDK libraries: ${AWSSDK_LINK_LIBRARIES}") diff --git a/cpp/src/arrow/dataset/api.h b/cpp/src/arrow/dataset/api.h index 8b81f4c15d1..6e8aab5e9ea 100644 --- a/cpp/src/arrow/dataset/api.h +++ b/cpp/src/arrow/dataset/api.h @@ -23,8 +23,14 @@ #include "arrow/dataset/dataset.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" +#ifdef ARROW_CSV #include "arrow/dataset/file_csv.h" +#endif #include "arrow/dataset/file_ipc.h" +#ifdef ARROW_ORC #include "arrow/dataset/file_orc.h" +#endif +#ifdef ARROW_PARQUET #include "arrow/dataset/file_parquet.h" +#endif #include "arrow/dataset/scanner.h" diff --git a/cpp/src/arrow/filesystem/s3_internal.h b/cpp/src/arrow/filesystem/s3_internal.h index ae938c17601..6d942cfdd9a 100644 --- a/cpp/src/arrow/filesystem/s3_internal.h +++ b/cpp/src/arrow/filesystem/s3_internal.h @@ -43,7 +43,7 @@ namespace internal { enum class S3Backend { Amazon, Minio, Other }; // Detect the S3 backend type from the S3 server's response headers -S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { +inline S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { const auto it = headers.find("server"); if (it != headers.end()) { const auto& value = util::string_view(it->second); @@ -58,7 +58,7 @@ S3Backend DetectS3Backend(const Aws::Http::HeaderValueCollection& headers) { } template -S3Backend DetectS3Backend(const Aws::Client::AWSError& error) { +inline S3Backend DetectS3Backend(const Aws::Client::AWSError& error) { return DetectS3Backend(error.GetResponseHeaders()); } diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index c987a0cae36..9948c1e3587 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -46,6 +46,8 @@ #cmakedefine ARROW_JEMALLOC #cmakedefine ARROW_JEMALLOC_VENDORED #cmakedefine ARROW_JSON +#cmakedefine ARROW_ORC +#cmakedefine ARROW_PARQUET #cmakedefine ARROW_GCS #cmakedefine ARROW_S3 diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 91483ebc05e..5dbe5a8a1a6 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -60,6 +60,9 @@ if(BUILD_TESTING) find_package(ArrowTesting REQUIRED) find_package(GTest REQUIRED) add_library(arrow_java_test INTERFACE IMPORTED) + # TODO: Remove this when + # we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . + target_link_libraries(arrow_testing_static INTERFACE arrow_static) target_link_libraries(arrow_java_test INTERFACE arrow_testing_static GTest::gtest_main) endif() diff --git a/java/c/CMakeLists.txt b/java/c/CMakeLists.txt index 2bfa33cf027..7510ab233fe 100644 --- a/java/c/CMakeLists.txt +++ b/java/c/CMakeLists.txt @@ -27,6 +27,7 @@ add_jar(arrow_java_jni_cdata_jar arrow_java_jni_cdata_headers) add_library(arrow_java_jni_cdata SHARED src/main/cpp/jni_wrapper.cc) +set_property(TARGET arrow_java_jni_cdata PROPERTY OUTPUT_NAME "arrow_cdata_jni") target_link_libraries(arrow_java_jni_cdata arrow_java_jni_cdata_headers jni) install(TARGETS arrow_java_jni_cdata DESTINATION ${CMAKE_INSTALL_PREFIX}) diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt index 49ca631d114..b9977717bde 100644 --- a/java/dataset/CMakeLists.txt +++ b/java/dataset/CMakeLists.txt @@ -31,6 +31,10 @@ add_jar(arrow_java_jni_dataset_jar add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc src/main/cpp/jni_util.cc) +set_property(TARGET arrow_java_jni_dataset PROPERTY OUTPUT_NAME "arrow_dataset_jni") +# TODO: Remove this when +# we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . +target_link_libraries(arrow_dataset_static INTERFACE arrow_static) target_link_libraries(arrow_java_jni_dataset arrow_java_jni_dataset_headers jni arrow_dataset_static) @@ -38,7 +42,7 @@ if(BUILD_TESTING) add_executable(arrow-java-jni-dataset-test src/main/cpp/jni_util_test.cc src/main/cpp/jni_util.cc) target_link_libraries(arrow-java-jni-dataset-test arrow_java_test) - add_test(NAME arrow-java-jni-dataset-test COMMAND arrow-dataset-jni-test) + add_test(NAME arrow-java-jni-dataset-test COMMAND arrow-java-jni-dataset-test) endif() install(TARGETS arrow_java_jni_dataset DESTINATION ${CMAKE_INSTALL_PREFIX}) From 3fe8bdf26f6578209dfdb6eecea0d9bde26780e2 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 13:01:29 +0900 Subject: [PATCH 03/24] Exclude more tests --- ci/scripts/java_jni_macos_build.sh | 2 ++ ci/scripts/java_jni_manylinux_build.sh | 2 ++ 2 files changed, 4 insertions(+) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 8fc7f3cf9d5..10b4becf81d 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -93,6 +93,8 @@ cmake --build . --target install if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then # MinIO is required exclude_tests="arrow-s3-test" + # unstable + exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" ctest \ --exclude-regex "${exclude_tests}" \ --label-regex unittest \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 2f3459a4928..936138c0aca 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -107,6 +107,8 @@ ninja install if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then # MinIO is required exclude_tests="arrow-s3fs-test" + # unstable + exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" # strptime exclude_tests="${exclude_tests}|arrow-utility-test" ctest \ From b2a27476cc6b94dae68ebb24603fe446b1e1ed26 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 14:11:10 +0900 Subject: [PATCH 04/24] Add a missing file --- cpp/cmake_modules/FindAWSSDKAlt.cmake | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 cpp/cmake_modules/FindAWSSDKAlt.cmake diff --git a/cpp/cmake_modules/FindAWSSDKAlt.cmake b/cpp/cmake_modules/FindAWSSDKAlt.cmake new file mode 100644 index 00000000000..64ea4d24d5a --- /dev/null +++ b/cpp/cmake_modules/FindAWSSDKAlt.cmake @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set(find_package_args) +if(AWSSDKAlt_FIND_VERSION) + list(APPEND find_package_args ${AWSSDKAlt_FIND_VERSION}) +endif() +if(AWSSDKAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +# See https://aws.amazon.com/blogs/developer/developer-experience-of-the-aws-sdk-for-c-now-simplified-by-cmake/ +# Workaround to force AWS CMake configuration to look for shared libraries +if(DEFINED ENV{CONDA_PREFIX}) + if(DEFINED BUILD_SHARED_LIBS) + set(BUILD_SHARED_LIBS_WAS_SET TRUE) + set(BUILD_SHARED_LIBS_VALUE ${BUILD_SHARED_LIBS}) + else() + set(BUILD_SHARED_LIBS_WAS_SET FALSE) + endif() + set(BUILD_SHARED_LIBS "ON") +endif() +find_package(AWSSDK ${find_package_args} + COMPONENTS config + s3 + transfer + identity-management + sts) +# Restore previous value of BUILD_SHARED_LIBS +if(DEFINED ENV{CONDA_PREFIX}) + if(BUILD_SHARED_LIBS_WAS_SET) + set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_VALUE}) + else() + unset(BUILD_SHARED_LIBS) + endif() +endif() +set(AWSSDKAlt_FOUND ${AWSSDK_FOUND}) From 21ca701340b5a29d6a087afca4721d7543dd4721 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 14:13:32 +0900 Subject: [PATCH 05/24] Add missing dependency --- cpp/Brewfile | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/Brewfile b/cpp/Brewfile index 9cffd8e3a81..2b44d33b5ec 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -26,6 +26,7 @@ brew "cmake" brew "flatbuffers" brew "git" brew "glog" +brew "googletest" brew "grpc" brew "llvm" brew "llvm@12" From 35a398eb2bb67b7c2595c5fbbd7cd8c78011a475 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 14:14:59 +0900 Subject: [PATCH 06/24] Uncapitalize --- dev/tasks/java-jars/github.yml | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 23b97087c39..f94a43a8b44 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -22,12 +22,12 @@ jobs: build-cpp-ubuntu: - name: Build C++ Libs Ubuntu + name: Build C++ libraries Ubuntu runs-on: ubuntu-latest steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - - name: Build C++ Libs + - name: Build C++ libraries run: | archery docker run \ -e ARROW_JAVA_BUILD=OFF \ @@ -35,27 +35,27 @@ jobs: java-jni-manylinux-2014 - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-linux.tar.gz arrow/java-dist/ - - name: Upload Artifacts + - name: Upload artifacts uses: actions/upload-artifact@v2 with: name: ubuntu-shared-lib path: arrow-shared-libs-linux.tar.gz {% if arrow.branch == 'master' %} {{ macros.github_login_dockerhub()|indent }} - - name: Push Docker Image + - name: Push Docker image shell: bash run: archery docker push java-jni-manylinux-2014 {% endif %} build-cpp-macos: - name: Build C++ Libs MacOS + name: Build C++ libraries macOS runs-on: macos-latest env: MACOSX_DEPLOYMENT_TARGET: "10.13" steps: {{ macros.github_checkout_arrow()|indent }} {{ macros.github_install_archery()|indent }} - - name: Install Dependencies + - name: Install dependencies run: | brew install --overwrite git brew bundle --file=arrow/cpp/Brewfile @@ -68,7 +68,7 @@ jobs: - name: Setup ccache run: | arrow/ci/scripts/ccache_setup.sh - - name: Build C++ Libs + - name: Build C++ libraries run: | set -e arrow/ci/scripts/java_jni_macos_build.sh \ @@ -77,14 +77,14 @@ jobs: $GITHUB_WORKSPACE/arrow/java-dist - name: Compress into single artifact to keep directory structure run: tar -cvzf arrow-shared-libs-macos.tar.gz arrow/java-dist/ - - name: Upload Artifacts + - name: Upload artifacts uses: actions/upload-artifact@v2 with: name: macos-shared-lib path: arrow-shared-libs-macos.tar.gz package-jars: - name: Build Jar Files + name: Build jar files runs-on: macos-latest needs: [build-cpp-macos, build-cpp-ubuntu] steps: @@ -93,7 +93,7 @@ jobs: uses: actions/download-artifact@v2 with: name: ubuntu-shared-lib - - name: Download MacOS C++ Library + - name: Download macOS C++ libraries uses: actions/download-artifact@v2 with: name: macos-shared-lib @@ -101,7 +101,7 @@ jobs: run: | tar -xvzf arrow-shared-libs-linux.tar.gz tar -xvzf arrow-shared-libs-macos.tar.gz - - name: Test that Shared Libraries Exist + - name: Test that shared libraries exist run: | set -x test -f arrow/java-dist/libarrow_cdata_jni.dylib @@ -114,7 +114,7 @@ jobs: test -f arrow/java-dist/libarrow_orc_jni.so test -f arrow/java-dist/libgandiva_jni.so test -f arrow/java-dist/libplasma_java.so - - name: Build Bundled Jar + - name: Build bundled jar run: | set -e pushd arrow/java From a0bfb7f68ad9676ee6f7d8554a478d9f6438681a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 18 Aug 2022 16:46:47 +0900 Subject: [PATCH 07/24] Relax dependency --- ci/scripts/java_jni_macos_build.sh | 1 - cpp/Brewfile | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 10b4becf81d..f206809a920 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -63,7 +63,6 @@ cmake \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_CSV=${ARROW_DATASET} \ -DARROW_DATASET=${ARROW_DATASET} \ - -DARROW_DEPENDENCY_SOURCE=SYSTEM \ -DARROW_DEPENDENCY_USE_SHARED=OFF \ -DARROW_FILESYSTEM=${ARROW_FILESYSTEM} \ -DARROW_GANDIVA=${ARROW_GANDIVA} \ diff --git a/cpp/Brewfile b/cpp/Brewfile index 2b44d33b5ec..61fb619dc66 100644 --- a/cpp/Brewfile +++ b/cpp/Brewfile @@ -40,4 +40,5 @@ brew "rapidjson" brew "snappy" brew "thrift" brew "wget" +brew "xsimd" brew "zstd" From c8c1b3174990d5f2b438a33ae9752061115a4799 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 20 Aug 2022 20:47:43 +0900 Subject: [PATCH 08/24] Clean --- ci/scripts/java_jni_build.sh | 8 ++++---- ci/scripts/java_jni_manylinux_build.sh | 4 +--- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 4475058705c..7cda33da0bf 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -45,14 +45,14 @@ case "$(uname)" in ;; esac -: ${CMKAE_BUILD_TYPE:=Release} -# TODO: Remove the last "/arrow" from -DCMAKE_INSTALL_PREFIX when +: ${CMKAE_BUILD_TYPE:=release} +# TODO: Remove the last "/arrow" from -DCMAKE_PREFIX_PATH when # we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ - -DCMAKE_PREFIX_PATH=${dist_dir} \ - -DCMAKE_INSTALL_PREFIX=${arrow_install_dir}/lib/cmake/arrow \ + -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ + -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ -GNinja \ ${JAVA_JNI_CMAKE_ARGS:-} \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 936138c0aca..a121efaa1bc 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -24,8 +24,6 @@ build_dir=${2} # The directory where the final binaries will be stored when scripts finish dist_dir=${3} -INSTALL_DIR=${build_dir}/cpp-install - echo "=== Clear output directories and leftovers ===" # Clear output directories and leftovers rm -rf ${build_dir} @@ -123,7 +121,7 @@ popd JAVA_JNI_CMAKE_ARGS="" -JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=/opt/vcpkg/scripts/buildsystems/vcpkg.cmake" +JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" JAVA_JNI_CMAKE_ARGS="${JAVA_JNI_CMAKE_ARGS} -DVCPKG_TARGET_TRIPLET=${VCPKG_TARGET_TRIPLET}" export JAVA_JNI_CMAKE_ARGS ${arrow_dir}/ci/scripts/java_jni_build.sh \ From 23d0e546f11c4620d77cbfb25cf632be62e999d5 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 15:17:27 +0900 Subject: [PATCH 09/24] Remove needless copies --- ci/scripts/java_jni_macos_build.sh | 1 - ci/scripts/java_jni_manylinux_build.sh | 1 - 2 files changed, 2 deletions(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index f206809a920..6bef08208bc 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -118,7 +118,6 @@ fi echo "=== Copying libraries to the distribution folder ===" mkdir -p "${dist_dir}" -cp -L ${install_dir}/lib/libarrow_dataset_jni.dylib ${dist_dir} cp -L ${install_dir}/lib/libarrow_orc_jni.dylib ${dist_dir} cp -L ${install_dir}/lib/libgandiva_jni.dylib ${dist_dir} cp -L ${build_dir}/cpp/*/libplasma_java.dylib ${dist_dir} diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index a121efaa1bc..46bbe450189 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -137,7 +137,6 @@ fi echo "=== Copying libraries to the distribution folder ===" -cp -L ${ARROW_HOME}/lib/libarrow_dataset_jni.so ${dist_dir} cp -L ${ARROW_HOME}/lib/libarrow_orc_jni.so ${dist_dir} cp -L ${ARROW_HOME}/lib/libgandiva_jni.so ${dist_dir} cp -L ${build_dir}/cpp/*/libplasma_java.so ${dist_dir} From 78d80d7c503f7aa71b5cc188e22a9bceb018a36c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 15:21:20 +0900 Subject: [PATCH 10/24] Clean up --- cpp/cmake_modules/FindAWSSDKAlt.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/FindAWSSDKAlt.cmake b/cpp/cmake_modules/FindAWSSDKAlt.cmake index 64ea4d24d5a..611184aa1d1 100644 --- a/cpp/cmake_modules/FindAWSSDKAlt.cmake +++ b/cpp/cmake_modules/FindAWSSDKAlt.cmake @@ -27,11 +27,11 @@ endif() if(DEFINED ENV{CONDA_PREFIX}) if(DEFINED BUILD_SHARED_LIBS) set(BUILD_SHARED_LIBS_WAS_SET TRUE) - set(BUILD_SHARED_LIBS_VALUE ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS_KEEP ${BUILD_SHARED_LIBS}) else() set(BUILD_SHARED_LIBS_WAS_SET FALSE) endif() - set(BUILD_SHARED_LIBS "ON") + set(BUILD_SHARED_LIBS ON) endif() find_package(AWSSDK ${find_package_args} COMPONENTS config @@ -42,7 +42,7 @@ find_package(AWSSDK ${find_package_args} # Restore previous value of BUILD_SHARED_LIBS if(DEFINED ENV{CONDA_PREFIX}) if(BUILD_SHARED_LIBS_WAS_SET) - set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_VALUE}) + set(BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS_KEEP}) else() unset(BUILD_SHARED_LIBS) endif() From 98c995296f1d559bae9ff4f3c5e5389d230d5c3c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 17:22:45 +0900 Subject: [PATCH 11/24] Don't enable test on integration test --- ci/docker/java-jni-manylinux-201x.dockerfile | 2 +- ci/scripts/java_jni_build.sh | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/ci/docker/java-jni-manylinux-201x.dockerfile b/ci/docker/java-jni-manylinux-201x.dockerfile index a834ae0bb51..c77ec63df74 100644 --- a/ci/docker/java-jni-manylinux-201x.dockerfile +++ b/ci/docker/java-jni-manylinux-201x.dockerfile @@ -37,7 +37,7 @@ ARG java=1.8.0 RUN yum install -y java-$java-openjdk-devel rh-maven35 && yum clean all ENV JAVA_HOME=/usr/lib/jvm/java-$java-openjdk/ -# For ci/scripts/java_*.sh +# For ci/scripts/{cpp,java}_*.sh ENV ARROW_GANDIVA_JAVA=ON \ ARROW_HOME=/tmp/local \ ARROW_JAVA_CDATA=ON \ diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 7cda33da0bf..4afaa1c9e34 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -45,11 +45,13 @@ case "$(uname)" in ;; esac +: ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}} : ${CMKAE_BUILD_TYPE:=release} # TODO: Remove the last "/arrow" from -DCMAKE_PREFIX_PATH when # we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ + -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ @@ -59,9 +61,11 @@ cmake \ ${arrow_dir}/java export CMAKE_BUILD_PARALLEL_LEVEL=${n_jobs} cmake --build . --config ${CMAKE_BUILD_TYPE} -ctest \ - --output-on-failure \ - --parallel ${n_jobs} \ - --timeout 300 +if [ "${ARROW_JAVA_BUILD_TESTS}" = "ON" ]; then + ctest \ + --output-on-failure \ + --parallel ${n_jobs} \ + --timeout 300 +fi cmake --build . --config ${CMAKE_BUILD_TYPE} --target install popd From 0d5ebc4978e8f128f5c4e83b97fc5cdd17412a94 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 21:48:53 +0900 Subject: [PATCH 12/24] Fix java_jni_build.sh usage --- ci/scripts/java_jni_build.sh | 2 +- docker-compose.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 4afaa1c9e34..6f516189636 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -52,7 +52,7 @@ esac cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} \ -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ diff --git a/docker-compose.yml b/docker-compose.yml index 751a81fa554..67dfd87512e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1167,7 +1167,7 @@ services: command: [ "/arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && - /arrow/ci/scripts/java_jni_build.sh /arrow /build /tmp/dist/java && + /arrow/ci/scripts/java_jni_build.sh /arrow $${ARROW_HOME} /build /tmp/dist/java && /arrow/ci/scripts/java_build.sh /arrow /build /tmp/dist/java && /arrow/ci/scripts/java_cdata_integration.sh /arrow /tmp/dist/java" ] From cb6fb00775dcdd9e6a4b6da1f362c9f000d3f629 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 21:52:35 +0900 Subject: [PATCH 13/24] Fix a typo --- ci/scripts/java_jni_build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 6f516189636..7ce1059ec0c 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -46,13 +46,13 @@ case "$(uname)" in esac : ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}} -: ${CMKAE_BUILD_TYPE:=release} +: ${CMAKE_BUILD_TYPE:=release} # TODO: Remove the last "/arrow" from -DCMAKE_PREFIX_PATH when # we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:} \ -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ From 9078c7036368dba39d7075e8326826a2763f14c9 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 22:01:04 +0900 Subject: [PATCH 14/24] Remove garbage --- ci/scripts/java_jni_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 7ce1059ec0c..650ef9b35b4 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -52,7 +52,7 @@ esac cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:} \ + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ From 12e3944dc5b153f68d8c6da9a85b725b69a0e71c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 23 Aug 2022 22:57:29 +0900 Subject: [PATCH 15/24] Build shared for JNI --- ci/scripts/java_jni_macos_build.sh | 4 +++- ci/scripts/java_jni_manylinux_build.sh | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 6bef08208bc..b3316d2197f 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -57,8 +57,10 @@ export AWS_EC2_METADATA_DISABLED=TRUE mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" +# TODO: Change -DARROW_BUILD_SHARED=ON to OFF when we move all JNI +# related codes to java/ from cpp/. cmake \ - -DARROW_BUILD_SHARED=OFF \ + -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_CSV=${ARROW_DATASET} \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 46bbe450189..77a254587df 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -65,8 +65,10 @@ export AWS_EC2_METADATA_DISABLED=TRUE mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" +# TODO: Change -DARROW_BUILD_SHARED=ON to OFF when we move all JNI +# related codes to java/ from cpp/. cmake \ - -DARROW_BUILD_SHARED=OFF \ + -DARROW_BUILD_SHARED=ON \ -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_CSV=${ARROW_DATASET} \ From 931b6daa2a7cb97b4e3d36d8aef72f28344fe14e Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 24 Aug 2022 05:57:04 +0900 Subject: [PATCH 16/24] Ignore more tests --- ci/scripts/java_jni_macos_build.sh | 2 +- ci/scripts/java_jni_manylinux_build.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index b3316d2197f..39bfbf775f0 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -93,7 +93,7 @@ cmake --build . --target install if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then # MinIO is required - exclude_tests="arrow-s3-test" + exclude_tests="arrow-s3fs-test" # unstable exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" ctest \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 77a254587df..4be88942b8b 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -109,6 +109,7 @@ if [ "${ARROW_BUILD_TESTS}" = "ON" ]; then exclude_tests="arrow-s3fs-test" # unstable exclude_tests="${exclude_tests}|arrow-compute-hash-join-node-test" + exclude_tests="${exclude_tests}|arrow-dataset-scanner-test" # strptime exclude_tests="${exclude_tests}|arrow-utility-test" ctest \ From 89922302a97d5fa4f03232c5f642616728593b2a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 24 Aug 2022 06:01:17 +0900 Subject: [PATCH 17/24] JNI libraries must be shared --- ci/scripts/java_jni_macos_build.sh | 4 +--- ci/scripts/java_jni_manylinux_build.sh | 4 +--- cpp/src/gandiva/jni/CMakeLists.txt | 6 ++++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 39bfbf775f0..53553346475 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -57,10 +57,8 @@ export AWS_EC2_METADATA_DISABLED=TRUE mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" -# TODO: Change -DARROW_BUILD_SHARED=ON to OFF when we move all JNI -# related codes to java/ from cpp/. cmake \ - -DARROW_BUILD_SHARED=ON \ + -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS} \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_CSV=${ARROW_DATASET} \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 4be88942b8b..b59eddd4374 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -65,10 +65,8 @@ export AWS_EC2_METADATA_DISABLED=TRUE mkdir -p "${build_dir}/cpp" pushd "${build_dir}/cpp" -# TODO: Change -DARROW_BUILD_SHARED=ON to OFF when we move all JNI -# related codes to java/ from cpp/. cmake \ - -DARROW_BUILD_SHARED=ON \ + -DARROW_BUILD_SHARED=OFF \ -DARROW_BUILD_TESTS=ON \ -DARROW_BUILD_UTILITIES=OFF \ -DARROW_CSV=${ARROW_DATASET} \ diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt index 046934141f6..3fc80200891 100644 --- a/cpp/src/gandiva/jni/CMakeLists.txt +++ b/cpp/src/gandiva/jni/CMakeLists.txt @@ -76,8 +76,10 @@ add_arrow_lib(gandiva_jni ${GANDIVA_JNI_SOURCES} OUTPUTS GANDIVA_JNI_LIBRARIES - SHARED_PRIVATE_LINK_LIBS - ${GANDIVA_LINK_LIBS} + BUILD_SHARED + ON + BUILD_STATIC + OFF STATIC_LINK_LIBS ${GANDIVA_LINK_LIBS} DEPENDENCIES From 49dbee8c35c6149a7c2b19c88eb23897fb2a752d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 30 Aug 2022 14:05:14 +0900 Subject: [PATCH 18/24] Resolve TODO for ARROW-12175 --- ci/scripts/java_jni_build.sh | 4 +--- java/CMakeLists.txt | 3 --- java/dataset/CMakeLists.txt | 2 -- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/ci/scripts/java_jni_build.sh b/ci/scripts/java_jni_build.sh index 650ef9b35b4..c68b52d77ef 100755 --- a/ci/scripts/java_jni_build.sh +++ b/ci/scripts/java_jni_build.sh @@ -47,13 +47,11 @@ esac : ${ARROW_JAVA_BUILD_TESTS:=${ARROW_BUILD_TESTS:-OFF}} : ${CMAKE_BUILD_TYPE:=release} -# TODO: Remove the last "/arrow" from -DCMAKE_PREFIX_PATH when -# we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . cmake \ -DARROW_JAVA_JNI_ENABLE_DATASET=${ARROW_DATASET:-ON} \ -DBUILD_TESTING=${ARROW_JAVA_BUILD_TESTS} \ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \ - -DCMAKE_PREFIX_PATH=${arrow_install_dir}/lib/cmake/arrow \ + -DCMAKE_PREFIX_PATH=${arrow_install_dir} \ -DCMAKE_INSTALL_PREFIX=${dist_dir} \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ -GNinja \ diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 5dbe5a8a1a6..91483ebc05e 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -60,9 +60,6 @@ if(BUILD_TESTING) find_package(ArrowTesting REQUIRED) find_package(GTest REQUIRED) add_library(arrow_java_test INTERFACE IMPORTED) - # TODO: Remove this when - # we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . - target_link_libraries(arrow_testing_static INTERFACE arrow_static) target_link_libraries(arrow_java_test INTERFACE arrow_testing_static GTest::gtest_main) endif() diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt index b9977717bde..141c0f22c3b 100644 --- a/java/dataset/CMakeLists.txt +++ b/java/dataset/CMakeLists.txt @@ -32,8 +32,6 @@ add_jar(arrow_java_jni_dataset_jar add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc src/main/cpp/jni_util.cc) set_property(TARGET arrow_java_jni_dataset PROPERTY OUTPUT_NAME "arrow_dataset_jni") -# TODO: Remove this when -# we resolve ARROW-12175 / https://github.com/apache/arrow/pull/13892 . target_link_libraries(arrow_dataset_static INTERFACE arrow_static) target_link_libraries(arrow_java_jni_dataset arrow_java_jni_dataset_headers jni arrow_dataset_static) From d085780b23d61afae30f14043f8c993893b30fe3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 30 Aug 2022 17:43:38 +0900 Subject: [PATCH 19/24] Fix link type --- cpp/src/gandiva/jni/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/gandiva/jni/CMakeLists.txt b/cpp/src/gandiva/jni/CMakeLists.txt index 3fc80200891..b89356121dc 100644 --- a/cpp/src/gandiva/jni/CMakeLists.txt +++ b/cpp/src/gandiva/jni/CMakeLists.txt @@ -80,7 +80,7 @@ add_arrow_lib(gandiva_jni ON BUILD_STATIC OFF - STATIC_LINK_LIBS + SHARED_LINK_LIBS ${GANDIVA_LINK_LIBS} DEPENDENCIES ${GANDIVA_LINK_LIBS} From e79ee3009c19253b29a61df1385ff300b8d07b4c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 31 Aug 2022 06:36:51 +0900 Subject: [PATCH 20/24] Use new CMake targets --- java/CMakeLists.txt | 3 ++- java/dataset/CMakeLists.txt | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/java/CMakeLists.txt b/java/CMakeLists.txt index 91483ebc05e..f187cd943d1 100644 --- a/java/CMakeLists.txt +++ b/java/CMakeLists.txt @@ -60,7 +60,8 @@ if(BUILD_TESTING) find_package(ArrowTesting REQUIRED) find_package(GTest REQUIRED) add_library(arrow_java_test INTERFACE IMPORTED) - target_link_libraries(arrow_java_test INTERFACE arrow_testing_static GTest::gtest_main) + target_link_libraries(arrow_java_test INTERFACE ArrowTesting::arrow_testing_static + GTest::gtest_main) endif() if(ARROW_JAVA_JNI_ENABLE_C) diff --git a/java/dataset/CMakeLists.txt b/java/dataset/CMakeLists.txt index 141c0f22c3b..3b76b4e03bc 100644 --- a/java/dataset/CMakeLists.txt +++ b/java/dataset/CMakeLists.txt @@ -32,9 +32,8 @@ add_jar(arrow_java_jni_dataset_jar add_library(arrow_java_jni_dataset SHARED src/main/cpp/jni_wrapper.cc src/main/cpp/jni_util.cc) set_property(TARGET arrow_java_jni_dataset PROPERTY OUTPUT_NAME "arrow_dataset_jni") -target_link_libraries(arrow_dataset_static INTERFACE arrow_static) target_link_libraries(arrow_java_jni_dataset arrow_java_jni_dataset_headers jni - arrow_dataset_static) + ArrowDataset::arrow_dataset_static) if(BUILD_TESTING) add_executable(arrow-java-jni-dataset-test src/main/cpp/jni_util_test.cc From 654513b5442b2e3d76bbea69af21d12ca7c20d76 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 31 Aug 2022 11:01:25 +0900 Subject: [PATCH 21/24] Add missing dependencies --- cpp/CMakeLists.txt | 6 ++++++ cpp/src/arrow/ArrowConfig.cmake.in | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 63add810c8c..f4a529e875f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -795,6 +795,9 @@ if(ARROW_S3) aws-cpp-sdk-s3 aws-cpp-sdk-core) endif() + if(UNIX) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) + endif() endif() if(ARROW_WITH_OPENTELEMETRY) @@ -860,6 +863,9 @@ add_dependencies(arrow_test_dependencies toolchain-tests) if(ARROW_STATIC_LINK_LIBS) add_dependencies(arrow_dependencies ${ARROW_STATIC_LINK_LIBS}) if(ARROW_HDFS OR ARROW_ORC) + if(Protobuf_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${ARROW_PROTOBUF_LIBPROTOBUF}) + endif() if(NOT MSVC_TOOLCHAIN) list(APPEND ARROW_STATIC_LINK_LIBS ${CMAKE_DL_LIBS}) list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS ${CMAKE_DL_LIBS}) diff --git a/cpp/src/arrow/ArrowConfig.cmake.in b/cpp/src/arrow/ArrowConfig.cmake.in index 83a0f849c48..515bbddf3b1 100644 --- a/cpp/src/arrow/ArrowConfig.cmake.in +++ b/cpp/src/arrow/ArrowConfig.cmake.in @@ -102,6 +102,10 @@ if(TARGET Arrow::arrow_static AND NOT TARGET Arrow::arrow_bundled_dependencies) PROPERTIES IMPORTED_LOCATION "${arrow_lib_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_bundled_dependencies${CMAKE_STATIC_LIBRARY_SUFFIX}" ) + if(APPLE AND "AWS::aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) + find_library(CORE_FOUNDATION CoreFoundation) + target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${CORE_FOUNDATION}) + endif() endif() macro(arrow_keep_backward_compatibility namespace target_base_name) From 8627aded1f21bf785b5a306622d0cd813630967c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 31 Aug 2022 14:12:25 +0900 Subject: [PATCH 22/24] Add cmake_policy() for IN_LIST --- cpp/src/arrow/ArrowConfig.cmake.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/src/arrow/ArrowConfig.cmake.in b/cpp/src/arrow/ArrowConfig.cmake.in index 515bbddf3b1..2c6fd6d1b17 100644 --- a/cpp/src/arrow/ArrowConfig.cmake.in +++ b/cpp/src/arrow/ArrowConfig.cmake.in @@ -102,10 +102,16 @@ if(TARGET Arrow::arrow_static AND NOT TARGET Arrow::arrow_bundled_dependencies) PROPERTIES IMPORTED_LOCATION "${arrow_lib_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}arrow_bundled_dependencies${CMAKE_STATIC_LIBRARY_SUFFIX}" ) + + # CMP0057: Support new if() IN_LIST operator. + # https://cmake.org/cmake/help/latest/policy/CMP0057.html + cmake_policy(PUSH) + cmake_policy(SET CMP0057 NEW) if(APPLE AND "AWS::aws-c-common" IN_LIST ARROW_BUNDLED_STATIC_LIBS) find_library(CORE_FOUNDATION CoreFoundation) target_link_libraries(Arrow::arrow_bundled_dependencies INTERFACE ${CORE_FOUNDATION}) endif() + cmake_policy(POP) endif() macro(arrow_keep_backward_compatibility namespace target_base_name) From e53b8243616aeaa1d46d83a2526613fc8ca9cb59 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 31 Aug 2022 14:24:33 +0900 Subject: [PATCH 23/24] Add FindProtobufAlt.cmake --- cpp/CMakeLists.txt | 7 +++-- cpp/cmake_modules/FindProtobufAlt.cmake | 32 +++++++++++++++++++++ cpp/cmake_modules/ThirdpartyToolchain.cmake | 8 ++---- 3 files changed, 38 insertions(+), 9 deletions(-) create mode 100644 cpp/cmake_modules/FindProtobufAlt.cmake diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f4a529e875f..6a01f18e6bb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -794,9 +794,10 @@ if(ARROW_S3) aws-cpp-sdk-cognito-identity aws-cpp-sdk-s3 aws-cpp-sdk-core) - endif() - if(UNIX) - list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) + elseif(AWSSDK_SOURCE STREQUAL "BUNDLED") + if(UNIX) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS CURL::libcurl) + endif() endif() endif() diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake new file mode 100644 index 00000000000..d29f757aeb6 --- /dev/null +++ b/cpp/cmake_modules/FindProtobufAlt.cmake @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(ARROW_PROTOBUF_USE_SHARED) + set(Protobuf_USE_STATIC_LIBS OFF) +else() + set(Protobuf_USE_STATIC_LIBS ON) +endif() + +set(find_package_args) +if(ProtobufAlt_FIND_VERSION) + list(APPEND find_package_args ${ProtobufAlt_FIND_VERSION}) +endif() +if(ProtobufAlt_FIND_QUIETLY) + list(APPEND find_package_args QUIET) +endif() +find_package(Protobuf ${find_package_args}) +set(ProtobufAlt_FOUND ${Protobuf_FOUND}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index d9e7e0405f9..515cdfe8ef4 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -40,12 +40,6 @@ set(ARROW_RE2_LINKAGE "static" CACHE STRING "How to link the re2 library. static|shared (default static)") -if(ARROW_PROTOBUF_USE_SHARED) - set(Protobuf_USE_STATIC_LIBS OFF) -else() - set(Protobuf_USE_STATIC_LIBS ON) -endif() - # ---------------------------------------------------------------------- # Resolve the dependencies @@ -1640,6 +1634,8 @@ if(ARROW_WITH_PROTOBUF) set(ARROW_PROTOBUF_REQUIRED_VERSION "2.6.1") endif() resolve_dependency(Protobuf + HAVE_ALT + TRUE REQUIRED_VERSION ${ARROW_PROTOBUF_REQUIRED_VERSION} PC_PACKAGE_NAMES From 13957f7a31dcff56580d6e07c14331624325b2a7 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 1 Sep 2022 16:00:46 +0900 Subject: [PATCH 24/24] Update document --- ci/scripts/java_jni_macos_build.sh | 2 - ci/scripts/java_jni_manylinux_build.sh | 2 - docs/source/developers/java/building.rst | 88 +++++++++++------------- 3 files changed, 40 insertions(+), 52 deletions(-) diff --git a/ci/scripts/java_jni_macos_build.sh b/ci/scripts/java_jni_macos_build.sh index 53553346475..342bc2d1188 100755 --- a/ci/scripts/java_jni_macos_build.sh +++ b/ci/scripts/java_jni_macos_build.sh @@ -39,7 +39,6 @@ install_dir=${build_dir}/cpp-install : ${ARROW_PARQUET:=ON} : ${ARROW_PLASMA_JAVA_CLIENT:=ON} : ${ARROW_PLASMA:=ON} -: ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} : ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=Release} @@ -73,7 +72,6 @@ cmake \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA=${ARROW_PLASMA} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ - -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ -DAWSSDK_SOURCE=BUNDLED \ diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index b59eddd4374..6669c4fdaa6 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -43,7 +43,6 @@ devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/includ : ${ARROW_PARQUET:=ON} : ${ARROW_PLASMA:=ON} : ${ARROW_PLASMA_JAVA_CLIENT:=ON} -: ${ARROW_PYTHON:=OFF} : ${ARROW_S3:=ON} : ${ARROW_USE_CCACHE:=OFF} : ${CMAKE_BUILD_TYPE:=release} @@ -83,7 +82,6 @@ cmake \ -DARROW_PARQUET=${ARROW_PARQUET} \ -DARROW_PLASMA_JAVA_CLIENT=${ARROW_PLASMA_JAVA_CLIENT} \ -DARROW_PLASMA=${ARROW_PLASMA} \ - -DARROW_PYTHON=${ARROW_PYTHON} \ -DARROW_RPATH_ORIGIN=${ARROW_RPATH_ORIGIN} \ -DARROW_S3=${ARROW_S3} \ -DARROW_USE_CCACHE=${ARROW_USE_CCACHE} \ diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index add2b11b278..b45afa70a9d 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -75,78 +75,70 @@ We can build these manually or we can use `Archery`_ to build them using a Docke |__ libarrow_dataset_jni.so |__ libarrow_orc_jni.so |__ libgandiva_jni.so + |__ libplasma_java.so Building JNI Libraries on MacOS ------------------------------- Note: If you are building on Apple Silicon, be sure to use a JDK version that was compiled for that architecture. See, for example, the `Azul JDK `_. -To build only the C Data Interface library: +First, you need to build Apache Arrow C++: .. code-block:: $ cd arrow $ brew bundle --file=cpp/Brewfile Homebrew Bundle complete! 25 Brewfile dependencies now installed. + $ brew uninstall aws-sdk-cpp + (We can't use aws-sdk-cpp installed by Homebrew because it has + an issue: https://github.com/aws/aws-sdk-cpp/issues/1809 ) $ export JAVA_HOME= - $ mkdir -p java-dist java-native-c - $ cd java-native-c + $ mkdir -p java-dist cpp-jni $ cmake \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=../java-dist/lib \ - ../java - $ cmake --build . --target install - $ ls -latr ../java-dist/lib - |__ libarrow_cdata_jni.dylib - -To build other JNI libraries: - -.. code-block:: - - $ cd arrow - $ brew bundle --file=cpp/Brewfile - Homebrew Bundle complete! 25 Brewfile dependencies now installed. - $ export JAVA_HOME= - $ mkdir -p java-dist java-native-cpp - $ cd java-native-cpp - $ cmake \ - -DARROW_BOOST_USE_SHARED=OFF \ - -DARROW_BROTLI_USE_SHARED=OFF \ - -DARROW_BZ2_USE_SHARED=OFF \ - -DARROW_GFLAGS_USE_SHARED=OFF \ - -DARROW_GRPC_USE_SHARED=OFF \ - -DARROW_LZ4_USE_SHARED=OFF \ - -DARROW_OPENSSL_USE_SHARED=OFF \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ - -DARROW_SNAPPY_USE_SHARED=OFF \ - -DARROW_THRIFT_USE_SHARED=OFF \ - -DARROW_UTF8PROC_USE_SHARED=OFF \ - -DARROW_ZSTD_USE_SHARED=OFF \ - -DARROW_JNI=ON \ - -DARROW_PARQUET=ON \ - -DARROW_FILESYSTEM=ON \ + -S cpp \ + -B cpp-jni \ + -DARROW_CSV=ON \ -DARROW_DATASET=ON \ + -DARROW_DEPENDENCY_USE_SHARED=OFF \ + -DARROW_FILESYSTEM=ON \ + -DARROW_GANDIVA=ON \ -DARROW_GANDIVA_JAVA=ON \ -DARROW_GANDIVA_STATIC_LIBSTDCPP=ON \ - -DARROW_GANDIVA=ON \ + -DARROW_JNI=ON \ -DARROW_ORC=ON \ - -DARROW_PLASMA_JAVA_CLIENT=ON \ + -DARROW_PARQUET=ON \ -DARROW_PLASMA=ON \ + -DARROW_PLASMA_JAVA_CLIENT=ON \ + -DARROW_S3=ON \ + -DAWSSDK_SOURCE=BUNDLED \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_LIBDIR=lib \ - -DCMAKE_INSTALL_PREFIX=../java-dist \ + -DCMAKE_INSTALL_PREFIX=java-dist \ -DCMAKE_UNITY_BUILD=ON \ - -Dre2_SOURCE=BUNDLED \ - -DBoost_SOURCE=BUNDLED \ - -Dutf8proc_SOURCE=BUNDLED \ - -DSnappy_SOURCE=BUNDLED \ - -DORC_SOURCE=BUNDLED \ - -DZLIB_SOURCE=BUNDLED \ - ../cpp - $ cmake --build . --target install + -Dre2_SOURCE=BUNDLED + $ cmake --build cpp-jni --target install $ ls -latr ../java-dist/lib - |__ libarrow_dataset_jni.dylib |__ libarrow_orc_jni.dylib |__ libgandiva_jni.dylib + |__ libplasma_java.dylib + +Then, you can build JNI libraries: + +.. code-block:: + + $ mkdir -p java-jni + $ cmake \ + -S java \ + -B java-jni \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=java-dist/lib \ + -DCMAKE_PREFIX_PATH=java-dist + $ cmake --build java-jni --target install + $ ls -latr ../java-dist/lib + |__ libarrow_cdata_jni.dylib + |__ libarrow_dataset_jni.dylib + +To build other JNI libraries: + Building Arrow JNI Modules --------------------------