diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml index 3c0f7f51d17..a0c2e99aca7 100644 --- a/ci/conda_env_cpp.yml +++ b/ci/conda_env_cpp.yml @@ -28,6 +28,7 @@ gmock>=1.8.1 grpc-cpp>=1.21.4 gtest=1.8.1 libprotobuf +libutf8proc lz4-c make ninja diff --git a/ci/docker/debian-10-cpp.dockerfile b/ci/docker/debian-10-cpp.dockerfile index 955e7aeba72..f86c009b57b 100644 --- a/ci/docker/debian-10-cpp.dockerfile +++ b/ci/docker/debian-10-cpp.dockerfile @@ -58,6 +58,7 @@ RUN apt-get update -y -q && \ libsnappy-dev \ libssl-dev \ libthrift-dev \ + libutf8proc-dev \ libzstd-dev \ llvm-${llvm}-dev \ make \ diff --git a/ci/docker/fedora-32-cpp.dockerfile b/ci/docker/fedora-32-cpp.dockerfile index 766e6b343da..535f8b4b761 100644 --- a/ci/docker/fedora-32-cpp.dockerfile +++ b/ci/docker/fedora-32-cpp.dockerfile @@ -53,6 +53,7 @@ RUN dnf update -y && \ re2-devel \ snappy-devel \ thrift-devel \ + utf8proc-devel \ which \ zlib-devel diff --git a/ci/docker/ubuntu-14.04-cpp.dockerfile b/ci/docker/ubuntu-14.04-cpp.dockerfile index f44326df0c6..2316b6ed0d5 100644 --- a/ci/docker/ubuntu-14.04-cpp.dockerfile +++ b/ci/docker/ubuntu-14.04-cpp.dockerfile @@ -90,4 +90,5 @@ ENV ARROW_BUILD_TESTS=ON \ RapidJSON_SOURCE=BUNDLED \ RE2_SOURCE=BUNDLED \ Thrift_SOURCE=BUNDLED \ + utf8proc_SOURCE=BUNDLED \ ZSTD_SOURCE=BUNDLED diff --git a/ci/docker/ubuntu-16.04-cpp.dockerfile b/ci/docker/ubuntu-16.04-cpp.dockerfile index 5ab0a88dd90..e94dc8ac539 100644 --- a/ci/docker/ubuntu-16.04-cpp.dockerfile +++ b/ci/docker/ubuntu-16.04-cpp.dockerfile @@ -51,6 +51,7 @@ RUN apt-get update -y -q && \ liblz4-dev \ libre2-dev \ libssl-dev \ + libutf8proc-dev \ libzstd1-dev \ llvm-${llvm}-dev \ make \ diff --git a/ci/docker/ubuntu-18.04-cpp.dockerfile b/ci/docker/ubuntu-18.04-cpp.dockerfile index 21d15ea09e7..f93f59867d3 100644 --- a/ci/docker/ubuntu-18.04-cpp.dockerfile +++ b/ci/docker/ubuntu-18.04-cpp.dockerfile @@ -78,6 +78,7 @@ RUN apt-get update -y -q && \ libre2-dev \ libsnappy-dev \ libssl-dev \ + 
libutf8proc-dev \ libzstd-dev \ ninja-build \ pkg-config \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 726a2fdc182..2b0e32c1e85 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -65,6 +65,7 @@ RUN apt-get update -y -q && \ libsnappy-dev \ libssl-dev \ libthrift-dev \ + libutf8proc-dev \ libzstd-dev \ make \ ninja-build \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index b84d72c3527..6420a5331ac 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -123,6 +123,7 @@ cmake -G "${CMAKE_GENERATOR:-Ninja}" \ -DRE2_SOURCE=${RE2_SOURCE:-} \ -DSnappy_SOURCE=${Snappy_SOURCE:-} \ -DThrift_SOURCE=${Thrift_SOURCE:-} \ + -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ -DZSTD_SOURCE=${ZSTD_SOURCE:-} \ ${CMAKE_ARGS} \ ${source_dir} diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index 9cd999b00e3..585a143677b 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -41,6 +41,7 @@ case "${target}" in packages+=(${MINGW_PACKAGE_PREFIX}-rapidjson) packages+=(${MINGW_PACKAGE_PREFIX}-snappy) packages+=(${MINGW_PACKAGE_PREFIX}-thrift) + packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc) packages+=(${MINGW_PACKAGE_PREFIX}-zlib) packages+=(${MINGW_PACKAGE_PREFIX}-zstd) ;; diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8c05d4b84d0..f5a348de4ec 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -698,6 +698,12 @@ if(ARROW_S3) list(APPEND ARROW_LINK_LIBS ${AWSSDK_LINK_LIBRARIES}) endif() +if(ARROW_WITH_UTF8PROC) + list(APPEND ARROW_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_STATIC_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS utf8proc::utf8proc) +endif() + add_custom_target(arrow_dependencies) add_custom_target(arrow_benchmark_dependencies) add_custom_target(arrow_test_dependencies) diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake 
new file mode 100644 index 00000000000..ab9ae9f9878 --- /dev/null +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if(utf8proc_ROOT) + find_library( + UTF8PROC_LIB + NAMES utf8proc + "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATHS ${utf8proc_ROOT} + PATH_SUFFIXES ${LIB_PATH_SUFFIXES} + NO_DEFAULT_PATH) + find_path(UTF8PROC_INCLUDE_DIR + NAMES utf8proc.h + PATHS ${utf8proc_ROOT} + NO_DEFAULT_PATH + PATH_SUFFIXES ${INCLUDE_PATH_SUFFIXES}) + +else() + find_library( + UTF8PROC_LIB + NAMES utf8proc + "${CMAKE_SHARED_LIBRARY_PREFIX}utf8proc${CMAKE_SHARED_LIBRARY_SUFFIX}" + PATH_SUFFIXES ${LIB_PATH_SUFFIXES}) + find_path(UTF8PROC_INCLUDE_DIR NAMES utf8proc.h PATH_SUFFIXES ${INCLUDE_PATH_SUFFIXES}) +endif() + +find_package_handle_standard_args(utf8proc REQUIRED_VARS UTF8PROC_LIB UTF8PROC_INCLUDE_DIR) + +# CMake 3.2 does uppercase the FOUND variable +if(UTF8PROC_FOUND OR utf8proc_FOUND) + set(utf8proc_FOUND TRUE) + add_library(utf8proc::utf8proc UNKNOWN IMPORTED) + set_target_properties(utf8proc::utf8proc + PROPERTIES IMPORTED_LOCATION "${UTF8PROC_LIB}" + INTERFACE_INCLUDE_DIRECTORIES "${UTF8PROC_INCLUDE_DIR}") +endif() + diff --git 
a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9a842332f94..30650e09ff7 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -58,6 +58,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES RapidJSON Snappy Thrift + utf8proc ZLIB ZSTD) @@ -135,6 +136,8 @@ macro(build_dependency DEPENDENCY_NAME) build_re2() elseif("${DEPENDENCY_NAME}" STREQUAL "Thrift") build_thrift() + elseif("${DEPENDENCY_NAME}" STREQUAL "utf8proc") + build_utf8proc() elseif("${DEPENDENCY_NAME}" STREQUAL "ZLIB") build_zlib() elseif("${DEPENDENCY_NAME}" STREQUAL "ZSTD") @@ -210,6 +213,10 @@ if(ARROW_ORC OR ARROW_FLIGHT OR ARROW_GANDIVA) set(ARROW_WITH_PROTOBUF ON) endif() +if(ARROW_COMPUTE) + set(ARROW_WITH_UTF8PROC ON) +endif() + # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds, which also can be used to configure # offline builds @@ -483,16 +490,25 @@ else() ) endif() -if(DEFINED ENV{BZIP2_SOURCE_URL}) - set(BZIP2_SOURCE_URL "$ENV{BZIP2_SOURCE_URL}") +if(DEFINED ENV{ARROW_BZIP2_SOURCE_URL}) + set(ARROW_BZIP2_SOURCE_URL "$ENV{ARROW_BZIP2_SOURCE_URL}") else() set_urls( - BZIP2_SOURCE_URL + ARROW_BZIP2_SOURCE_URL "https://sourceware.org/pub/bzip2/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" "https://github.com/ursa-labs/thirdparty/releases/download/latest/bzip2-${ARROW_BZIP2_BUILD_VERSION}.tar.gz" ) endif() +if(DEFINED ENV{ARROW_UTF8PROC_SOURCE_URL}) + set(ARROW_UTF8PROC_SOURCE_URL "$ENV{ARROW_UTF8PROC_SOURCE_URL}") +else() + set_urls( + ARROW_UTF8PROC_SOURCE_URL + "https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" + ) +endif() + # ---------------------------------------------------------------------- # ExternalProject options @@ -1986,7 +2002,7 @@ macro(build_bzip2) INSTALL_COMMAND ${MAKE} install PREFIX=${BZIP2_PREFIX} ${BZIP2_EXTRA_ARGS} INSTALL_DIR ${BZIP2_PREFIX} - URL ${BZIP2_SOURCE_URL} + URL 
${ARROW_BZIP2_SOURCE_URL} BUILD_BYPRODUCTS "${BZIP2_STATIC_LIB}") file(MAKE_DIRECTORY "${BZIP2_PREFIX}/include") @@ -2013,6 +2029,64 @@ if(ARROW_WITH_BZ2) include_directories(SYSTEM "${BZIP2_INCLUDE_DIR}") endif() +macro(build_utf8proc) + message(STATUS "Building utf8proc from source") + set(UTF8PROC_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/utf8proc_ep-install") + if(MSVC) + set(UTF8PROC_STATIC_LIB "${UTF8PROC_PREFIX}/lib/utf8proc_static.lib") + else() + set( + UTF8PROC_STATIC_LIB + "${UTF8PROC_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}utf8proc${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) + endif() + + set(UTF8PROC_CMAKE_ARGS + ${EP_COMMON_TOOLCHAIN} + "-DCMAKE_INSTALL_PREFIX=${UTF8PROC_PREFIX}" + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_LIBDIR=lib + -DBUILD_SHARED_LIBS=OFF) + + externalproject_add(utf8proc_ep + ${EP_LOG_OPTIONS} + CMAKE_ARGS ${UTF8PROC_CMAKE_ARGS} + INSTALL_DIR ${UTF8PROC_PREFIX} + URL ${ARROW_UTF8PROC_SOURCE_URL} + BUILD_BYPRODUCTS "${UTF8PROC_STATIC_LIB}") + + file(MAKE_DIRECTORY "${UTF8PROC_PREFIX}/include") + add_library(utf8proc::utf8proc STATIC IMPORTED) + set_target_properties(utf8proc::utf8proc + PROPERTIES IMPORTED_LOCATION + "${UTF8PROC_STATIC_LIB}" + INTERFACE_COMPILE_DEFINITIONS + "UTF8PROC_STATIC" + INTERFACE_INCLUDE_DIRECTORIES + "${UTF8PROC_PREFIX}/include") + + add_dependencies(toolchain utf8proc_ep) + add_dependencies(utf8proc::utf8proc utf8proc_ep) +endmacro() + +if(ARROW_WITH_UTF8PROC) + resolve_dependency(utf8proc) + + # TODO: Don't use global definitions but rather + # target_compile_definitions or target_link_libraries + get_target_property(UTF8PROC_COMPILER_DEFINITIONS utf8proc::utf8proc + INTERFACE_COMPILE_DEFINITIONS) + if(UTF8PROC_COMPILER_DEFINITIONS) + add_definitions(-D${UTF8PROC_COMPILER_DEFINITIONS}) + endif() + + # TODO: Don't use global includes but rather + # target_include_directories or target_link_libraries + get_target_property(UTF8PROC_INCLUDE_DIR utf8proc::utf8proc + INTERFACE_INCLUDE_DIRECTORIES) +
include_directories(SYSTEM ${UTF8PROC_INCLUDE_DIR}) +endif() + macro(build_cares) message(STATUS "Building c-ares from source") set(CARES_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/cares_ep-install") diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 30fac026b50..becfcc4381f 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -47,6 +47,7 @@ ARROW_RE2_BUILD_VERSION=2019-08-01 ARROW_SNAPPY_BUILD_VERSION=1.1.8 ARROW_THRIFT_BUILD_VERSION=0.12.0 ARROW_THRIFT_BUILD_MD5_CHECKSUM=3deebbb4d1ca77dd9c9e009a1ea02183 +ARROW_UTF8PROC_BUILD_VERSION=v2.5.0 ARROW_ZLIB_BUILD_VERSION=1.2.11 ARROW_ZSTD_BUILD_VERSION=v1.4.5 @@ -75,6 +76,7 @@ DEPENDENCIES=( "ARROW_RE2_URL re2-${ARROW_RE2_BUILD_VERSION}.tar.gz https://github.com/google/re2/archive/${ARROW_RE2_BUILD_VERSION}.tar.gz" "ARROW_SNAPPY_URL snappy-${ARROW_SNAPPY_BUILD_VERSION}.tar.gz https://github.com/google/snappy/archive/${ARROW_SNAPPY_BUILD_VERSION}.tar.gz" "ARROW_THRIFT_URL thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz https://archive.apache.org/dist/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz" + "ARROW_UTF8PROC_URL utf8proc-${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz https://github.com/JuliaStrings/utf8proc/archive/${ARROW_UTF8PROC_BUILD_VERSION}.tar.gz" "ARROW_ZLIB_URL zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz https://zlib.net/fossils/zlib-${ARROW_ZLIB_BUILD_VERSION}.tar.gz" "ARROW_ZSTD_URL zstd-${ARROW_ZSTD_BUILD_VERSION}.tar.gz https://github.com/facebook/zstd/archive/${ARROW_ZSTD_BUILD_VERSION}.tar.gz" ) diff --git a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml index c9b1a210faa..85c1da84f87 100644 --- a/dev/tasks/conda-recipes/arrow-cpp/meta.yaml +++ b/dev/tasks/conda-recipes/arrow-cpp/meta.yaml @@ -68,6 +68,7 @@ outputs: - glog - grpc-cpp - libprotobuf + - libutf8proc - clangdev 10.* # [not win] - llvmdev 10.* # [not win] # llvmdev 9.* or later require Visual Studio 2017 diff --git 
a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile index 7acff38cbb6..fdb57005ca2 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-buster/Dockerfile @@ -71,6 +71,7 @@ RUN \ libsnappy-dev \ libssl-dev \ libthrift-dev \ + libutf8proc-dev \ libzstd-dev \ lsb-release \ ninja-build \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-stretch/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-stretch/Dockerfile index 7194897c664..606b2c9fe70 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-stretch/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/debian-stretch/Dockerfile @@ -70,6 +70,7 @@ RUN \ libre2-dev \ libsnappy-dev \ libssl-dev \ + libutf8proc-dev \ libzstd-dev \ lsb-release \ ninja-build \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile index da52efc0f35..606db04a27c 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-bionic/Dockerfile @@ -62,6 +62,7 @@ RUN \ libre2-dev \ libsnappy-dev \ libssl-dev \ + libutf8proc-dev \ libzstd-dev \ lsb-release \ ninja-build \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-eoan/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-eoan/Dockerfile index 05ee1fad90e..dceb8ba3a08 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-eoan/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-eoan/Dockerfile @@ -64,6 +64,7 @@ RUN \ libsnappy-dev \ libssl-dev \ libthrift-dev \ + libutf8proc-dev \ libzstd-dev \ lsb-release \ ninja-build \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile index 
3b1b3eb3987..e9468d4109c 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-focal/Dockerfile @@ -55,6 +55,7 @@ RUN \ libsnappy-dev \ libssl-dev \ libthrift-dev \ + libutf8proc-dev \ libzstd-dev \ lsb-release \ ninja-build \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-xenial/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-xenial/Dockerfile index 2e09b7053af..79da9f59068 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-xenial/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-xenial/Dockerfile @@ -68,6 +68,7 @@ RUN \ libre2-dev \ libsnappy-dev \ libssl-dev \ + libutf8proc-dev \ libzstd1-dev \ lsb-release \ pkg-config \ diff --git a/dev/tasks/linux-packages/apache-arrow/debian.ubuntu-xenial/control b/dev/tasks/linux-packages/apache-arrow/debian.ubuntu-xenial/control index 97e7410a9e6..b78e0893eee 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian.ubuntu-xenial/control +++ b/dev/tasks/linux-packages/apache-arrow/debian.ubuntu-xenial/control @@ -22,6 +22,7 @@ Build-Depends: libre2-dev, libsnappy-dev, libssl-dev, + libutf8proc-dev, libzstd1-dev, nvidia-cuda-toolkit [!arm64], pkg-config, diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control b/dev/tasks/linux-packages/apache-arrow/debian/control index e6c431044ff..928412ef2c4 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control +++ b/dev/tasks/linux-packages/apache-arrow/debian/control @@ -20,6 +20,7 @@ Build-Depends: libre2-dev, libsnappy-dev, libssl-dev, + libutf8proc-dev, libzstd-dev, ninja-build, nvidia-cuda-toolkit [!arm64], diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 092efefe49d..7cb543db8d0 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -45,7 +45,7 @@ %define use_meson 
(%{_centos_ver} >= 8) %define have_rapidjson (%{_centos_ver} == 7) -%define have_zstd (%{_centos_ver} < 8) +%define have_utf8proc (%{_centos_ver} == 7) Name: @PACKAGE@ Version: @VERSION@ @@ -71,9 +71,7 @@ BuildRequires: git %if %{_centos_ver} >= 7 BuildRequires: glog-devel %endif -%if %{have_zstd} BuildRequires: libzstd-devel -%endif BuildRequires: lz4-devel BuildRequires: pkgconfig BuildRequires: python%{python_version}-devel @@ -85,6 +83,9 @@ BuildRequires: rapidjson-devel BuildRequires: re2-devel %endif BuildRequires: snappy-devel +%if %{have_utf8proc} +BuildRequires: utf8proc-devel +%endif BuildRequires: zlib-devel %if %{use_flight} @@ -223,14 +224,15 @@ Requires: brotli Requires: gflags Requires: glog %endif -%if %{have_zstd} Requires: libzstd -%endif Requires: lz4 %if %{_centos_ver} >= 8 Requires: re2 %endif Requires: snappy +%if %{have_utf8proc} +Requires: utf8proc +%endif Requires: zlib %description libs diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile index a375c971df6..93436bbbf39 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-7/Dockerfile @@ -51,6 +51,7 @@ RUN \ rpmdevtools \ snappy-devel \ tar \ + utf8proc-devel \ zlib-devel && \ yum clean ${quiet} all diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile index 3f6701da3f7..19e947200ff 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-8/Dockerfile @@ -39,7 +39,7 @@ RUN \ glog-devel \ gobject-introspection-devel \ gtk-doc \ - # libzstd-devel \ + libzstd-devel \ llvm-devel \ llvm-static \ lz4-devel \ @@ -58,5 +58,6 @@ RUN \ rpmdevtools \ snappy-devel \ tar \ + # utf8proc-devel \ zlib-devel && \ dnf clean ${quiet} all diff --git a/python/manylinux1/Dockerfile-x86_64_base 
b/python/manylinux1/Dockerfile-x86_64_base index 249aa1b22d6..15487e02fbc 100644 --- a/python/manylinux1/Dockerfile-x86_64_base +++ b/python/manylinux1/Dockerfile-x86_64_base @@ -101,3 +101,6 @@ RUN /build_re2.sh ADD scripts/build_bz2.sh / RUN /build_bz2.sh + +ADD scripts/build_utf8proc.sh / +RUN /build_utf8proc.sh diff --git a/python/manylinux1/scripts/build_utf8proc.sh b/python/manylinux1/scripts/build_utf8proc.sh new file mode 100755 index 00000000000..d74c36a3aeb --- /dev/null +++ b/python/manylinux1/scripts/build_utf8proc.sh @@ -0,0 +1,38 @@ +#!/bin/bash -ex +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +NCORES=$(($(grep -c ^processor /proc/cpuinfo) + 1)) +export UTF8PROC_VERSION="2.5.0" +export PREFIX="/usr/local" + +curl -sL "https://github.com/JuliaStrings/utf8proc/archive/v${UTF8PROC_VERSION}.tar.gz" -o utf8proc-${UTF8PROC_VERSION}.tar.gz +tar xf utf8proc-${UTF8PROC_VERSION}.tar.gz + +pushd utf8proc-${UTF8PROC_VERSION} +mkdir build +pushd build +cmake ..
-GNinja \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=${PREFIX} + +ninja install +popd +popd + +rm -rf utf8proc-${UTF8PROC_VERSION}.tar.gz utf8proc-${UTF8PROC_VERSION} diff --git a/python/manylinux201x/Dockerfile-x86_64_base_2010 b/python/manylinux201x/Dockerfile-x86_64_base_2010 index 5eaa3f1ab7f..0d7d497ef5e 100644 --- a/python/manylinux201x/Dockerfile-x86_64_base_2010 +++ b/python/manylinux201x/Dockerfile-x86_64_base_2010 @@ -96,3 +96,6 @@ RUN /build_re2.sh ADD scripts/build_bz2.sh / RUN /build_bz2.sh + +ADD scripts/build_utf8proc.sh / +RUN /build_utf8proc.sh diff --git a/python/manylinux201x/Dockerfile-x86_64_base_2014 b/python/manylinux201x/Dockerfile-x86_64_base_2014 index 8b9ed88af08..d8c71ad9c7f 100644 --- a/python/manylinux201x/Dockerfile-x86_64_base_2014 +++ b/python/manylinux201x/Dockerfile-x86_64_base_2014 @@ -96,3 +96,6 @@ RUN /build_re2.sh ADD scripts/build_bz2.sh / RUN /build_bz2.sh + +ADD scripts/build_utf8proc.sh / +RUN /build_utf8proc.sh diff --git a/python/manylinux201x/scripts/build_utf8proc.sh b/python/manylinux201x/scripts/build_utf8proc.sh new file mode 100755 index 00000000000..d74c36a3aeb --- /dev/null +++ b/python/manylinux201x/scripts/build_utf8proc.sh @@ -0,0 +1,38 @@ +#!/bin/bash -ex +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +NCORES=$(($(grep -c ^processor /proc/cpuinfo) + 1)) +export UTF8PROC_VERSION="2.5.0" +export PREFIX="/usr/local" + +curl -sL "https://github.com/JuliaStrings/utf8proc/archive/v${UTF8PROC_VERSION}.tar.gz" -o utf8proc-${UTF8PROC_VERSION}.tar.gz +tar xf utf8proc-${UTF8PROC_VERSION}.tar.gz + +pushd utf8proc-${UTF8PROC_VERSION} +mkdir build +pushd build +cmake .. -GNinja \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=${PREFIX} + +ninja install +popd +popd + +rm -rf utf8proc-${UTF8PROC_VERSION}.tar.gz utf8proc-${UTF8PROC_VERSION} diff --git a/python/requirements-build.txt b/python/requirements-build.txt index b2832909bb4..0433e964107 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,3 +1,4 @@ cython>=0.29 -numpy>=1.14 +numpy>=1.14,<1.19; python_version < "3.6" +numpy>=1.14; python_version >= "3.6" setuptools_scm