From 5427654fddeb0422c724ad123920e5946e8eea88 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Tue, 24 Nov 2020 14:20:29 +0100 Subject: [PATCH 1/7] ARROW-10541: [C++] Add re2 library to core arrow / ARROW_WITH_RE2 --- ci/conda_env_cpp.yml | 1 + ci/conda_env_gandiva.yml | 1 - ci/conda_env_gandiva_win.yml | 1 - cpp/CMakeLists.txt | 12 ++++++++++++ cpp/cmake_modules/DefineOptions.cmake | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 +++++- 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/ci/conda_env_cpp.yml b/ci/conda_env_cpp.yml index 90cef3ea2d1..4388df4237b 100644 --- a/ci/conda_env_cpp.yml +++ b/ci/conda_env_cpp.yml @@ -35,6 +35,7 @@ ninja pkg-config python rapidjson +re2 snappy thrift-cpp>=0.11.0 zlib diff --git a/ci/conda_env_gandiva.yml b/ci/conda_env_gandiva.yml index 5056456fc66..22c70a32e5e 100644 --- a/ci/conda_env_gandiva.yml +++ b/ci/conda_env_gandiva.yml @@ -17,4 +17,3 @@ clangdev=11 llvmdev=11 -re2 diff --git a/ci/conda_env_gandiva_win.yml b/ci/conda_env_gandiva_win.yml index 49b3b8c1de1..9098b53d1f5 100644 --- a/ci/conda_env_gandiva_win.yml +++ b/ci/conda_env_gandiva_win.yml @@ -18,4 +18,3 @@ # llvmdev=9 or later require Visual Studio 2017 clangdev=8 llvmdev=8 -re2 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f9ab1548fbd..e12d8b5744d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -330,6 +330,10 @@ if(ARROW_BUILD_BENCHMARKS set(ARROW_TESTING ON) endif() +if(ARROW_GANDIVA) + set(ARROW_WITH_RE2 ON) +endif() + if(ARROW_CUDA OR ARROW_FLIGHT OR ARROW_PARQUET @@ -746,6 +750,14 @@ if(ARROW_WITH_UTF8PROC) endif() endif() +if(ARROW_WITH_RE2) + list(APPEND ARROW_LINK_LIBS RE2::re2) + list(APPEND ARROW_STATIC_LINK_LIBS RE2::re2) + if(utf8proc_SOURCE STREQUAL "SYSTEM") + list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS RE2::re2) + endif() +endif() + add_custom_target(arrow_dependencies) add_custom_target(arrow_benchmark_dependencies) add_custom_target(arrow_test_dependencies) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index a68c3a92cc7..436e2429407 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -363,6 +363,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_UTF8PROC "Build with support for Unicode properties using the utf8proc library" ON) + define_option(ARROW_WITH_RE2 "Build with support for regular expressions using the re2 library" ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 22531fcfc57..a0a27f17998 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -257,6 +257,9 @@ if(NOT ARROW_COMPUTE) # utf8proc is only potentially used in kernels for now set(ARROW_WITH_UTF8PROC OFF) endif() +if((NOT ARROW_COMPUTE) AND (NOT ARROW_GANDIVA)) + set(ARROW_WITH_RE2 OFF) +endif() # ---------------------------------------------------------------------- # Versions and URLs for toolchain builds, which also can be used to configure @@ -2090,8 +2093,9 @@ macro(build_re2) list(APPEND ARROW_BUNDLED_STATIC_LIBS RE2::re2) endmacro() -if(ARROW_GANDIVA) +if(ARROW_WITH_RE2) resolve_dependency(RE2) + add_definitions(-DARROW_WITH_RE2) # TODO: Don't use global includes but rather target_include_directories get_target_property(RE2_INCLUDE_DIR RE2::re2 INTERFACE_INCLUDE_DIRECTORIES) From 33fb747d45b601c5f1216e4e7429a6c714c70c36 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Tue, 24 Nov 2020 14:27:19 +0000 Subject: [PATCH 2/7] Autoformat/render all the things [automated commit] --- cpp/cmake_modules/DefineOptions.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 436e2429407..43fa9e88b2c 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -363,7 +363,8 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_UTF8PROC "Build with support for Unicode properties using the utf8proc library" ON) - define_option(ARROW_WITH_RE2 "Build with support for regular expressions using the re2 library" ON) + define_option(ARROW_WITH_RE2 + "Build with support for regular expressions using the re2 library" ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN) From f96b7d0da22c0d624052d5710e61d1517e7a282f Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 25 Nov 2020 14:30:01 +0100 Subject: [PATCH 3/7] Add comment about usefulness of options --- cpp/cmake_modules/DefineOptions.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 43fa9e88b2c..d8784afa8c2 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -361,8 +361,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF) define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF) + # Only used when also ARROW_COMPUTE is ON define_option(ARROW_WITH_UTF8PROC "Build with support for Unicode properties using the utf8proc library" ON) + # Only used when also at least ARROW_COMPUTE or ARROW_GANDIVAE is ON define_option(ARROW_WITH_RE2 "Build with support for regular expressions using the re2 library" ON) From 512d1e3223b849bc837a529828dd888df0b791c6 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Wed, 25 Nov 2020 15:28:51 +0100 Subject: [PATCH 4/7] Integrate comments into description --- cpp/cmake_modules/DefineOptions.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index d8784afa8c2..e843b6900d3 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -361,12 +361,10 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF) define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF) - # Only used when also ARROW_COMPUTE is ON define_option(ARROW_WITH_UTF8PROC - "Build with support for Unicode properties using the utf8proc library" ON) - # Only used when also at least ARROW_COMPUTE or ARROW_GANDIVAE is ON + "Build with support for Unicode properties using the utf8proc library (only used if ARROW_COMPUTE is ON)" ON) define_option(ARROW_WITH_RE2 - "Build with support for regular expressions using the re2 library" ON) + "Build with support for regular expressions using the re2 library (only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN) From 195c3b530c2990b5b503d72e544a96597217c14a Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 26 Nov 2020 11:46:47 +0100 Subject: [PATCH 5/7] Update cpp/CMakeLists.txt Co-authored-by: Sutou Kouhei --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e12d8b5744d..27e1b3e3a41 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -753,7 +753,7 @@ endif() if(ARROW_WITH_RE2) list(APPEND ARROW_LINK_LIBS RE2::re2) list(APPEND ARROW_STATIC_LINK_LIBS RE2::re2) - if(utf8proc_SOURCE STREQUAL "SYSTEM") + if(RE2_SOURCE STREQUAL "SYSTEM") list(APPEND ARROW_STATIC_INSTALL_INTERFACE_LIBS RE2::re2) endif() endif() From 433f53ed8dc7c9afad918e168400781f466ddf8b Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 26 Nov 2020 11:48:24 +0100 Subject: [PATCH 6/7] Split option lines --- cpp/cmake_modules/DefineOptions.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index e843b6900d3..d29dc8758a5 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -362,9 +362,9 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF) define_option(ARROW_WITH_UTF8PROC - "Build with support for Unicode properties using the utf8proc library (only used if ARROW_COMPUTE is ON)" ON) + "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON)" ON) define_option(ARROW_WITH_RE2 - "Build with support for regular expressions using the re2 library (only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" ON) + "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN) From e49a781e70143902b8ac5970501b85bf01115ecc Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Thu, 26 Nov 2020 10:49:56 +0000 Subject: [PATCH 7/7] Autoformat/render all the things [automated commit] --- cpp/cmake_modules/DefineOptions.cmake | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index d29dc8758a5..63d0aef283c 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -361,10 +361,14 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_WITH_ZLIB "Build with zlib compression" OFF) define_option(ARROW_WITH_ZSTD "Build with zstd compression" OFF) - define_option(ARROW_WITH_UTF8PROC - "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON)" ON) - define_option(ARROW_WITH_RE2 - "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" ON) + define_option( + ARROW_WITH_UTF8PROC + "Build with support for Unicode properties using the utf8proc library;(only used if ARROW_COMPUTE is ON)" + ON) + define_option( + ARROW_WITH_RE2 + "Build with support for regular expressions using the re2 library;(only used if ARROW_COMPUTE or ARROW_GANDIVA is ON)" + ON) #---------------------------------------------------------------------- if(MSVC_TOOLCHAIN)