From 5503984f228406efe1c49239c497e80775bbc52a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Sep 2023 15:37:55 +0100 Subject: [PATCH 01/95] cmake changes for emscripten --- cpp/CMakePresets.json | 36 +++++++ .../Platform/EmscriptenOverrides.cmake | 56 ++++++++++ cpp/cmake_modules/SetupCxxFlags.cmake | 4 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 8 ++ docs/source/developers/cpp/emscripten.rst | 100 ++++++++++++++++++ docs/source/developers/cpp/index.rst | 1 + python/CMakeLists.txt | 62 ++++++++--- 7 files changed, 252 insertions(+), 15 deletions(-) create mode 100644 cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake create mode 100644 docs/source/developers/cpp/emscripten.rst diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index f6324c1c0a9..0f7357ea5b3 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -46,6 +46,32 @@ "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, + { + "name": "emscripten-overrides", + "hidden": true, + "cacheVariables": { + "ARROW_BUILD_SHARED": "OFF", + "ARROW_BUILD_STATIC": "ON", + "ARROW_BUILD_TESTS": "OFF", + "ARROW_ENABLE_THREADING": "OFF", + "ARROW_CUDA": "OFF", + "ARROW_MIMALLOC": "OFF", + "ARROW_JEMALLOC": "OFF", + "ARROW_S3": "OFF", + "ARROW_DEPENDENCY_SOURCE": "BUNDLED", + "ZLIB_SOURCE": "SYSTEM", + "ARROW_IPC": "OFF", + "ARROW_ORC": "OFF", + "ARROW_SUBSTRAIT": "OFF", + "ARROW_DEPENDENCY_USE_SHARED": "OFF", + "ARROW_WITH_BROTLI": "OFF", + "ARROW_SIMD_LEVEL":"NONE", + "ARROW_RUNTIME_SIMD_LEVEL":"NONE", + "CMAKE_C_BYTE_ORDER":"LITTLE_ENDIAN", + "ARROW_WITH_OPENTELEMETRY":"OFF", + "CMAKE_TOOLCHAIN_FILE": { "type": "PATH", "value": "${sourceDir}/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake" } + } + }, { "name": "features-minimal", "hidden": true, @@ -395,6 +421,16 @@ "displayName": "Release build for PyArrow with everything enabled", "cacheVariables": {} }, + { + "name": "ninja-release-emscripten-python", + "inherits": [ + "emscripten-overrides", + "base-release", + 
"features-python" + ], + "displayName": "Release build which builds an emscripten library, plus PyArrow for Pyodide", + "cacheVariables": {} + }, { "name": "ninja-release-maximal", "inherits": [ diff --git a/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake b/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake new file mode 100644 index 00000000000..d8e49521dba --- /dev/null +++ b/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Force some variables for emscripten +# to disable things that won't work there + +# make us be on the platforms list for cmake +get_filename_component(PLATFORM_FOLDER_PARENT ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) +list(APPEND CMAKE_MODULE_PATH "${PLATFORM_FOLDER_PARENT}") + +include($ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake) + +# ensure zlib is built with -fpic +# and force us to link to the version in emscripten ports +if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + execute_process(COMMAND embuilder --pic --force build zlib) +endif() +set(ZLIB_LIBRARY ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + +# # override default in emscripten which is to not use shared libs +set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) + +# if we leave the system name as Emscripten, then it reloads the original Emscripten.cmake every time a project() command +# is run, which does bad things like disabling shared libraries +set(CMAKE_SYSTEM_NAME EmscriptenOverrides) + +set(CMAKE_C_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") +set(CMAKE_CXX_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + +#set(PYARROW_CPP_HOME "$ENV{ARROW_HOME}/lib") +#list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_INSTALL_PREFIX}/cmake") + +set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) +set(Python3_LIBRARY $ENV{CPYTHONLIB}) +set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) +set(Python3_EXECUTABLE) +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") +set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") +set(CMAKE_SHARED_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") +set(CMAKE_STRIP FALSE) + +set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index a5f5659723c..2564ff374ac 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ 
b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -24,7 +24,9 @@ include(CheckCXXSourceCompiles) message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}") if(NOT DEFINED ARROW_CPU_FLAG) - if(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64") + if(CMAKE_SYSTEM_NAME MATCHES "Emscripten") + set(ARROW_CPU_FLAG "emscripten") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64") set(ARROW_CPU_FLAG "x86") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64|arm64") set(ARROW_CPU_FLAG "aarch64") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 85c0337d108..667784ad2e0 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -955,6 +955,11 @@ set(EP_COMMON_CMAKE_ARGS -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT} -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE}) +# if building with a toolchain file, pass that through +if(CMAKE_TOOLCHAIN_FILE) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) +endif() + # Enable s/ccache if set by parent. if(CMAKE_C_COMPILER_LAUNCHER AND CMAKE_CXX_COMPILER_LAUNCHER) list(APPEND EP_COMMON_CMAKE_ARGS @@ -1614,6 +1619,9 @@ macro(build_thrift) if(DEFINED BOOST_ROOT) list(APPEND THRIFT_CMAKE_ARGS "-DBOOST_ROOT=${BOOST_ROOT}") endif() + if(DEFINED Boost_INCLUDE_DIR) + list(APPEND THRIFT_CMAKE_ARGS "-DBoost_INCLUDE_DIR=${Boost_INCLUDE_DIR}") + endif() if(DEFINED Boost_NAMESPACE) list(APPEND THRIFT_CMAKE_ARGS "-DBoost_NAMESPACE=${Boost_NAMESPACE}") endif() diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst new file mode 100644 index 00000000000..d62e71c7d49 --- /dev/null +++ b/docs/source/developers/cpp/emscripten.rst @@ -0,0 +1,100 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. 
regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + + +.. highlight:: console .. _developers-cpp-emscripten: + +################################################# + Cross compiling for Webassembly with Emscripten +################################################# + +*************** + Prerequisites +*************** +You need cmake and compilers etc. installed as per the normal build instructions. Before building with emscripten, you also need to install emscripten and +activate it using the commands below (see https://emscripten.org/docs/getting_started/downloads.html for details). + +.. code:: shell + + git clone https://github.com/emscripten-core/emsdk.git + cd emsdk + # replace with the desired EMSDK version. + # e.g. for pyodide 0.24, you need EMSDK version 3.1.45 + ./emsdk install + ./emsdk activate + source ./emsdk_env.sh + +If you want to build pyarrow for `pyodide `_, you +need ``pyodide-build`` installed via ``pip``, and to be running with the +same version of python that pyodide is built for, along with the same +versions of emsdk. + +.. code:: shell + + # install pyodide build tools. + # e.g. for version 0.24 of pyodide: + pip install pyodide-build==0.24 + +Then build with the ``ninja-release-emscripten-python`` cmake preset, +like below: + +.. 
code:: shell + + cmake --preset "ninja-release-emscripten-python" + ninja install + +This will install a built static library version of libarrow it into the +emscripten sysroot cache, meaning you can build things that depend on it +and they will find libarrow. + +e.g. if you want to build for pyodide, run the commands above, and then +go to ``arrow/python`` and run + +.. code:: shell + + pyodide build + +It should make a wheel targeting the currently enabled version of +pyodide (i.e. the version corresponding to the currently installed +``pyodide-build``) in the ``dist`` subdirectory. + +************** + Manual Build +************** + +If you want to manually build for emscripten, take a look at the +CMakePresets.json file in the arrow/cpp directory for a list of things +you will need to override. In particular you will need: + +#. Build dependencies set to ``BUNDLED``, so it uses properly cross + compiled build dependencies. + +#. ``CMAKE_TOOLCHAIN_FILE`` set to + ``arrow/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake`` + +#. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` + for builds targeting single threaded emscripten environments such as + pyodide. + +#. ``ARROW_IPC`` and anything else that uses network probably won't + work. + +#. ``ARROW_JEMALLOC`` and ``ARROW_MIMALLOC`` again probably need to be + ``OFF`` + +#. ``ARROW_BUILD_STATIC`` set to ``ON`` and ``ARROW_BUILD_SHARED`` set to + ``OFF`` is most likely to work. 
diff --git a/docs/source/developers/cpp/index.rst b/docs/source/developers/cpp/index.rst index 36c9778bea1..603e1607dc5 100644 --- a/docs/source/developers/cpp/index.rst +++ b/docs/source/developers/cpp/index.rst @@ -27,5 +27,6 @@ C++ Development building development windows + emscripten conventions fuzzing diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 242ba8448f4..6b8fdade7f2 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -68,6 +68,8 @@ if(POLICY CMP0095) cmake_policy(SET CMP0095 NEW) endif() +option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) + # Use the first Python installation on PATH, not the newest one set(Python3_FIND_STRATEGY "LOCATION") # On Windows, use registry last, not first @@ -260,6 +262,22 @@ include(GNUInstallDirs) find_package(Arrow REQUIRED) +if(DUMP_ARROW_ARGUMENTS) + message(STATUS "----- ARROW_SETTINGS_DUMP -----") + get_cmake_property(_variableNames VARIABLES) + list(SORT _variableNames) + foreach(_variableName ${_variableNames}) + unset(MATCHED) + string(REGEX MATCH ^ARROW_.* MATCHED ${_variableName}) + if(NOT MATCHED) + continue() + endif() + message(STATUS "${_variableName}=${${_variableName}}") + endforeach() + message(STATUS "----- ARROW_SETTINGS_END -----") + return() +endif() + set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -570,28 +588,42 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + endif() endif() - endif() - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) - list(APPEND CYTHON_EXTENSIONS _acero) + set(ACERO_LINK_LIBS 
ArrowAcero::arrow_acero_shared) + list(APPEND CYTHON_EXTENSIONS _acero) + else() + # ACERO is statically linked into libarrow_python already + set(ACERO_LINK_LIBS) + list(APPEND CYTHON_EXTENSIONS _acero) + endif() endif() # Dataset if(PYARROW_BUILD_DATASET) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + endif() endif() - endif() - set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + else() + # dataset is sttically linked into libarrow_python already + set(DATASET_LINK_LIBS) + endif() list(APPEND CYTHON_EXTENSIONS _dataset) + endif() # Parquet @@ -612,7 +644,9 @@ if(PYARROW_BUILD_PARQUET) endif() set(PARQUET_LINK_LIBS Parquet::parquet_shared) else() - set(PARQUET_LINK_LIBS Parquet::parquet_static) + # parquet is linked into libarrow_python already + # so isn't needed in the extension + set(PARQUET_LINK_LIBS "") endif() list(APPEND CYTHON_EXTENSIONS _parquet) if(PYARROW_BUILD_PARQUET_ENCRYPTION) From 331aaa3fc1675fb99328240ea630a7ba8d41aea2 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Sep 2023 15:46:48 +0100 Subject: [PATCH 02/95] reverted cmakelists from python --- python/CMakeLists.txt | 62 ++++++++++--------------------------------- 1 file changed, 14 insertions(+), 48 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 6b8fdade7f2..242ba8448f4 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -68,8 +68,6 @@ if(POLICY CMP0095) cmake_policy(SET CMP0095 NEW) endif() -option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) - # Use the first Python installation on PATH, not the newest one 
set(Python3_FIND_STRATEGY "LOCATION") # On Windows, use registry last, not first @@ -262,22 +260,6 @@ include(GNUInstallDirs) find_package(Arrow REQUIRED) -if(DUMP_ARROW_ARGUMENTS) - message(STATUS "----- ARROW_SETTINGS_DUMP -----") - get_cmake_property(_variableNames VARIABLES) - list(SORT _variableNames) - foreach(_variableName ${_variableNames}) - unset(MATCHED) - string(REGEX MATCH ^ARROW_.* MATCHED ${_variableName}) - if(NOT MATCHED) - continue() - endif() - message(STATUS "${_variableName}=${${_variableName}}") - endforeach() - message(STATUS "----- ARROW_SETTINGS_END -----") - return() -endif() - set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -588,42 +570,28 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(ARROW_BUILD_SHARED) - - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) - endif() + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) endif() - - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) - list(APPEND CYTHON_EXTENSIONS _acero) - else() - # ACERO is statically linked into libarrow_python already - set(ACERO_LINK_LIBS) - list(APPEND CYTHON_EXTENSIONS _acero) endif() + + set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) + list(APPEND CYTHON_EXTENSIONS _acero) endif() # Dataset if(PYARROW_BUILD_DATASET) - if(ARROW_BUILD_SHARED) - - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) - endif() + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) endif() - - set(DATASET_LINK_LIBS 
ArrowDataset::arrow_dataset_shared) - else() - # dataset is sttically linked into libarrow_python already - set(DATASET_LINK_LIBS) endif() - list(APPEND CYTHON_EXTENSIONS _dataset) + set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + list(APPEND CYTHON_EXTENSIONS _dataset) endif() # Parquet @@ -644,9 +612,7 @@ if(PYARROW_BUILD_PARQUET) endif() set(PARQUET_LINK_LIBS Parquet::parquet_shared) else() - # parquet is linked into libarrow_python already - # so isn't needed in the extension - set(PARQUET_LINK_LIBS "") + set(PARQUET_LINK_LIBS Parquet::parquet_static) endif() list(APPEND CYTHON_EXTENSIONS _parquet) if(PYARROW_BUILD_PARQUET_ENCRYPTION) From 3e4156e1be5fd759aacf8dd65f3231707d27ff6e Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Sep 2023 20:41:09 +0100 Subject: [PATCH 03/95] documentation tidying after review Co-authored-by: Joris Van den Bossche Co-authored-by: Sutou Kouhei --- docs/source/developers/cpp/emscripten.rst | 25 ++++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index d62e71c7d49..bf670ad40e4 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -16,16 +16,17 @@ .. under the License. -.. highlight:: console .. _developers-cpp-emscripten: +.. highlight:: console +.. _developers-cpp-emscripten: ################################################# - Cross compiling for Webassembly with Emscripten +Cross compiling for WebAssembly with Emscripten ################################################# *************** Prerequisites *************** -You need cmake and compilers etc. installed as per the normal build instructions. Before building with emscripten, you also need to install emscripten and +You need CMake and compilers etc. installed as per the normal build instructions. 
Before building with Emscripten, you also need to install Emscripten and activate it using the commands below (see https://emscripten.org/docs/getting_started/downloads.html for details). .. code:: shell @@ -38,9 +39,9 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st ./emsdk activate source ./emsdk_env.sh -If you want to build pyarrow for `pyodide `_, you +If you want to build PyArrow for `Pyodide <https://pyodide.org>`_, you need ``pyodide-build`` installed via ``pip``, and to be running with the -same version of python that pyodide is built for, along with the same +same version of Python that Pyodide is built for, along with the same versions of emsdk. .. code:: shell @@ -57,11 +58,11 @@ like below: cmake --preset "ninja-release-emscripten-python" ninja install -This will install a built static library version of libarrow it into the -emscripten sysroot cache, meaning you can build things that depend on it -and they will find libarrow. +This will install a built static library version of ``libarrow`` it into the +Emscripten sysroot cache, meaning you can build things that depend on it +and they will find ``libarrow``. -e.g. if you want to build for pyodide, run the commands above, and then +e.g. if you want to build for Pyodide, run the commands above, and then go to ``arrow/python`` and run .. code:: shell @@ -76,8 +77,8 @@ pyodide (i.e. the version corresponding to the currently installed Manual Build ************** -If you want to manually build for emscripten, take a look at the -CMakePresets.json file in the arrow/cpp directory for a list of things +If you want to manually build for Emscripten, take a look at the +``CMakePresets.json`` file in the ``arrow/cpp`` directory for a list of things you will need to override. In particular you will need: #. Build dependencies set to ``BUNDLED``, so it uses properly cross @@ -87,7 +88,7 @@ you will need to override. 
In particular you will need: ``arrow/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake`` #. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` - for builds targeting single threaded emscripten environments such as + for builds targeting single threaded Emscripten environments such as pyodide. #. ``ARROW_IPC`` and anything else that uses network probably won't From 80b21d11d1e1b81e5790bb58d351acd277245ddb Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Sep 2023 20:42:49 +0100 Subject: [PATCH 04/95] preset fixes --- cpp/CMakePresets.json | 69 +++++++++++++++-------- docs/source/developers/cpp/emscripten.rst | 4 +- 2 files changed, 48 insertions(+), 25 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 0f7357ea5b3..bcc2a798fe4 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -47,29 +47,32 @@ } }, { - "name": "emscripten-overrides", + "name": "features-emscripten", "hidden": true, "cacheVariables": { "ARROW_BUILD_SHARED": "OFF", "ARROW_BUILD_STATIC": "ON", "ARROW_BUILD_TESTS": "OFF", - "ARROW_ENABLE_THREADING": "OFF", "ARROW_CUDA": "OFF", - "ARROW_MIMALLOC": "OFF", - "ARROW_JEMALLOC": "OFF", - "ARROW_S3": "OFF", "ARROW_DEPENDENCY_SOURCE": "BUNDLED", - "ZLIB_SOURCE": "SYSTEM", + "ARROW_DEPENDENCY_USE_SHARED": "OFF", + "ARROW_ENABLE_THREADING": "OFF", "ARROW_IPC": "OFF", + "ARROW_JEMALLOC": "OFF", + "ARROW_MIMALLOC": "OFF", "ARROW_ORC": "OFF", + "ARROW_RUNTIME_SIMD_LEVEL": "NONE", + "ARROW_S3": "OFF", + "ARROW_SIMD_LEVEL": "NONE", "ARROW_SUBSTRAIT": "OFF", - "ARROW_DEPENDENCY_USE_SHARED": "OFF", "ARROW_WITH_BROTLI": "OFF", - "ARROW_SIMD_LEVEL":"NONE", - "ARROW_RUNTIME_SIMD_LEVEL":"NONE", - "CMAKE_C_BYTE_ORDER":"LITTLE_ENDIAN", - "ARROW_WITH_OPENTELEMETRY":"OFF", - "CMAKE_TOOLCHAIN_FILE": { "type": "PATH", "value": "${sourceDir}/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake" } + "ARROW_WITH_OPENTELEMETRY": "OFF", + "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN", + 
"CMAKE_TOOLCHAIN_FILE": { + "type": "PATH", + "value": "${sourceDir}/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake" + }, + "ZLIB_SOURCE": "SYSTEM" } }, { @@ -367,6 +370,26 @@ "displayName": "Release build with CUDA integration", "cacheVariables": {} }, + { + "name": "ninja-debug-emscripten", + "inherits": [ + "features-emscripten", + "base-debug", + "features-main" + ], + "displayName": "Debug build which builds an emscripten library", + "cacheVariables": {} + }, + { + "name": "ninja-release-emscripten", + "inherits": [ + "features-emscripten", + "base-release", + "features-main" + ], + "displayName": "Release build which builds an emscripten library", + "cacheVariables": {} + }, { "name": "ninja-release-flight", "inherits": [ @@ -394,6 +417,16 @@ "displayName": "Release build with Gandiva", "cacheVariables": {} }, + { + "name": "ninja-release-python-emscripten", + "inherits": [ + "features-emscripten", + "base-release", + "features-python" + ], + "displayName": "Release build which builds an emscripten library suitable for PyArrow", + "cacheVariables": {} + }, { "name": "ninja-release-python-minimal", "inherits": [ @@ -421,16 +454,6 @@ "displayName": "Release build for PyArrow with everything enabled", "cacheVariables": {} }, - { - "name": "ninja-release-emscripten-python", - "inherits": [ - "emscripten-overrides", - "base-release", - "features-python" - ], - "displayName": "Release build which builds an emscripten library, plus PyArrow for Pyodide", - "cacheVariables": {} - }, { "name": "ninja-release-maximal", "inherits": [ @@ -468,4 +491,4 @@ "cacheVariables": {} } ] -} +} \ No newline at end of file diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index bf670ad40e4..87411588c94 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -50,12 +50,12 @@ versions of emsdk. # e.g. 
for version 0.24 of pyodide: pip install pyodide-build==0.24 -Then build with the ``ninja-release-emscripten-python`` cmake preset, +Then build with the ``ninja-release-python-emscripten`` cmake preset, like below: .. code:: shell - cmake --preset "ninja-release-emscripten-python" + cmake --preset "ninja-release-python-emscripten" ninja install This will install a built static library version of ``libarrow`` it into the From 908abf9255771946e84fb6f4947050987d3344ba Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Sep 2023 21:41:09 +0100 Subject: [PATCH 05/95] updates to simplify emscripten build process --- cpp/CMakePresets.json | 4 +- .../Platform => }/EmscriptenOverrides.cmake | 39 +++++++------------ cpp/cmake_modules/SetupCxxFlags.cmake | 23 ++++++++++- cpp/cmake_modules/ThirdpartyToolchain.cmake | 17 ++++++++ python/CMakeLists.txt | 14 +++++++ 5 files changed, 69 insertions(+), 28 deletions(-) rename cpp/cmake_modules/{Emscripten/Platform => }/EmscriptenOverrides.cmake (60%) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index bcc2a798fe4..5f026fd7539 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -68,9 +68,9 @@ "ARROW_WITH_BROTLI": "OFF", "ARROW_WITH_OPENTELEMETRY": "OFF", "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN", - "CMAKE_TOOLCHAIN_FILE": { + "CMAKE_PROJECT_INCLUDE": { "type": "PATH", - "value": "${sourceDir}/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake" + "value": "${sourceDir}/cmake_modules/EmscriptenOverrides.cmake" }, "ZLIB_SOURCE": "SYSTEM" } diff --git a/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake b/cpp/cmake_modules/EmscriptenOverrides.cmake similarity index 60% rename from cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake rename to cpp/cmake_modules/EmscriptenOverrides.cmake index d8e49521dba..dbacaa5289d 100644 --- a/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake +++ b/cpp/cmake_modules/EmscriptenOverrides.cmake @@ -15,42 +15,31 @@ # specific 
language governing permissions and limitations # under the License. -# Force some variables for emscripten +# Force some variables for Emscripten # to disable things that won't work there -# make us be on the platforms list for cmake -get_filename_component(PLATFORM_FOLDER_PARENT ${CMAKE_CURRENT_LIST_DIR} DIRECTORY) -list(APPEND CMAKE_MODULE_PATH "${PLATFORM_FOLDER_PARENT}") - -include($ENV{EMSDK}/upstream/emscripten/cmake/Modules/Platform/Emscripten.cmake) - -# ensure zlib is built with -fpic -# and force us to link to the version in emscripten ports -if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) - execute_process(COMMAND embuilder --pic --force build zlib) -endif() -set(ZLIB_LIBRARY ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) - -# # override default in emscripten which is to not use shared libs +# # override default in Emscripten which is to not use shared libs set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) -# if we leave the system name as Emscripten, then it reloads the original Emscripten.cmake every time a project() command -# is run, which does bad things like disabling shared libraries -set(CMAKE_SYSTEM_NAME EmscriptenOverrides) - -set(CMAKE_C_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") -set(CMAKE_CXX_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") - -#set(PYARROW_CPP_HOME "$ENV{ARROW_HOME}/lib") -#list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_INSTALL_PREFIX}/cmake") +# these are needed for building pyarrow +# if they aren't set, cmake cross compiling fails for python +# modules (at least under pyodide it does) set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) set(Python3_LIBRARY $ENV{CPYTHONLIB}) set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) set(Python3_EXECUTABLE) +set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) + +# flags for creating shared libraries (only used in pyarrow, because +# emscripten builds libarrow as static) +set(CMAKE_C_FLAGS "-sUSE_ZLIB=1 
-sSIDE_MODULE=1 -fPIC -fexceptions") +set(CMAKE_CXX_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") set(CMAKE_SHARED_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") + +# stripping doesn't work on emscripten set(CMAKE_STRIP FALSE) -set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 2564ff374ac..c850f73c7fd 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -24,7 +24,7 @@ include(CheckCXXSourceCompiles) message(STATUS "System processor: ${CMAKE_SYSTEM_PROCESSOR}") if(NOT DEFINED ARROW_CPU_FLAG) - if(CMAKE_SYSTEM_NAME MATCHES "Emscripten") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(ARROW_CPU_FLAG "emscripten") elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64|amd64|X86|x86|i[3456]86|x64") set(ARROW_CPU_FLAG "x86") @@ -699,3 +699,24 @@ if(MSVC) set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${MSVC_LINKER_FLAGS}") endif() endif() + +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # flags are: + # 1) We're using zlib from Emscripten ports + # 2) We are building library code + # 3) We force *everything* to build as position independent + # 4) And with support for C++ exceptions + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + + # flags for creating shared libraries (only used in pyarrow, because + # Emscripten builds libarrow as static) + # flags are: + # 1) Tell it to use zlib from Emscripten ports + # 2) Tell it to use javascript / webassembly 64 bit number support. 
+ # 3) Tell it to build with support for C++ exceptions + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) + set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) + set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) +endif() \ No newline at end of file diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 667784ad2e0..6897c1a4749 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -258,6 +258,19 @@ macro(resolve_dependency DEPENDENCY_NAME) set(ARG_IS_RUNTIME_DEPENDENCY TRUE) endif() + # ensure zlib is built with -fpic + # and make sure that the build finds the version in Emscripten ports + # - n.b. the actual linking happens because -sUSE_ZLIB=1 is + # set in the compiler variables, but cmake expects + # it to exist at configuration time if we aren't building it as + # bundled + if(PACKAGE_NAME STREQUAL "ZLIB" AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + execute_process(COMMAND embuilder --pic --force build zlib) + endif() + set(ZLIB_LIBRARY ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + endif() + if(ARG_HAVE_ALT) set(PACKAGE_NAME "${DEPENDENCY_NAME}Alt") else() @@ -960,6 +973,10 @@ if(CMAKE_TOOLCHAIN_FILE) list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) endif() +if(CMAKE_PROJECT_INCLUDE) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_PROJECT_INCLUDE=${CMAKE_PROJECT_INCLUDE}) +endif() + # Enable s/ccache if set by parent. 
if(CMAKE_C_COMPILER_LAUNCHER AND CMAKE_CXX_COMPILER_LAUNCHER) list(APPEND EP_COMMON_CMAKE_ARGS diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 242ba8448f4..18a74ab069e 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -21,6 +21,20 @@ cmake_minimum_required(VERSION 3.16) project(pyarrow) +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + include(EmscriptenOverrides) + + # these variables are needed for building pyarrow on Emscripten + # if they aren't set, cmake cross compiling fails for python + # modules (at least under pyodide it does) + set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) + set(Python3_LIBRARY $ENV{CPYTHONLIB}) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + set(Python3_EXECUTABLE) + set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) +endif() + + set(PYARROW_VERSION "14.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") From 31ca7f999b52591d47ec76c71f753cfa9592d535 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Sep 2023 21:48:59 +0100 Subject: [PATCH 06/95] build fixes --- cpp/cmake_modules/EmscriptenOverrides.cmake | 19 --------------- cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 ++- python/CMakeLists.txt | 27 ++++++++++----------- 3 files changed, 16 insertions(+), 34 deletions(-) diff --git a/cpp/cmake_modules/EmscriptenOverrides.cmake b/cpp/cmake_modules/EmscriptenOverrides.cmake index dbacaa5289d..82e9e45aec9 100644 --- a/cpp/cmake_modules/EmscriptenOverrides.cmake +++ b/cpp/cmake_modules/EmscriptenOverrides.cmake @@ -21,25 +21,6 @@ # # override default in Emscripten which is to not use shared libs set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) - -# these are needed for building pyarrow -# if they aren't set, cmake cross compiling fails for python -# modules (at least under pyodide it does) -set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) -set(Python3_LIBRARY $ENV{CPYTHONLIB}) -set(Python3_NumPy_INCLUDE_DIR 
$ENV{NUMPY_LIB}/core/include) -set(Python3_EXECUTABLE) -set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) - -# flags for creating shared libraries (only used in pyarrow, because -# emscripten builds libarrow as static) -set(CMAKE_C_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") -set(CMAKE_CXX_FLAGS "-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") - -set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") -set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") -set(CMAKE_SHARED_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") - # stripping doesn't work on emscripten set(CMAKE_STRIP FALSE) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 6897c1a4749..59eae8eccb2 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -264,7 +264,9 @@ macro(resolve_dependency DEPENDENCY_NAME) # set in the compiler variables, but cmake expects # it to exist at configuration time if we aren't building it as # bundled - if(PACKAGE_NAME STREQUAL "ZLIB" AND CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # n.b. 
we need to do this for all packages + # as some depend on zlib + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) execute_process(COMMAND embuilder --pic --force build zlib) endif() diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 2d0cceca2d8..180dde994d6 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -21,20 +21,6 @@ cmake_minimum_required(VERSION 3.16) project(pyarrow) -if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - include(EmscriptenOverrides) - - # these variables are needed for building pyarrow on Emscripten - # if they aren't set, cmake cross compiling fails for python - # modules (at least under pyodide it does) - set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) - set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) - set(Python3_EXECUTABLE) - set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) -endif() - - set(PYARROW_VERSION "14.0.0-SNAPSHOT") string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" PYARROW_BASE_VERSION "${PYARROW_VERSION}") @@ -54,6 +40,19 @@ if(PYARROW_CPP_HOME) list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}") endif() +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + include(EmscriptenOverrides) + + # these variables are needed for building pyarrow on Emscripten + # if they aren't set, cmake cross compiling fails for python + # modules (at least under pyodide it does) + set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) + set(Python3_LIBRARY $ENV{CPYTHONLIB}) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + set(Python3_EXECUTABLE) + set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) +endif() + include(CMakeParseArguments) # MACOSX_RPATH is enabled by default. 
From c26d68cb0040bc15a66621a89d0dd89a95455127 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Sep 2023 22:01:04 +0100 Subject: [PATCH 07/95] better comments --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 59eae8eccb2..8a888ff77e6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -263,9 +263,9 @@ macro(resolve_dependency DEPENDENCY_NAME) # - n.b. the actual linking happens because -sUSE_ZLIB=1 is # set in the compiler variables, but cmake expects # it to exist at configuration time if we aren't building it as - # bundled - # n.b. we need to do this for all packages - # as some depend on zlib + # bundled. We need to do this for all packages + # not just zlib as some depend on zlib, but we don't rebuild + # if it exists already if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) execute_process(COMMAND embuilder --pic --force build zlib) From f5ec4443b4e39118ff0655797ed457891b334e4b Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 27 Sep 2023 11:54:07 +0100 Subject: [PATCH 08/95] cmake fixes for python emscripten build to work --- python/CMakeLists.txt | 116 ++++++++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 44 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 180dde994d6..72b942ed156 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,5 @@ # Licensed to the Apache Software Foundation (ASF) under one -# or more cod ntributor license agreements. See the NOTICE file +# or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. 
The ASF licenses this file # to you under the Apache License, Version 2.0 (the @@ -40,18 +40,7 @@ if(PYARROW_CPP_HOME) list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}") endif() -if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - include(EmscriptenOverrides) - # these variables are needed for building pyarrow on Emscripten - # if they aren't set, cmake cross compiling fails for python - # modules (at least under pyodide it does) - set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) - set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) - set(Python3_EXECUTABLE) - set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) -endif() include(CMakeParseArguments) @@ -81,6 +70,20 @@ if(POLICY CMP0095) cmake_policy(SET CMP0095 NEW) endif() +# this option is used to auto-set defaults for pyarrow build +option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) + +if($ENV{PYODIDE} STREQUAL 1) + # these variables are needed for building pyarrow on Emscripten + # if they aren't set, cmake cross compiling fails for python + # modules (at least under pyodide it does) + set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) + set(Python3_LIBRARY $ENV{CPYTHONLIB}) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + set(Python3_EXECUTABLE) + set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) +endif() + # Use the first Python installation on PATH, not the newest one set(Python3_FIND_STRATEGY "LOCATION") # On Windows, use registry last, not first @@ -181,44 +184,37 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") if(MSVC) # MSVC version of -Wno-return-type-c-linkage - string(APPEND CMAKE_CXX_FLAGS " /wd4190") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190") # Cython generates some bitshift expressions that MSVC does not like in # __Pyx_PyFloat_DivideObjC - string(APPEND CMAKE_CXX_FLAGS " /wd4293") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293") # Converting to/from C++ bool is pretty wonky in Cython. 
The C4800 warning # seem harmless, and probably not worth the effort of working around it - string(APPEND CMAKE_CXX_FLAGS " /wd4800") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4800") # See https://github.com/cython/cython/issues/2731. Change introduced in # Cython 0.29.1 causes "unsafe use of type 'bool' in operation" - string(APPEND CMAKE_CXX_FLAGS " /wd4804") - - # See https://github.com/cython/cython/issues/4445. - # - # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a - # "unused function" warning but the code emits another "function - # call missing argument list" warning. - string(APPEND CMAKE_CXX_FLAGS " /wd4551") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4804") else() # Enable perf and other tools to work properly - string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") # Suppress Cython warnings - string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized") if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # Cython warnings in clang - string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality") - string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand") - string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations") - string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized") # We have public Cython APIs which return C++ types, which are in an extern # "C" blog (no symbol mangling) and clang doesn't like this - string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Wno-return-type-c-linkage") endif() endif() @@ -280,6 +276,22 @@ include(GNUInstallDirs) find_package(Arrow REQUIRED) +if(DUMP_ARROW_ARGUMENTS) + message(STATUS "----- ARROW_SETTINGS_DUMP -----") + get_cmake_property(_variableNames VARIABLES) + list(SORT _variableNames) + foreach(_variableName ${_variableNames}) + unset(MATCHED) + string(REGEX MATCH ^ARROW_.* MATCHED ${_variableName}) + if(NOT MATCHED) + continue() + endif() + message(STATUS "${_variableName}=${${_variableName}}") + endforeach() + message(STATUS "----- ARROW_SETTINGS_END -----") + return() +endif() + set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -590,28 +602,42 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) + endif() endif() - endif() - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) - list(APPEND CYTHON_EXTENSIONS _acero) + set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) + list(APPEND CYTHON_EXTENSIONS _acero) + else() + # ACERO is statically linked into libarrow_python already + set(ACERO_LINK_LIBS) + list(APPEND CYTHON_EXTENSIONS _acero) + endif() endif() # Dataset if(PYARROW_BUILD_DATASET) - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + if(ARROW_BUILD_SHARED) + + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) + endif() endif() - endif() - set(DATASET_LINK_LIBS 
ArrowDataset::arrow_dataset_shared) + set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + else() + # dataset is sttically linked into libarrow_python already + set(DATASET_LINK_LIBS) + endif() list(APPEND CYTHON_EXTENSIONS _dataset) + endif() # Parquet @@ -632,7 +658,9 @@ if(PYARROW_BUILD_PARQUET) endif() set(PARQUET_LINK_LIBS Parquet::parquet_shared) else() - set(PARQUET_LINK_LIBS Parquet::parquet_static) + # parquet is linked into libarrow_python already + # so isn't needed in the extension + set(PARQUET_LINK_LIBS "") endif() list(APPEND CYTHON_EXTENSIONS _parquet) if(PYARROW_BUILD_PARQUET_ENCRYPTION) From 3c2f1797333390fac9067cbc692ea62122c82fdb Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 27 Sep 2023 11:58:26 +0100 Subject: [PATCH 09/95] fix pyodide stdc version number --- python/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 72b942ed156..417556e0c61 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -82,6 +82,11 @@ if($ENV{PYODIDE} STREQUAL 1) set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) set(Python3_EXECUTABLE) set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) + + # remove any c++ standard settings from cflags and let cmake set it. + # n.b. 
this is here because pyodide sets -stdc++=14, which breaks pyarrow builds + string(REGEX REPLACE "-std=c\\+\\+[0-9]+" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} ) + endif() # Use the first Python installation on PATH, not the newest one From aec8a79c26fbc40f78b1a4a3ac8e63ef04e83747 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Sep 2023 11:58:33 +0100 Subject: [PATCH 10/95] made substrait, ipc, brotli build on emscripten --- cpp/CMakePresets.json | 12 ++- cpp/cmake_modules/ThirdpartyToolchain.cmake | 103 ++++++++++++++++++-- 2 files changed, 101 insertions(+), 14 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 5f026fd7539..09eaa2dd422 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -50,6 +50,7 @@ "name": "features-emscripten", "hidden": true, "cacheVariables": { + "ARROW_ACERO": "ON", "ARROW_BUILD_SHARED": "OFF", "ARROW_BUILD_STATIC": "ON", "ARROW_BUILD_TESTS": "OFF", @@ -57,16 +58,19 @@ "ARROW_DEPENDENCY_SOURCE": "BUNDLED", "ARROW_DEPENDENCY_USE_SHARED": "OFF", "ARROW_ENABLE_THREADING": "OFF", - "ARROW_IPC": "OFF", + "ARROW_FLIGHT": "OFF", + "ARROW_IPC": "ON", "ARROW_JEMALLOC": "OFF", "ARROW_MIMALLOC": "OFF", - "ARROW_ORC": "OFF", + "ARROW_ORC": "ON", "ARROW_RUNTIME_SIMD_LEVEL": "NONE", "ARROW_S3": "OFF", + "ARROW_SNAPPY_USE_SHARED": "OFF", "ARROW_SIMD_LEVEL": "NONE", - "ARROW_SUBSTRAIT": "OFF", - "ARROW_WITH_BROTLI": "OFF", + "ARROW_SUBSTRAIT": "ON", + "ARROW_WITH_BROTLI": "ON", "ARROW_WITH_OPENTELEMETRY": "OFF", + "ARROW_WITH_SNAPPY": "ON", "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN", "CMAKE_PROJECT_INCLUDE": { "type": "PATH", diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 8a888ff77e6..d759749262e 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1333,6 +1333,12 @@ macro(build_snappy) ${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF -DSNAPPY_BUILD_BENCHMARKS=OFF 
"-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # ignore linker flag errors, as snappy sets + # -Werror -Wall, and emscripten doesn't support -soname + set(SNAPPY_CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") + endif() + externalproject_add(snappy_ep ${EP_COMMON_OPTIONS} BUILD_IN_SOURCE 1 @@ -1378,6 +1384,7 @@ macro(build_brotli) message(STATUS "Building brotli from source") set(BROTLI_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/brotli_ep/src/brotli_ep-install") set(BROTLI_INCLUDE_DIR "${BROTLI_PREFIX}/include") + set(BROTLI_LIB_DIR "${BROTLI_PREFIX}/lib") set(BROTLI_STATIC_LIBRARY_ENC "${BROTLI_PREFIX}/lib/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" ) @@ -1389,16 +1396,51 @@ macro(build_brotli) ) set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}") - externalproject_add(brotli_ep - ${EP_COMMON_OPTIONS} - URL ${BROTLI_SOURCE_URL} - URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" - "${BROTLI_STATIC_LIBRARY_DEC}" - "${BROTLI_STATIC_LIBRARY_COMMON}" - ${BROTLI_BUILD_BYPRODUCTS} - CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy) + set(BROTLI_INSTALL_OVERRIDE) + + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # cmake install is disabled for brotli on emscripten, so we have + # to manually copy the libraries to our install directory + set(BROTLI_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build) + set(BROTLI_BUILD_LIBS "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}") + + set(BROTLI_BUILD_INCLUDE_DIR 
${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli) + + externalproject_add(brotli_ep + ${EP_COMMON_OPTIONS} + URL ${BROTLI_SOURCE_URL} + URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" + "${BROTLI_STATIC_LIBRARY_DEC}" + "${BROTLI_STATIC_LIBRARY_COMMON}" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy + INSTALL_COMMAND "" + ) + add_custom_command(TARGET brotli_ep POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX} + ${BROTLI_LIB_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${BROTLI_BUILD_INCLUDE_DIR} + ${BROTLI_INCLUDE_DIR}/brotli + ) + else() # not emscripten - just behave as normal + externalproject_add(brotli_ep + ${EP_COMMON_OPTIONS} + URL ${BROTLI_SOURCE_URL} + URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" + "${BROTLI_STATIC_LIBRARY_DEC}" + "${BROTLI_STATIC_LIBRARY_COMMON}" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy + ) + endif() add_dependencies(toolchain brotli_ep) file(MAKE_DIRECTORY "${BROTLI_INCLUDE_DIR}") @@ -1852,6 +1894,41 @@ if(ARROW_WITH_PROTOBUF) set(ARROW_PROTOBUF_PROTOC protobuf::protoc) endif() + if(CMAKE_CROSSCOMPILING) + # if we are cross compiling, we need to build protoc for the host + # system also, as it is used when building arrow + # We do this by calling cmake as a child process + # with CXXFLAGS / CFLAGS and cmake flags cleared + set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") + set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") + + set(PROTOBUF_HOST_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS=" + "-DCMAKE_C_FLAGS=" + "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" + -Dprotobuf_BUILD_TESTS=OFF + -Dprotobuf_DEBUG_POSTFIX=) + + externalproject_add(protobuf_ep_host + 
${EP_COMMON_OPTIONS} + CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} + BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" + BUILD_IN_SOURCE 1 + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + + add_executable(arrow::protobuf::host_protoc IMPORTED) + set_target_properties(arrow::protobuf::host_protoc PROPERTIES IMPORTED_LOCATION + "${PROTOBUF_HOST_COMPILER}") + + add_dependencies(protobuf_ep protobuf_ep_host) + + # make sure host protoc is used for compiling protobuf files + # during build of e.g. orc + set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc) + + endif() + # Log protobuf paths as we often see issues with mixed sources for # the libraries and protoc. get_target_property(PROTOBUF_PROTOC_EXECUTABLE ${ARROW_PROTOBUF_PROTOC} @@ -4358,6 +4435,8 @@ macro(build_orc) get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) + set(LZ4_TARGET LZ4::lz4) + set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" @@ -4374,6 +4453,10 @@ macro(build_orc) "-DPROTOBUF_LIBRARY=$" "-DPROTOC_LIBRARY=$" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" + "-DSNAPPY_LIBRARY=$" + "-DLZ4_LIBRARY=$" + "-DLZ4_STATIC_LIBRARY=$" + "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" "-DZSTD_INCLUDE_DIR=$" From 0db01d74e569b2706393aa7f6612008808253548 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Sep 2023 12:00:37 +0100 Subject: [PATCH 11/95] docs fix --- docs/source/developers/cpp/emscripten.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index 87411588c94..b12253eb132 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -91,7 +91,7 @@ you will need to override. 
In particular you will need: for builds targeting single threaded Emscripten environments such as pyodide. -#. ``ARROW_IPC`` and anything else that uses network probably won't +#. ``ARROW_FLIGHT`` and anything else that uses network probably won't work. #. ``ARROW_JEMALLOC`` and ``ARROW_MIMALLOC`` again probably need to be From 0daa6705ba2cfb154c609253338fa9a6772f7390 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Sep 2023 12:10:04 +0100 Subject: [PATCH 12/95] bad if statement fix --- python/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 417556e0c61..dc5d87e4383 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -73,7 +73,7 @@ endif() # this option is used to auto-set defaults for pyarrow build option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) -if($ENV{PYODIDE} STREQUAL 1) +if($ENV{PYODIDE}) # these variables are needed for building pyarrow on Emscripten # if they aren't set, cmake cross compiling fails for python # modules (at least under pyodide it does) From ff5848148e39a679d7b2206c8cecb79663ccc6cb Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Sep 2023 12:12:48 +0100 Subject: [PATCH 13/95] cmake-format --- cpp/cmake_modules/EmscriptenOverrides.cmake | 1 - cpp/cmake_modules/SetupCxxFlags.cmake | 7 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 102 ++++++++++---------- python/CMakeLists.txt | 6 +- 4 files changed, 58 insertions(+), 58 deletions(-) diff --git a/cpp/cmake_modules/EmscriptenOverrides.cmake b/cpp/cmake_modules/EmscriptenOverrides.cmake index 82e9e45aec9..1b82978c182 100644 --- a/cpp/cmake_modules/EmscriptenOverrides.cmake +++ b/cpp/cmake_modules/EmscriptenOverrides.cmake @@ -23,4 +23,3 @@ set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) # stripping doesn't work on emscripten set(CMAKE_STRIP FALSE) - diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 
7269890c745..a7d9ab13dd9 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -710,11 +710,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 3) We force *everything* to build as position independent # 4) And with support for C++ exceptions set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) - # flags are: + # flags are: # 1) Tell it to use zlib from Emscripten ports # 2) Tell it to use javascript / webassembly 64 bit number support. # 3) Tell it to build with support for C++ exceptions @@ -722,4 +723,4 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) -endif() \ No newline at end of file +endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index d759749262e..b50f3f1f132 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -260,9 +260,9 @@ macro(resolve_dependency DEPENDENCY_NAME) # ensure zlib is built with -fpic # and make sure that the build finds the version in Emscripten ports - # - n.b. the actual linking happens because -sUSE_ZLIB=1 is + # - n.b. the actual linking happens because -sUSE_ZLIB=1 is # set in the compiler variables, but cmake expects - # it to exist at configuration time if we aren't building it as + # it to exist at configuration time if we aren't building it as # bundled. 
We need to do this for all packages # not just zlib as some depend on zlib, but we don't rebuild # if it exists already @@ -1336,7 +1336,9 @@ macro(build_snappy) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # ignore linker flag errors, as snappy sets # -Werror -Wall, and emscripten doesn't support -soname - set(SNAPPY_CMAKE_ARGS ${SNAPPY_CMAKE_ARGS} "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") + set(SNAPPY_CMAKE_ARGS + ${SNAPPY_CMAKE_ARGS} + "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") endif() externalproject_add(snappy_ep @@ -1402,44 +1404,44 @@ macro(build_brotli) # cmake install is disabled for brotli on emscripten, so we have # to manually copy the libraries to our install directory set(BROTLI_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build) - set(BROTLI_BUILD_LIBS "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" - "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" - "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}") + set(BROTLI_BUILD_LIBS + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" + ) - set(BROTLI_BUILD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli) + set(BROTLI_BUILD_INCLUDE_DIR + ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli) externalproject_add(brotli_ep - ${EP_COMMON_OPTIONS} - URL ${BROTLI_SOURCE_URL} - URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" - "${BROTLI_STATIC_LIBRARY_DEC}" - "${BROTLI_STATIC_LIBRARY_COMMON}" - 
${BROTLI_BUILD_BYPRODUCTS} - CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy - INSTALL_COMMAND "" - ) - add_custom_command(TARGET brotli_ep POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - ${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX} - ${BROTLI_LIB_DIR} - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${BROTLI_BUILD_INCLUDE_DIR} - ${BROTLI_INCLUDE_DIR}/brotli - ) + ${EP_COMMON_OPTIONS} + URL ${BROTLI_SOURCE_URL} + URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" + "${BROTLI_STATIC_LIBRARY_DEC}" + "${BROTLI_STATIC_LIBRARY_COMMON}" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy + INSTALL_COMMAND "") + add_custom_command(TARGET brotli_ep + POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}*${CMAKE_STATIC_LIBRARY_SUFFIX} + ${BROTLI_LIB_DIR} + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${BROTLI_BUILD_INCLUDE_DIR} ${BROTLI_INCLUDE_DIR}/brotli) else() # not emscripten - just behave as normal externalproject_add(brotli_ep - ${EP_COMMON_OPTIONS} - URL ${BROTLI_SOURCE_URL} - URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" - "${BROTLI_STATIC_LIBRARY_DEC}" - "${BROTLI_STATIC_LIBRARY_COMMON}" - ${BROTLI_BUILD_BYPRODUCTS} - CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy - ) + ${EP_COMMON_OPTIONS} + URL ${BROTLI_SOURCE_URL} + URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" + "${BROTLI_STATIC_LIBRARY_DEC}" + "${BROTLI_STATIC_LIBRARY_COMMON}" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy) endif() add_dependencies(toolchain brotli_ep) @@ -1896,30 +1898,30 @@ if(ARROW_WITH_PROTOBUF) if(CMAKE_CROSSCOMPILING) # if we are cross compiling, we need to build protoc for the host - # system also, as it 
is used when building arrow + # system also, as it is used when building arrow # We do this by calling cmake as a child process # with CXXFLAGS / CFLAGS and cmake flags cleared set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") set(PROTOBUF_HOST_CMAKE_ARGS - "-DCMAKE_CXX_FLAGS=" - "-DCMAKE_C_FLAGS=" - "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" - -Dprotobuf_BUILD_TESTS=OFF - -Dprotobuf_DEBUG_POSTFIX=) + "-DCMAKE_CXX_FLAGS=" + "-DCMAKE_C_FLAGS=" + "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" + -Dprotobuf_BUILD_TESTS=OFF + -Dprotobuf_DEBUG_POSTFIX=) externalproject_add(protobuf_ep_host - ${EP_COMMON_OPTIONS} - CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} - BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" - BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + ${EP_COMMON_OPTIONS} + CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} + BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" + BUILD_IN_SOURCE 1 + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") add_executable(arrow::protobuf::host_protoc IMPORTED) - set_target_properties(arrow::protobuf::host_protoc PROPERTIES IMPORTED_LOCATION - "${PROTOBUF_HOST_COMPILER}") + set_target_properties(arrow::protobuf::host_protoc + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") add_dependencies(protobuf_ep protobuf_ep_host) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index dc5d87e4383..c4b17832f8d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -40,8 +40,6 @@ if(PYARROW_CPP_HOME) list(INSERT CMAKE_PREFIX_PATH 0 "${PYARROW_CPP_HOME}") endif() - - include(CMakeParseArguments) # MACOSX_RPATH is enabled by default. 
@@ -75,7 +73,7 @@ option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) if($ENV{PYODIDE}) # these variables are needed for building pyarrow on Emscripten - # if they aren't set, cmake cross compiling fails for python + # if they aren't set, cmake cross compiling fails for python # modules (at least under pyodide it does) set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) set(Python3_LIBRARY $ENV{CPYTHONLIB}) @@ -85,7 +83,7 @@ if($ENV{PYODIDE}) # remove any c++ standard settings from cflags and let cmake set it. # n.b. this is here because pyodide sets -stdc++=14, which breaks pyarrow builds - string(REGEX REPLACE "-std=c\\+\\+[0-9]+" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} ) + string(REGEX REPLACE "-std=c\\+\\+[0-9]+" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) endif() From 20f9f05450e74c979b80241c9a49f92e76c6e720 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 20:14:19 +0100 Subject: [PATCH 14/95] removed python cmake changes from emscripten C++ build pr --- python/CMakeLists.txt | 110 +++++++++++++----------------------------- 1 file changed, 33 insertions(+), 77 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c4b17832f8d..29f8d2da72f 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,5 +1,5 @@ # Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file +# or more cod ntributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. 
The ASF licenses this file # to you under the Apache License, Version 2.0 (the @@ -68,25 +68,6 @@ if(POLICY CMP0095) cmake_policy(SET CMP0095 NEW) endif() -# this option is used to auto-set defaults for pyarrow build -option(DUMP_ARROW_ARGUMENTS "Dump the arrow arguments then quit" OFF) - -if($ENV{PYODIDE}) - # these variables are needed for building pyarrow on Emscripten - # if they aren't set, cmake cross compiling fails for python - # modules (at least under pyodide it does) - set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) - set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) - set(Python3_EXECUTABLE) - set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) - - # remove any c++ standard settings from cflags and let cmake set it. - # n.b. this is here because pyodide sets -stdc++=14, which breaks pyarrow builds - string(REGEX REPLACE "-std=c\\+\\+[0-9]+" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS}) - -endif() - # Use the first Python installation on PATH, not the newest one set(Python3_FIND_STRATEGY "LOCATION") # On Windows, use registry last, not first @@ -187,37 +168,44 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${PYARROW_CXXFLAGS}") if(MSVC) # MSVC version of -Wno-return-type-c-linkage - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4190") + string(APPEND CMAKE_CXX_FLAGS " /wd4190") # Cython generates some bitshift expressions that MSVC does not like in # __Pyx_PyFloat_DivideObjC - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4293") + string(APPEND CMAKE_CXX_FLAGS " /wd4293") # Converting to/from C++ bool is pretty wonky in Cython. The C4800 warning # seem harmless, and probably not worth the effort of working around it - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4800") + string(APPEND CMAKE_CXX_FLAGS " /wd4800") # See https://github.com/cython/cython/issues/2731. 
Change introduced in # Cython 0.29.1 causes "unsafe use of type 'bool' in operation" - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4804") + string(APPEND CMAKE_CXX_FLAGS " /wd4804") + + # See https://github.com/cython/cython/issues/4445. + # + # Cython 3 emits "(void)__Pyx_PyObject_CallMethod0;" to suppress a + # "unused function" warning but the code emits another "function + # call missing argument list" warning. + string(APPEND CMAKE_CXX_FLAGS " /wd4551") else() # Enable perf and other tools to work properly - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") + string(APPEND CMAKE_CXX_FLAGS " -fno-omit-frame-pointer") # Suppress Cython warnings - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable -Wno-maybe-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-variable -Wno-maybe-uninitialized") if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang") # Cython warnings in clang - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-parentheses-equality") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-constant-logical-operand") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-missing-declarations") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sometimes-uninitialized") + string(APPEND CMAKE_CXX_FLAGS " -Wno-parentheses-equality") + string(APPEND CMAKE_CXX_FLAGS " -Wno-constant-logical-operand") + string(APPEND CMAKE_CXX_FLAGS " -Wno-missing-declarations") + string(APPEND CMAKE_CXX_FLAGS " -Wno-sometimes-uninitialized") # We have public Cython APIs which return C++ types, which are in an extern # "C" blog (no symbol mangling) and clang doesn't like this - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-return-type-c-linkage") + string(APPEND CMAKE_CXX_FLAGS " -Wno-return-type-c-linkage") endif() endif() @@ -279,22 +267,6 @@ include(GNUInstallDirs) find_package(Arrow REQUIRED) -if(DUMP_ARROW_ARGUMENTS) - message(STATUS "----- ARROW_SETTINGS_DUMP -----") - get_cmake_property(_variableNames VARIABLES) - list(SORT 
_variableNames) - foreach(_variableName ${_variableNames}) - unset(MATCHED) - string(REGEX MATCH ^ARROW_.* MATCHED ${_variableName}) - if(NOT MATCHED) - continue() - endif() - message(STATUS "${_variableName}=${${_variableName}}") - endforeach() - message(STATUS "----- ARROW_SETTINGS_END -----") - return() -endif() - set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -605,42 +577,28 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(ARROW_BUILD_SHARED) - - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) - endif() + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_ACERO_IMPORT_LIB}) endif() - - set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) - list(APPEND CYTHON_EXTENSIONS _acero) - else() - # ACERO is statically linked into libarrow_python already - set(ACERO_LINK_LIBS) - list(APPEND CYTHON_EXTENSIONS _acero) endif() + + set(ACERO_LINK_LIBS ArrowAcero::arrow_acero_shared) + list(APPEND CYTHON_EXTENSIONS _acero) endif() # Dataset if(PYARROW_BUILD_DATASET) - if(ARROW_BUILD_SHARED) - - if(PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) - if(MSVC) - bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) - endif() + if(PYARROW_BUNDLE_ARROW_CPP) + bundle_arrow_lib(${ARROW_DATASET_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) + if(MSVC) + bundle_arrow_import_lib(${ARROW_DATASET_IMPORT_LIB}) endif() - - set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) - else() - # dataset is sttically linked into libarrow_python already - set(DATASET_LINK_LIBS) endif() - list(APPEND CYTHON_EXTENSIONS _dataset) + set(DATASET_LINK_LIBS ArrowDataset::arrow_dataset_shared) + list(APPEND CYTHON_EXTENSIONS _dataset) endif() # Parquet @@ 
-661,9 +619,7 @@ if(PYARROW_BUILD_PARQUET) endif() set(PARQUET_LINK_LIBS Parquet::parquet_shared) else() - # parquet is linked into libarrow_python already - # so isn't needed in the extension - set(PARQUET_LINK_LIBS "") + set(PARQUET_LINK_LIBS Parquet::parquet_static) endif() list(APPEND CYTHON_EXTENSIONS _parquet) if(PYARROW_BUILD_PARQUET_ENCRYPTION) From b777f69cb9c73595b931113d258c468b7424ba37 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 20:49:12 +0100 Subject: [PATCH 15/95] documentation fix --- docs/source/developers/cpp/emscripten.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index b12253eb132..3ed97749e08 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -50,12 +50,12 @@ versions of emsdk. # e.g. for version 0.24 of pyodide: pip install pyodide-build==0.24 -Then build with the ``ninja-release-python-emscripten`` cmake preset, +Then build with the ``ninja-release-emscripten`` cmake preset, like below: .. code:: shell - cmake --preset "ninja-release-python-emscripten" + emcmake cmake --preset "ninja-release-emscripten" ninja install This will install a built static library version of ``libarrow`` it into the @@ -84,8 +84,7 @@ you will need to override. In particular you will need: #. Build dependencies set to ``BUNDLED``, so it uses properly cross compiled build dependencies. -#. ``CMAKE_TOOLCHAIN_FILE`` set to - ``arrow/cpp/cmake_modules/Emscripten/Platform/EmscriptenOverrides.cmake`` +#. ``CMAKE_TOOLCHAIN_FILE`` set by using ``emcmake cmake`` instead of just ``cmake``. #. 
You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` for builds targeting single threaded Emscripten environments such as From d93d8cc765bcc2632224b754c948cbb4fd6cab0d Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:10:03 +0100 Subject: [PATCH 16/95] preset fixes --- cpp/CMakePresets.json | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 09eaa2dd422..dd59c99a8d1 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -65,7 +65,6 @@ "ARROW_ORC": "ON", "ARROW_RUNTIME_SIMD_LEVEL": "NONE", "ARROW_S3": "OFF", - "ARROW_SNAPPY_USE_SHARED": "OFF", "ARROW_SIMD_LEVEL": "NONE", "ARROW_SUBSTRAIT": "ON", "ARROW_WITH_BROTLI": "ON", @@ -75,8 +74,7 @@ "CMAKE_PROJECT_INCLUDE": { "type": "PATH", "value": "${sourceDir}/cmake_modules/EmscriptenOverrides.cmake" - }, - "ZLIB_SOURCE": "SYSTEM" + } } }, { @@ -377,21 +375,19 @@ { "name": "ninja-debug-emscripten", "inherits": [ - "features-emscripten", "base-debug", - "features-main" + "features-emscripten", ], - "displayName": "Release build which builds an emscripten library", + "displayName": "Debug build which builds an Emscripten library", "cacheVariables": {} }, { "name": "ninja-release-emscripten", "inherits": [ - "features-emscripten", "base-release", - "features-main" + "features-emscripten", ], - "displayName": "Release build which builds an emscripten library", + "displayName": "Release build which builds an Emscripten library", "cacheVariables": {} }, { @@ -421,16 +417,6 @@ "displayName": "Release build with Gandiva", "cacheVariables": {} }, - { - "name": "ninja-release-python-emscripten", - "inherits": [ - "features-emscripten", - "base-release", - "features-python" - ], - "displayName": "Release build which builds an emscripten library suitable for PyArrow", - "cacheVariables": {} - }, { "name": "ninja-release-python-minimal", "inherits": [ From 831715d67cf74848d4c95d0ebed28ec3ae61627a 
Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:12:17 +0100 Subject: [PATCH 17/95] toolchain fixes --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 169 +++++++++++--------- 1 file changed, 93 insertions(+), 76 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b50f3f1f132..544a735a4a5 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -258,20 +258,6 @@ macro(resolve_dependency DEPENDENCY_NAME) set(ARG_IS_RUNTIME_DEPENDENCY TRUE) endif() - # ensure zlib is built with -fpic - # and make sure that the build finds the version in Emscripten ports - # - n.b. the actual linking happens because -sUSE_ZLIB=1 is - # set in the compiler variables, but cmake expects - # it to exist at configuration time if we aren't building it as - # bundled. We need to do this for all packages - # not just zlib as some depend on zlib, but we don't rebuild - # if it exists already - if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) - execute_process(COMMAND embuilder --pic --force build zlib) - endif() - set(ZLIB_LIBRARY ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) - endif() if(ARG_HAVE_ALT) set(PACKAGE_NAME "${DEPENDENCY_NAME}Alt") @@ -975,6 +961,11 @@ if(CMAKE_TOOLCHAIN_FILE) list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}) endif() +# and crosscompiling emulator (for try_run() ) +if(CMAKE_CROSSCOMPILING_EMULATOR) + list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}) +endif() + if(CMAKE_PROJECT_INCLUDE) list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_PROJECT_INCLUDE=${CMAKE_PROJECT_INCLUDE}) endif() @@ -1398,8 +1389,6 @@ macro(build_brotli) ) set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}") - set(BROTLI_INSTALL_OVERRIDE) - if(CMAKE_SYSTEM_NAME STREQUAL 
"Emscripten") # cmake install is disabled for brotli on emscripten, so we have # to manually copy the libraries to our install directory @@ -1828,6 +1817,39 @@ macro(build_protobuf) add_dependencies(arrow::protobuf::protoc protobuf_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf) + + if(CMAKE_CROSSCOMPILING) + # if we are cross compiling, we need to build protoc for the host + # system also, as it is used when building arrow + # We do this by calling cmake as a child process + # with CXXFLAGS / CFLAGS and cmake flags cleared + set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") + set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") + + set(PROTOBUF_HOST_CMAKE_ARGS + "-DCMAKE_CXX_FLAGS=" + "-DCMAKE_C_FLAGS=" + "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" + -Dprotobuf_BUILD_TESTS=OFF + -Dprotobuf_DEBUG_POSTFIX=) + + externalproject_add(protobuf_ep_host + ${EP_COMMON_OPTIONS} + CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} + BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" + BUILD_IN_SOURCE 1 + URL ${PROTOBUF_SOURCE_URL} + URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") + + add_executable(arrow::protobuf::host_protoc IMPORTED) + set_target_properties(arrow::protobuf::host_protoc + PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") + + add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) + + endif() + + endmacro() if(ARROW_WITH_PROTOBUF) @@ -1885,7 +1907,11 @@ if(ARROW_WITH_PROTOBUF) else() set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc) endif() - if(TARGET arrow::protobuf::protoc) + if(TARGET arrow::protobuf::host_protoc) + # make sure host protoc is used for compiling protobuf files + # during build of e.g. 
orc + set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc) + elseif(TARGET arrow::protobuf::protoc) set(ARROW_PROTOBUF_PROTOC arrow::protobuf::protoc) else() if(NOT TARGET protobuf::protoc) @@ -1896,41 +1922,6 @@ if(ARROW_WITH_PROTOBUF) set(ARROW_PROTOBUF_PROTOC protobuf::protoc) endif() - if(CMAKE_CROSSCOMPILING) - # if we are cross compiling, we need to build protoc for the host - # system also, as it is used when building arrow - # We do this by calling cmake as a child process - # with CXXFLAGS / CFLAGS and cmake flags cleared - set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") - set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") - - set(PROTOBUF_HOST_CMAKE_ARGS - "-DCMAKE_CXX_FLAGS=" - "-DCMAKE_C_FLAGS=" - "-DCMAKE_INSTALL_PREFIX=${PROTOBUF_HOST_PREFIX}" - -Dprotobuf_BUILD_TESTS=OFF - -Dprotobuf_DEBUG_POSTFIX=) - - externalproject_add(protobuf_ep_host - ${EP_COMMON_OPTIONS} - CMAKE_ARGS ${PROTOBUF_HOST_CMAKE_ARGS} - BUILD_BYPRODUCTS "${PROTOBUF_HOST_COMPILER}" - BUILD_IN_SOURCE 1 - URL ${PROTOBUF_SOURCE_URL} - URL_HASH "SHA256=${ARROW_PROTOBUF_BUILD_SHA256_CHECKSUM}") - - add_executable(arrow::protobuf::host_protoc IMPORTED) - set_target_properties(arrow::protobuf::host_protoc - PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") - - add_dependencies(protobuf_ep protobuf_ep_host) - - # make sure host protoc is used for compiling protobuf files - # during build of e.g. orc - set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc) - - endif() - # Log protobuf paths as we often see issues with mixed sources for # the libraries and protoc. 
get_target_property(PROTOBUF_PROTOC_EXECUTABLE ${ARROW_PROTOBUF_PROTOC} @@ -2453,36 +2444,62 @@ endif() macro(build_zlib) message(STATUS "Building ZLIB from source") - set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") - if(MSVC) - if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) - else() - set(ZLIB_STATIC_LIB_NAME zlibstatic.lib) + + # ensure zlib is built with -fpic + # and make sure that the build finds the version in Emscripten ports + # - n.b. the actual linking happens because -sUSE_ZLIB=1 is + # set in the compiler variables, but cmake expects + # it to exist at configuration time if we aren't building it as + # bundled. We need to do this for all packages + # not just zlib as some depend on zlib, but we don't rebuild + # if it exists already + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # build zlib using Emscripten ports + if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + execute_process(COMMAND embuilder --pic --force build zlib) endif() + set(ZLIB_STATIC_LIB ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) + set(ZLIB_LIBRARIES ${ZLIB_LIBRARY}) +# set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + + add_library(ZLIB::ZLIB STATIC IMPORTED) + set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) + set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) +# target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") + else() - set(ZLIB_STATIC_LIB_NAME libz.a) - endif() - set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") - set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}") + set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") + if(MSVC) + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + set(ZLIB_STATIC_LIB_NAME zlibstaticd.lib) + else() + set(ZLIB_STATIC_LIB_NAME zlibstatic.lib) + endif() + else() + 
set(ZLIB_STATIC_LIB_NAME libz.a) + endif() + set(ZLIB_STATIC_LIB "${ZLIB_PREFIX}/lib/${ZLIB_STATIC_LIB_NAME}") + set(ZLIB_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ZLIB_PREFIX}") - externalproject_add(zlib_ep - ${EP_COMMON_OPTIONS} - URL ${ZLIB_SOURCE_URL} - URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" - CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) + externalproject_add(zlib_ep + ${EP_COMMON_OPTIONS} + URL ${ZLIB_SOURCE_URL} + URL_HASH "SHA256=${ARROW_ZLIB_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${ZLIB_STATIC_LIB}" + CMAKE_ARGS ${ZLIB_CMAKE_ARGS}) - file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include") + file(MAKE_DIRECTORY "${ZLIB_PREFIX}/include") - add_library(ZLIB::ZLIB STATIC IMPORTED) - set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) - set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") - set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) - target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") + add_library(ZLIB::ZLIB STATIC IMPORTED) + set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) + set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) + target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") - add_dependencies(toolchain zlib_ep) - add_dependencies(ZLIB::ZLIB zlib_ep) + add_dependencies(toolchain zlib_ep) + add_dependencies(ZLIB::ZLIB zlib_ep) + endif() list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) set(ZLIB_VENDORED TRUE) From 3fd8a1c26f33b37f200e0b02629a7952d89bf7a1 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:13:48 +0100 Subject: [PATCH 18/95] typo --- cpp/CMakePresets.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index dd59c99a8d1..bc9b3089dac 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -376,7 +376,7 @@ "name": "ninja-debug-emscripten", "inherits": [ 
"base-debug", - "features-emscripten", + "features-emscripten" ], "displayName": "Debug build which builds an Emscripten library", "cacheVariables": {} @@ -385,7 +385,7 @@ "name": "ninja-release-emscripten", "inherits": [ "base-release", - "features-emscripten", + "features-emscripten" ], "displayName": "Release build which builds an Emscripten library", "cacheVariables": {} From 28f675d649e607b56ec8f8c96d360e80a1aeb073 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:19:39 +0100 Subject: [PATCH 19/95] fix for webassembly where size_t != int64_t --- cpp/src/arrow/testing/builder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/testing/builder.h b/cpp/src/arrow/testing/builder.h index 09e8f49dea9..2b4d7a4d6e1 100644 --- a/cpp/src/arrow/testing/builder.h +++ b/cpp/src/arrow/testing/builder.h @@ -151,7 +151,7 @@ Status MakeArray(const std::vector& valid_bytes, const std::vector& int64_t size, Builder* builder, std::shared_ptr* out) { // Append the first 1000 for (int64_t i = 0; i < size; ++i) { - if (valid_bytes[i] > 0) { + if (valid_bytes[(size_t)i] > 0) { RETURN_NOT_OK(builder->Append(values[i])); } else { RETURN_NOT_OK(builder->AppendNull()); From 97e58f99cc81ab291904c09b60ccc87d9707841d Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:29:06 +0100 Subject: [PATCH 20/95] size_t not always == in64_t --- cpp/src/arrow/chunk_resolver.h | 8 ++++---- cpp/src/arrow/testing/builder.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index 818070ffe35..bd2d33c78b4 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -62,13 +62,13 @@ struct ARROW_EXPORT ChunkResolver { } const auto cached_chunk = cached_chunk_.load(); const bool cache_hit = - (index >= offsets_[cached_chunk] && index < offsets_[cached_chunk + 1]); + (index >= offsets_[static_cast(cached_chunk)] && index < 
offsets_[static_cast(cached_chunk + 1)]); if (ARROW_PREDICT_TRUE(cache_hit)) { - return {cached_chunk, index - offsets_[cached_chunk]}; + return {cached_chunk, index - offsets_[static_cast(cached_chunk)]}; } auto chunk_index = Bisect(index); cached_chunk_.store(chunk_index); - return {chunk_index, index - offsets_[chunk_index]}; + return {chunk_index, index - offsets_[static_cast(chunk_index)]}; } protected: @@ -81,7 +81,7 @@ struct ARROW_EXPORT ChunkResolver { while (n > 1) { const int64_t m = n >> 1; const int64_t mid = lo + m; - if (static_cast(index) >= offsets_[mid]) { + if (static_cast(index) >= offsets_[static_cast(mid)]) { lo = mid; n -= m; } else { diff --git a/cpp/src/arrow/testing/builder.h b/cpp/src/arrow/testing/builder.h index 2b4d7a4d6e1..5c59cc8af40 100644 --- a/cpp/src/arrow/testing/builder.h +++ b/cpp/src/arrow/testing/builder.h @@ -151,7 +151,7 @@ Status MakeArray(const std::vector& valid_bytes, const std::vector& int64_t size, Builder* builder, std::shared_ptr* out) { // Append the first 1000 for (int64_t i = 0; i < size; ++i) { - if (valid_bytes[(size_t)i] > 0) { + if (valid_bytes[static_cast(i)] > 0) { RETURN_NOT_OK(builder->Append(values[i])); } else { RETURN_NOT_OK(builder->AppendNull()); From a00f068c9afae5bb0b8c5d070e4b794664608b97 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 21:47:35 +0100 Subject: [PATCH 21/95] reverting size_t fixes --- cpp/src/arrow/chunk_resolver.h | 8 ++++---- cpp/src/arrow/testing/builder.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index bd2d33c78b4..818070ffe35 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -62,13 +62,13 @@ struct ARROW_EXPORT ChunkResolver { } const auto cached_chunk = cached_chunk_.load(); const bool cache_hit = - (index >= offsets_[static_cast(cached_chunk)] && index < offsets_[static_cast(cached_chunk + 1)]); + (index >= offsets_[cached_chunk] && 
index < offsets_[cached_chunk + 1]); if (ARROW_PREDICT_TRUE(cache_hit)) { - return {cached_chunk, index - offsets_[static_cast(cached_chunk)]}; + return {cached_chunk, index - offsets_[cached_chunk]}; } auto chunk_index = Bisect(index); cached_chunk_.store(chunk_index); - return {chunk_index, index - offsets_[static_cast(chunk_index)]}; + return {chunk_index, index - offsets_[chunk_index]}; } protected: @@ -81,7 +81,7 @@ struct ARROW_EXPORT ChunkResolver { while (n > 1) { const int64_t m = n >> 1; const int64_t mid = lo + m; - if (static_cast(index) >= offsets_[static_cast(mid)]) { + if (static_cast(index) >= offsets_[mid]) { lo = mid; n -= m; } else { diff --git a/cpp/src/arrow/testing/builder.h b/cpp/src/arrow/testing/builder.h index 5c59cc8af40..09e8f49dea9 100644 --- a/cpp/src/arrow/testing/builder.h +++ b/cpp/src/arrow/testing/builder.h @@ -151,7 +151,7 @@ Status MakeArray(const std::vector& valid_bytes, const std::vector& int64_t size, Builder* builder, std::shared_ptr* out) { // Append the first 1000 for (int64_t i = 0; i < size; ++i) { - if (valid_bytes[static_cast(i)] > 0) { + if (valid_bytes[i] > 0) { RETURN_NOT_OK(builder->Append(values[i])); } else { RETURN_NOT_OK(builder->AppendNull()); From bda2b26bf9c042a285aeb88b8c16ccfc0caacf14 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 22:08:58 +0100 Subject: [PATCH 22/95] debug build fixes --- cpp/cmake_modules/SetupCxxFlags.cmake | 9 ++++++--- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 - 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index a7d9ab13dd9..89aa12b9406 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -709,9 +709,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 2) We are building library code # 3) We force *everything* to build as position independent # 4) And with support for C++ exceptions - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} 
-sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions") + # size_t is 32 bit in emscripten wasm32 - ignore conversion errors + # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -sSIDE_MODULE=1 -fPIC -fexceptions") + "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) @@ -719,7 +721,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 1) Tell it to use zlib from Emscripten ports # 2) Tell it to use javascript / webassembly 64 bit number support. # 3) Tell it to build with support for C++ exceptions - set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions") + # 4) Skip linker flags error which happens with -soname parameter + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sSIDE_MODULE=1 -sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 544a735a4a5..b82bad10d3b 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1786,7 +1786,6 @@ macro(build_protobuf) endif() set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR "cmake") - externalproject_add(protobuf_ep ${EP_COMMON_OPTIONS} ${PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_COMPILER}" From 14b4968ed9a34f8a47ed5c10430e1c38ed502f37 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 2 Oct 2023 22:09:34 +0100 Subject: 
[PATCH 23/95] fixed order of build preset dependencies --- cpp/CMakePresets.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index bc9b3089dac..6e68ddd6023 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -375,8 +375,8 @@ { "name": "ninja-debug-emscripten", "inherits": [ - "base-debug", - "features-emscripten" + "features-emscripten", + "base-debug" ], "displayName": "Debug build which builds an Emscripten library", "cacheVariables": {} @@ -384,8 +384,8 @@ { "name": "ninja-release-emscripten", "inherits": [ - "base-release", - "features-emscripten" + "features-emscripten", + "base-release" ], "displayName": "Release build which builds an Emscripten library", "cacheVariables": {} From 834b5a3702e0b9c36ce35741d14e497aca3c0235 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 06:11:04 +0100 Subject: [PATCH 24/95] debug build fixes --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b82bad10d3b..ac442348d92 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2202,8 +2202,15 @@ function(build_gtest) if(APPLE) string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " -Wno-ignored-attributes") endif() - set(BUILD_SHARED_LIBS ON) - set(BUILD_STATIC_LIBS OFF) + # If we're building static libs for Emscripten, we need to build *everything* as + # static libs. + if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") OR ARROW_BUILD_SHARED) + set(BUILD_SHARED_LIBS ON) + set(BUILD_STATIC_LIBS OFF) + else() + set(BUILD_SHARED_LIBS OFF) + set(BUILD_STATIC_LIBS ON) +endif() # We need to use "cache" variable to override the default # INSTALL_GTEST option by this value. 
See also: # https://cmake.org/cmake/help/latest/policy/CMP0077.html From ff2a7c95363b20511c8063be43b7fe7601dafd7d Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 06:14:03 +0100 Subject: [PATCH 25/95] cmake format --- cpp/cmake_modules/SetupCxxFlags.cmake | 6 ++++-- cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 +++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 89aa12b9406..2c3ea2611bb 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -713,7 +713,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # size_t is 32 bit in emscripten wasm32 - ignore conversion errors # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator") + "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator" + ) # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) @@ -722,7 +723,8 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 2) Tell it to use javascript / webassembly 64 bit number support. 
# 3) Tell it to build with support for C++ exceptions # 4) Skip linker flags error which happens with -soname parameter - set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sSIDE_MODULE=1 -sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") + set(ARROW_EMSCRIPTEN_LINKER_FLAGS + "-sSIDE_MODULE=1 -sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ac442348d92..e301cf477c6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -258,7 +258,6 @@ macro(resolve_dependency DEPENDENCY_NAME) set(ARG_IS_RUNTIME_DEPENDENCY TRUE) endif() - if(ARG_HAVE_ALT) set(PACKAGE_NAME "${DEPENDENCY_NAME}Alt") else() @@ -963,7 +962,8 @@ endif() # and crosscompiling emulator (for try_run() ) if(CMAKE_CROSSCOMPILING_EMULATOR) - list(APPEND EP_COMMON_CMAKE_ARGS -DCMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}) + list(APPEND EP_COMMON_CMAKE_ARGS + -DCMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}) endif() if(CMAKE_PROJECT_INCLUDE) @@ -1848,7 +1848,6 @@ macro(build_protobuf) endif() - endmacro() if(ARROW_WITH_PROTOBUF) @@ -1907,7 +1906,7 @@ if(ARROW_WITH_PROTOBUF) set(ARROW_PROTOBUF_LIBPROTOC protobuf::libprotoc) endif() if(TARGET arrow::protobuf::host_protoc) - # make sure host protoc is used for compiling protobuf files + # make sure host protoc is used for compiling protobuf files # during build of e.g. 
orc set(ARROW_PROTOBUF_PROTOC arrow::protobuf::host_protoc) elseif(TARGET arrow::protobuf::protoc) @@ -2202,7 +2201,7 @@ function(build_gtest) if(APPLE) string(APPEND CMAKE_CXX_FLAGS " -Wno-unused-value" " -Wno-ignored-attributes") endif() - # If we're building static libs for Emscripten, we need to build *everything* as + # If we're building static libs for Emscripten, we need to build *everything* as # static libs. if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") OR ARROW_BUILD_SHARED) set(BUILD_SHARED_LIBS ON) @@ -2210,7 +2209,7 @@ function(build_gtest) else() set(BUILD_SHARED_LIBS OFF) set(BUILD_STATIC_LIBS ON) -endif() + endif() # We need to use "cache" variable to override the default # INSTALL_GTEST option by this value. See also: # https://cmake.org/cmake/help/latest/policy/CMP0077.html @@ -2466,13 +2465,13 @@ macro(build_zlib) endif() set(ZLIB_STATIC_LIB ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) set(ZLIB_LIBRARIES ${ZLIB_LIBRARY}) -# set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") + # set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") add_library(ZLIB::ZLIB STATIC IMPORTED) set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) -# target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") + # target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") From 45ece1eb95c5a3259d093a8a2a1a592c58f62930 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 10:20:40 +0100 Subject: [PATCH 26/95] typos etc. 
from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 3 +-- docs/source/developers/cpp/emscripten.rst | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index e301cf477c6..67e5fdad9fc 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1327,8 +1327,7 @@ macro(build_snappy) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # ignore linker flag errors, as snappy sets # -Werror -Wall, and emscripten doesn't support -soname - set(SNAPPY_CMAKE_ARGS - ${SNAPPY_CMAKE_ARGS} + list(APPEND SNAPPY_CMAKE_ARGS "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") endif() diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index 3ed97749e08..765c23bca96 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -70,7 +70,7 @@ go to ``arrow/python`` and run pyodide build It should make a wheel targeting the currently enabled version of -pyodide (i.e. the version corresponding to the currently installed +Pyodide (i.e. the version corresponding to the currently installed ``pyodide-build``) in the ``dist`` subdirectory. 
************** From 28ccba9de7edcb7ddb8d2f30282d87d2e170d9d9 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 09:23:22 +0000 Subject: [PATCH 27/95] review fixes --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index e301cf477c6..b3a89caa950 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4459,8 +4459,6 @@ macro(build_orc) get_target_property(ORC_ZSTD_ROOT ${ARROW_ZSTD_LIBZSTD} INTERFACE_INCLUDE_DIRECTORIES) get_filename_component(ORC_ZSTD_ROOT "${ORC_ZSTD_ROOT}" DIRECTORY) - set(LZ4_TARGET LZ4::lz4) - set(ORC_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${ORC_PREFIX}" @@ -4478,8 +4476,8 @@ macro(build_orc) "-DPROTOC_LIBRARY=$" "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$" - "-DLZ4_LIBRARY=$" - "-DLZ4_STATIC_LIBRARY=$" + "-DLZ4_LIBRARY=$" + "-DLZ4_STATIC_LIBRARY=$" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" From adc68e89e85c9eaf5936bbfb4be30d6d7c0dc72d Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 10:32:13 +0100 Subject: [PATCH 28/95] docs fixes --- docs/source/developers/cpp/emscripten.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index 765c23bca96..07eb2561e24 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -34,7 +34,7 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st git clone https://github.com/emscripten-core/emsdk.git cd emsdk # replace with the desired EMSDK version. - # e.g. for pyodide 0.24, you need EMSDK version 3.1.45 + # e.g. 
for Pyodide 0.24, you need EMSDK version 3.1.45 ./emsdk install ./emsdk activate source ./emsdk_env.sh @@ -42,15 +42,15 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st If you want to build PyArrow for `Pyodide `_, you need ``pyodide-build`` installed via ``pip``, and to be running with the same version of Python that Pyodide is built for, along with the same -versions of emsdk. +versions of emsdk tools. .. code:: shell - # install pyodide build tools. - # e.g. for version 0.24 of pyodide: + # install Pyodide build tools. + # e.g. for version 0.24 of Pyodide: pip install pyodide-build==0.24 -Then build with the ``ninja-release-emscripten`` cmake preset, +Then build with the ``ninja-release-emscripten`` CMake preset, like below: .. code:: shell @@ -88,7 +88,7 @@ you will need to override. In particular you will need: #. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` for builds targeting single threaded Emscripten environments such as - pyodide. + Pyodide. #. ``ARROW_FLIGHT`` and anything else that uses network probably won't work. 
From 7cb0da941df50cf9bf1f2e2ac26117de2bc75c78 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 10:33:24 +0100 Subject: [PATCH 29/95] comments grammar --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9b31017a510..4432eb2ffbd 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1817,10 +1817,10 @@ macro(build_protobuf) list(APPEND ARROW_BUNDLED_STATIC_LIBS arrow::protobuf::libprotobuf) if(CMAKE_CROSSCOMPILING) - # if we are cross compiling, we need to build protoc for the host - # system also, as it is used when building arrow - # We do this by calling cmake as a child process - # with CXXFLAGS / CFLAGS and cmake flags cleared + # If we are cross compiling, we need to build protoc for the host + # system also, as it is used when building Arrow + # We do this by calling CMake as a child process + # with CXXFLAGS / CFLAGS and CMake flags cleared. 
set(PROTOBUF_HOST_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/protobuf_ep_host-install") set(PROTOBUF_HOST_COMPILER "${PROTOBUF_HOST_PREFIX}/bin/protoc") From e3f1a81cd88d0d8d0c0a14eb2f9bcda1909b91bc Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 4 Oct 2023 10:50:17 +0100 Subject: [PATCH 30/95] lint --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 4432eb2ffbd..9db6e731442 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1328,7 +1328,7 @@ macro(build_snappy) # ignore linker flag errors, as snappy sets # -Werror -Wall, and emscripten doesn't support -soname list(APPEND SNAPPY_CMAKE_ARGS - "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") + "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") endif() externalproject_add(snappy_ep From 123813f6f17d8678a83f311eab2d3eb9b0a6a8e1 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 9 Oct 2023 14:42:56 +0100 Subject: [PATCH 31/95] correct separators in externalproject call Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9db6e731442..daaf80b95d8 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -962,8 +962,10 @@ endif() # and crosscompiling emulator (for try_run() ) if(CMAKE_CROSSCOMPILING_EMULATOR) + string(REPLACE ";" ${EP_LIST_SEPARATOR} EP_CMAKE_CROSSCOMPILING_EMULATOR + "${CMAKE_CROSSCOMPILING_EMULATOR}") list(APPEND EP_COMMON_CMAKE_ARGS - -DCMAKE_CROSSCOMPILING_EMULATOR=${CMAKE_CROSSCOMPILING_EMULATOR}) + -DCMAKE_CROSSCOMPILING_EMULATOR=${EP_CMAKE_CROSSCOMPILING_EMULATOR}) endif() 
if(CMAKE_PROJECT_INCLUDE) From 9fff2ce0713e9f4de6141919392dd7b85c87df60 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 9 Oct 2023 14:48:53 +0100 Subject: [PATCH 32/95] test changes for emscripten --- ci/docker/ubuntu-22.04-cpp.dockerfile | 5 + ci/scripts/cpp_build.sh | 224 ++++++++++++++------------ dev/tasks/tasks.yml | 8 + docker-compose.yml | 25 +++ 4 files changed, 157 insertions(+), 105 deletions(-) diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index fffafe2b052..bf8f370bec5 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -69,6 +69,7 @@ RUN apt-get update -y -q && \ ccache \ cmake \ curl \ + emscripten \ gdb \ git \ libbenchmark-dev \ @@ -149,6 +150,10 @@ RUN if [ "${gcc_version}" = "" ]; then \ update-alternatives --set c++ /usr/bin/g++; \ fi +RUN EM_CACHE=/usr/share/emscripten/cache \ + EM_FROZEN_CACHE= \ + embuilder --pic build zlib + COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 1f5596e2a50..8e13b059030 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -81,111 +81,125 @@ esac mkdir -p ${build_dir} pushd ${build_dir} -cmake \ - -Dabsl_SOURCE=${absl_SOURCE:-} \ - -DARROW_ACERO=${ARROW_ACERO:-OFF} \ - -DARROW_AZURE=${ARROW_AZURE:-OFF} \ - -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ - -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ - -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ - -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ - -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ - -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ - -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ - -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ - -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ - 
-DARROW_CSV=${ARROW_CSV:-ON} \ - -DARROW_CUDA=${ARROW_CUDA:-OFF} \ - -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ - -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \ - -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \ - -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \ - -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \ - -DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \ - -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ - -DARROW_DATASET=${ARROW_DATASET:-OFF} \ - -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ - -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ - -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ - -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ - -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ - -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ - -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \ - -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ - -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ - -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ - -DARROW_GCS=${ARROW_GCS:-OFF} \ - -DARROW_HDFS=${ARROW_HDFS:-ON} \ - -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ - -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ - -DARROW_JSON=${ARROW_JSON:-ON} \ - -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ - -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ - -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ - -DARROW_ORC=${ARROW_ORC:-OFF} \ - -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ - -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ - -DARROW_S3=${ARROW_S3:-OFF} \ - -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ - -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ - -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ - -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ - -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ - -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ - -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ - -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ - 
-DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ - -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ - -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ - -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ - -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ - -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ - -DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ - -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ - -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ - -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \ - -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \ - -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ - -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \ - -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ - -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ - -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ - -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ - -DAzure_SOURCE=${Azure_SOURCE:-} \ - -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ - -DBOOST_SOURCE=${BOOST_SOURCE:-} \ - -DBrotli_SOURCE=${Brotli_SOURCE:-} \ - -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ - -Dc-ares_SOURCE=${cares_SOURCE:-} \ - -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ - -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ - -DCMAKE_C_FLAGS="${CFLAGS:-}" \ - -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ - -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ - -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ - -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ - -Dgflags_SOURCE=${gflags_SOURCE:-} \ - -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ - -DgRPC_SOURCE=${gRPC_SOURCE:-} \ - -DGTest_SOURCE=${GTest_SOURCE:-} \ - -Dlz4_SOURCE=${lz4_SOURCE:-} \ - -DORC_SOURCE=${ORC_SOURCE:-} \ - -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ - -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ - -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ - 
-DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ - -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ - -Dre2_SOURCE=${re2_SOURCE:-} \ - -DSnappy_SOURCE=${Snappy_SOURCE:-} \ - -DThrift_SOURCE=${Thrift_SOURCE:-} \ - -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ - -Dzstd_SOURCE=${zstd_SOURCE:-} \ - -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ - -G "${CMAKE_GENERATOR:-Ninja}" \ - ${ARROW_CMAKE_ARGS} \ - ${source_dir} +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then + emcmake cmake \ + --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ + -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ + -DCMAKE_C_FLAGS="${CFLAGS:-}" \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + ${ARROW_CMAKE_ARGS} \ + ${source_dir} +else + cmake \ + -Dabsl_SOURCE=${absl_SOURCE:-} \ + -DARROW_ACERO=${ARROW_ACERO:-OFF} \ + -DARROW_AZURE=${ARROW_AZURE:-OFF} \ + -DARROW_BOOST_USE_SHARED=${ARROW_BOOST_USE_SHARED:-ON} \ + -DARROW_BUILD_BENCHMARKS_REFERENCE=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ + -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ + -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ + -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ + -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ + -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ + -DARROW_COMPUTE=${ARROW_COMPUTE:-ON} \ + -DARROW_CSV=${ARROW_CSV:-ON} \ + -DARROW_CUDA=${ARROW_CUDA:-OFF} \ + -DARROW_CXXFLAGS=${ARROW_CXXFLAGS:-} \ + -DARROW_CXX_FLAGS_DEBUG="${ARROW_CXX_FLAGS_DEBUG:-}" \ + -DARROW_CXX_FLAGS_RELEASE="${ARROW_CXX_FLAGS_RELEASE:-}" \ + -DARROW_CXX_FLAGS_RELWITHDEBINFO="${ARROW_CXX_FLAGS_RELWITHDEBINFO:-}" \ + -DARROW_C_FLAGS_DEBUG="${ARROW_C_FLAGS_DEBUG:-}" \ + 
-DARROW_C_FLAGS_RELEASE="${ARROW_C_FLAGS_RELEASE:-}" \ + -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ + -DARROW_DATASET=${ARROW_DATASET:-OFF} \ + -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ + -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ + -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ + -DARROW_FILESYSTEM=${ARROW_FILESYSTEM:-ON} \ + -DARROW_FLIGHT=${ARROW_FLIGHT:-OFF} \ + -DARROW_FLIGHT_SQL=${ARROW_FLIGHT_SQL:-OFF} \ + -DARROW_FUZZING=${ARROW_FUZZING:-OFF} \ + -DARROW_GANDIVA_PC_CXX_FLAGS=${ARROW_GANDIVA_PC_CXX_FLAGS:-} \ + -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ + -DARROW_GCS=${ARROW_GCS:-OFF} \ + -DARROW_HDFS=${ARROW_HDFS:-ON} \ + -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ + -DARROW_JEMALLOC=${ARROW_JEMALLOC:-ON} \ + -DARROW_JSON=${ARROW_JSON:-ON} \ + -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ + -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ + -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ + -DARROW_ORC=${ARROW_ORC:-OFF} \ + -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ + -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ + -DARROW_S3=${ARROW_S3:-OFF} \ + -DARROW_SIMD_LEVEL=${ARROW_SIMD_LEVEL:-DEFAULT} \ + -DARROW_SKYHOOK=${ARROW_SKYHOOK:-OFF} \ + -DARROW_SUBSTRAIT=${ARROW_SUBSTRAIT:-OFF} \ + -DARROW_TEST_LINKAGE=${ARROW_TEST_LINKAGE:-shared} \ + -DARROW_TEST_MEMCHECK=${ARROW_TEST_MEMCHECK:-OFF} \ + -DARROW_USE_ASAN=${ARROW_USE_ASAN:-OFF} \ + -DARROW_USE_CCACHE=${ARROW_USE_CCACHE:-ON} \ + -DARROW_USE_GLOG=${ARROW_USE_GLOG:-OFF} \ + -DARROW_USE_LD_GOLD=${ARROW_USE_LD_GOLD:-OFF} \ + -DARROW_USE_PRECOMPILED_HEADERS=${ARROW_USE_PRECOMPILED_HEADERS:-OFF} \ + -DARROW_USE_STATIC_CRT=${ARROW_USE_STATIC_CRT:-OFF} \ + -DARROW_USE_TSAN=${ARROW_USE_TSAN:-OFF} \ + -DARROW_USE_UBSAN=${ARROW_USE_UBSAN:-OFF} \ + -DARROW_VERBOSE_THIRDPARTY_BUILD=${ARROW_VERBOSE_THIRDPARTY_BUILD:-OFF} \ + 
-DARROW_WITH_BROTLI=${ARROW_WITH_BROTLI:-OFF} \ + -DARROW_WITH_BZ2=${ARROW_WITH_BZ2:-OFF} \ + -DARROW_WITH_LZ4=${ARROW_WITH_LZ4:-OFF} \ + -DARROW_WITH_OPENTELEMETRY=${ARROW_WITH_OPENTELEMETRY:-OFF} \ + -DARROW_WITH_MUSL=${ARROW_WITH_MUSL:-OFF} \ + -DARROW_WITH_SNAPPY=${ARROW_WITH_SNAPPY:-OFF} \ + -DARROW_WITH_UCX=${ARROW_WITH_UCX:-OFF} \ + -DARROW_WITH_UTF8PROC=${ARROW_WITH_UTF8PROC:-ON} \ + -DARROW_WITH_ZLIB=${ARROW_WITH_ZLIB:-OFF} \ + -DARROW_WITH_ZSTD=${ARROW_WITH_ZSTD:-OFF} \ + -DAWSSDK_SOURCE=${AWSSDK_SOURCE:-} \ + -DAzure_SOURCE=${Azure_SOURCE:-} \ + -Dbenchmark_SOURCE=${benchmark_SOURCE:-} \ + -DBOOST_SOURCE=${BOOST_SOURCE:-} \ + -DBrotli_SOURCE=${Brotli_SOURCE:-} \ + -DBUILD_WARNING_LEVEL=${BUILD_WARNING_LEVEL:-CHECKIN} \ + -Dc-ares_SOURCE=${cares_SOURCE:-} \ + -DCMAKE_BUILD_TYPE=${ARROW_BUILD_TYPE:-debug} \ + -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ + -DCMAKE_C_FLAGS="${CFLAGS:-}" \ + -DCMAKE_CXX_FLAGS="${CXXFLAGS:-}" \ + -DCMAKE_CXX_STANDARD="${CMAKE_CXX_STANDARD:-17}" \ + -DCMAKE_INSTALL_LIBDIR=${CMAKE_INSTALL_LIBDIR:-lib} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX:-${ARROW_HOME}} \ + -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -Dgflags_SOURCE=${gflags_SOURCE:-} \ + -Dgoogle_cloud_cpp_storage_SOURCE=${google_cloud_cpp_storage_SOURCE:-} \ + -DgRPC_SOURCE=${gRPC_SOURCE:-} \ + -DGTest_SOURCE=${GTest_SOURCE:-} \ + -Dlz4_SOURCE=${lz4_SOURCE:-} \ + -DORC_SOURCE=${ORC_SOURCE:-} \ + -DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \ + -DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \ + -DPARQUET_REQUIRE_ENCRYPTION=${PARQUET_REQUIRE_ENCRYPTION:-ON} \ + -DProtobuf_SOURCE=${Protobuf_SOURCE:-} \ + -DRapidJSON_SOURCE=${RapidJSON_SOURCE:-} \ + -Dre2_SOURCE=${re2_SOURCE:-} \ + -DSnappy_SOURCE=${Snappy_SOURCE:-} \ + -DThrift_SOURCE=${Thrift_SOURCE:-} \ + -Dutf8proc_SOURCE=${utf8proc_SOURCE:-} \ + -Dzstd_SOURCE=${zstd_SOURCE:-} \ + -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ + -G "${CMAKE_GENERATOR:-Ninja}" \ + 
${ARROW_CMAKE_ARGS} \ + ${source_dir} +fi export CMAKE_BUILD_PARALLEL_LEVEL=${CMAKE_BUILD_PARALLEL_LEVEL:-$[${n_jobs} + 1]} time cmake --build . --target install diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 859ff8ddb5b..b8b0d9ed2da 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1255,6 +1255,14 @@ tasks: flags: "-e ARROW_CSV=ON -e ARROW_PARQUET=ON" image: ubuntu-cpp-minimal + test-ubuntu-22.04-cpp-emscripten: + ci: github + template: docker-tests/github.linux.yml + params: + env: + UBUNTU: 22.04 + image: ubuntu-cpp-emscripten + {% for python_version in ["3.8", "3.9", "3.10", "3.11"] %} test-conda-python-{{ python_version }}: ci: github diff --git a/docker-compose.yml b/docker-compose.yml index 62e5aee0a84..ea3b192818b 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -158,6 +158,7 @@ x-hierarchy: - ubuntu-csharp - ubuntu-cpp-sanitizer - ubuntu-cpp-thread-sanitizer + - ubuntu-cpp-emscripten - ubuntu-r-sanitizer - ubuntu-r-valgrind - ubuntu-swift @@ -649,6 +650,30 @@ services: ARROW_USE_TSAN: "ON" command: *cpp-command + ubuntu-cpp-emscripten: + # Usage: + # docker-compose build ubuntu-cpp-emscripten + # docker-compose run --rm ubuntu-cpp-emscripten + # Parameters: + # ARCH: amd64, arm64v8, ... + # UBUNTU: 22.04 + image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + build: + context: . 
+ dockerfile: ci/docker/ubuntu-${UBUNTU}-cpp.dockerfile + cache_from: + - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-cpp + args: + arch: ${ARCH} + clang_tools: ${CLANG_TOOLS} + llvm: ${LLVM} + shm_size: *shm-size + volumes: *ubuntu-volumes + environment: + <<: [*common, *ccache, *sccache, *cpp] + ARROW_EMSCRIPTEN: "ON" + command: *cpp-command + fedora-cpp: # Usage: # docker-compose build fedora-cpp From 0b5cc3bd4f2caf6951e345a2dc630ee9df93aa11 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 11 Oct 2023 13:08:38 +0100 Subject: [PATCH 33/95] emscripten archery fixes --- ci/docker/ubuntu-22.04-cpp.dockerfile | 21 +++++++++++++++------ ci/scripts/cpp_build.sh | 6 ++++++ docker-compose.yml | 1 + 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index bf8f370bec5..42d4021d99b 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -65,11 +65,11 @@ RUN latest_system_llvm=14 && \ RUN apt-get update -y -q && \ apt-get install -y -q --no-install-recommends \ autoconf \ + bzip2 \ ca-certificates \ ccache \ cmake \ - curl \ - emscripten \ + curl \ gdb \ git \ libbenchmark-dev \ @@ -114,10 +114,20 @@ RUN apt-get update -y -q && \ rapidjson-dev \ rsync \ tzdata \ - wget && \ + wget \ + xz-utils && \ apt-get clean && \ rm -rf /var/lib/apt/lists* +# install emscripten using EMSDK +ARG emscripten_version="3.1.45" +RUN cd ~ && git clone https://github.com/emscripten-core/emsdk.git && \ + cd emsdk && \ + ./emsdk install ${emscripten_version} && \ + ./emsdk activate ${emscripten_version} && \ + echo "Installed emsdk to:" ~/emsdk + + ARG gcc_version="" RUN if [ "${gcc_version}" = "" ]; then \ apt-get update -y -q && \ @@ -150,9 +160,8 @@ RUN if [ "${gcc_version}" = "" ]; then \ update-alternatives --set c++ /usr/bin/g++; \ fi -RUN EM_CACHE=/usr/share/emscripten/cache \ - EM_FROZEN_CACHE= \ - embuilder --pic build zlib +# make sure zlib is cached in the EMSDK 
folder +RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib COPY ci/scripts/install_minio.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 8e13b059030..430b883a12d 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -82,6 +82,12 @@ mkdir -p ${build_dir} pushd ${build_dir} if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then + export + if [ "${UBUNTU}" = "20.04" ]; then + echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" + exit -1 + fi + source ~/emsdk/emsdk_env.sh emcmake cmake \ --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ -DCMAKE_VERBOSE_MAKEFILE=${CMAKE_VERBOSE_MAKEFILE:-OFF} \ diff --git a/docker-compose.yml b/docker-compose.yml index ea3b192818b..64089ece7ff 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -672,6 +672,7 @@ services: environment: <<: [*common, *ccache, *sccache, *cpp] ARROW_EMSCRIPTEN: "ON" + UBUNTU: command: *cpp-command fedora-cpp: From b5fedaaa0fe74ab3390934de275be653f3d8bb59 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 11 Oct 2023 14:10:53 +0100 Subject: [PATCH 34/95] force non-parallel emscripten debug build --- ci/scripts/cpp_build.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 430b883a12d..d93ea6690df 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -82,7 +82,15 @@ mkdir -p ${build_dir} pushd ${build_dir} if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then - export + if [ "${ARROW_BUILD_TYPE:-debug}" = "debug" ]; then + echo "Forcing non-parallel build for emscripten debug" + export CMAKE_BUILD_PARALLEL_LEVEL=1 + # emscripten debug linking takes *tons* of memory + # https://github.com/WebAssembly/binaryen/issues/4261 + # so stop parallel builds for debug build + # or else crossbow CI runs out of memory + fi + if [ "${UBUNTU}" = "20.04" ]; then echo 
"arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1 From 76d464bc95b174f15e2bddb8bb36b27b01bc6736 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 12 Oct 2023 15:57:13 +0100 Subject: [PATCH 35/95] split dwarf debug info on emscripten --- cpp/cmake_modules/SetupCxxFlags.cmake | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 2c3ea2611bb..45e0733e310 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -728,4 +728,10 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) + + # split dwarf out of object files for faster debug builds + if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") + string(APPEND CMAKE_CXX_FLAGS " -gsplit-dwarf") + endif() + endif() From b2dd85bea96e29765980196d284123a54171e551 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 13 Oct 2023 09:50:18 +0100 Subject: [PATCH 36/95] use limited debug info on emscripten builds --- cpp/cmake_modules/SetupCxxFlags.cmake | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 45e0733e310..0625fd1e562 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -729,9 +729,11 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) - # split dwarf out of object files for faster debug builds + # limit debug info because building with DWARF debug info requires + # absolutely tons of memory + # https://github.com/WebAssembly/binaryen/issues/4261 if(${UPPERCASE_BUILD_TYPE} 
STREQUAL "DEBUG") - string(APPEND CMAKE_CXX_FLAGS " -gsplit-dwarf") + string(APPEND CMAKE_CXX_FLAGS " -g2") endif() endif() From 8d3b25547340ef0a1be9dfb04d3f915f9188f008 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 13 Oct 2023 09:51:35 +0100 Subject: [PATCH 37/95] disable test that won't work on emscripten --- cpp/src/arrow/array/array_dict_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 3c1a1aaa86d..797e681a087 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) +#if !defined(__APPLE__) and !defined(EMSCRIPTEN) // GH-35712: ASSERT_DEATH would make testing slow on MacOS. ASSERT_DEATH( { From 84867d2ffc43972398b37f1d3f4ebe249a078ebf Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 13 Oct 2023 11:00:49 +0100 Subject: [PATCH 38/95] set debug flag to g2 --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 95c584a540e..54fe482bc7e 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -740,7 +740,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # absolutely tons of memory # https://github.com/WebAssembly/binaryen/issues/4261 if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - string(APPEND CMAKE_CXX_FLAGS " -g2") + string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g2") endif() endif() From 9c7ad2ed3571bfa85da54279555cfb9c47cd2e3c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 13 Oct 2023 11:13:42 +0100 Subject: [PATCH 39/95] lint --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 54fe482bc7e..c42440b6f16 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -736,7 +736,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) - # limit debug info because building with DWARF debug info requires + # limit debug info because building with DWARF debug info requires # absolutely tons of memory # https://github.com/WebAssembly/binaryen/issues/4261 if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") From 275c08a8ea832efa7f83314c9f3b5042c5564a46 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 13 Oct 2023 11:32:07 +0100 Subject: [PATCH 40/95] typo --- cpp/src/arrow/array/array_dict_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 8e9d9d9c4e3..18c0d1d05d6 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) and !defined(EMSCRIPTEN) +#if !defined(__APPLE__) && !defined(EMSCRIPTEN) // GH-35712: ASSERT_DEATH would make testing slow on MacOS. 
ASSERT_DEATH( { From 22ace85cf8a139cec9209d940d56d0baedd08c2c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 16 Oct 2023 12:54:09 +0100 Subject: [PATCH 41/95] test fixes --- cpp/CMakePresets.json | 2 +- cpp/build-support/emscripten-test-init.js | 5 + cpp/cmake_modules/BuildUtils.cmake | 7 ++ cpp/cmake_modules/SetupCxxFlags.cmake | 14 ++- .../compute/kernels/scalar_temporal_test.cc | 7 ++ cpp/src/arrow/io/file_test.cc | 8 ++ cpp/src/arrow/util/async_generator_test.cc | 8 ++ cpp/src/arrow/util/atfork_test.cc | 20 ++++ cpp/src/arrow/util/cache_test.cc | 4 + cpp/src/arrow/util/cancel_test.cc | 24 ++++ cpp/src/arrow/util/counting_semaphore_test.cc | 12 ++ cpp/src/arrow/util/future_test.cc | 16 +++ cpp/src/arrow/util/io_util.cc | 110 +++++++++--------- cpp/src/arrow/util/io_util_test.cc | 30 +++++ cpp/src/arrow/util/mutex.cc | 2 + 15 files changed, 212 insertions(+), 57 deletions(-) create mode 100644 cpp/build-support/emscripten-test-init.js diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 6e68ddd6023..fe2f405e82a 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -53,7 +53,7 @@ "ARROW_ACERO": "ON", "ARROW_BUILD_SHARED": "OFF", "ARROW_BUILD_STATIC": "ON", - "ARROW_BUILD_TESTS": "OFF", + "ARROW_BUILD_TESTS": "ON", "ARROW_CUDA": "OFF", "ARROW_DEPENDENCY_SOURCE": "BUNDLED", "ARROW_DEPENDENCY_USE_SHARED": "OFF", diff --git a/cpp/build-support/emscripten-test-init.js b/cpp/build-support/emscripten-test-init.js new file mode 100644 index 00000000000..e60d8dcdc50 --- /dev/null +++ b/cpp/build-support/emscripten-test-init.js @@ -0,0 +1,5 @@ +var Module = { +}; + +// make sure tests can access the current parquet test data files +Module.preRun = () => {ENV.PARQUET_TEST_DATA = process.env.PARQUET_TEST_DATA}; \ No newline at end of file diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 083ac2fe9a8..86dc951ffe7 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ 
-742,6 +742,13 @@ function(ADD_TEST_CASE REL_TEST_NAME) --error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}") elseif(WIN32) add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS}) + elseif(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + add_test(${TEST_NAME} + ${BUILD_SUPPORT_DIR}/run-test.sh + ${CMAKE_BINARY_DIR} + test + ${CMAKE_CROSSCOMPILING_EMULATOR} ${TEST_PATH} + ${ARG_TEST_ARGUMENTS}) else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index c42440b6f16..43f905239cc 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -731,10 +731,16 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 3) Tell it to build with support for C++ exceptions # 4) Skip linker flags error which happens with -soname parameter set(ARROW_EMSCRIPTEN_LINKER_FLAGS - "-sSIDE_MODULE=1 -sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") - set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) - set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) - set(CMAKE_SHARED_LINKER_FLAGS ${ARROW_EMSCRIPTEN_LINKER_FLAGS}) + "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + if(ARROW_TESTING) + # flags for building test executables for use in node + set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js") + else() + set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") + endif() # limit debug info because building with DWARF debug info requires # absolutely tons of memory diff --git 
a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 4c7975add03..f6f023f15ba 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2143,7 +2143,10 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { #ifdef _WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; +#elif defined(EMSCRIPTEN) + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; #else + if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; } @@ -2159,6 +2162,9 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { +#ifdef EMSCRIPTEN + GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; +#else auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); @@ -2166,6 +2172,7 @@ TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("Cannot find locale 'non-existent'"), Strftime(arr, options)); +#endif } TEST_F(ScalarTemporalTest, TestTemporalDifferenceZoned) { diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index e7e7ba949c9..a3e39b91430 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -486,6 +486,10 @@ TEST_F(TestReadableFile, CustomMemoryPool) { } TEST_F(TestReadableFile, ThreadSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + std::string data = "foobar"; { std::ofstream stream; @@ -1048,6 +1052,10 @@ TEST_F(TestMemoryMappedFile, CastableToFileInterface) { } TEST_F(TestMemoryMappedFile, ThreadSafety) { +#ifndef 
ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + std::string data = "foobar"; std::string path = TempFile("ipc-multithreading-test"); CreateFile(path, static_cast(data.size())); diff --git a/cpp/src/arrow/util/async_generator_test.cc b/cpp/src/arrow/util/async_generator_test.cc index 7fb99f167c6..158cf72a834 100644 --- a/cpp/src/arrow/util/async_generator_test.cc +++ b/cpp/src/arrow/util/async_generator_test.cc @@ -399,6 +399,10 @@ TEST(TestAsyncUtil, MapParallelStress) { } TEST(TestAsyncUtil, MapQueuingFailStress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int NTASKS = 10; constexpr int NITEMS = 10; for (bool slow : {true, false}) { @@ -1872,6 +1876,10 @@ TEST(PushGenerator, DanglingProducer) { } TEST(PushGenerator, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int NTHREADS = 20; const int NVALUES = 2000; const int NFUTURES = NVALUES + 100; diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index 004e28e1951..9000ab15a73 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -109,6 +109,10 @@ class TestAtFork : public ::testing::Test { #ifndef _WIN32 TEST_F(TestAtFork, EmptyHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(); RegisterAtFork(handlers); @@ -130,6 +134,10 @@ TEST_F(TestAtFork, EmptyHandlers) { } TEST_F(TestAtFork, SingleThread) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); auto handlers2 = std::make_shared(PushBefore(2), PushParentAfter(12), @@ -188,6 +196,10 @@ TEST_F(TestAtFork, SingleThread) { // https://github.com/google/sanitizers/issues/950. 
TEST_F(TestAtFork, MultipleThreads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + const int kNumThreads = 5; const int kNumIterations = 40; const int kParentAfterAddend = 10000; @@ -245,6 +257,10 @@ TEST_F(TestAtFork, NestedChild) { #ifdef __APPLE__ GTEST_SKIP() << "Nested fork is not supported on macOS"; #endif +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); @@ -286,6 +302,10 @@ TEST_F(TestAtFork, NestedChild) { #ifdef _WIN32 TEST_F(TestAtFork, NoOp) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + auto handlers = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); diff --git a/cpp/src/arrow/util/cache_test.cc b/cpp/src/arrow/util/cache_test.cc index 6b71baa369b..7c61b3980bf 100644 --- a/cpp/src/arrow/util/cache_test.cc +++ b/cpp/src/arrow/util/cache_test.cc @@ -255,6 +255,10 @@ TYPED_TEST(TestMemoizeLru, Basics) { this->TestBasics(); } class TestMemoizeLruThreadSafe : public TestMemoizeLru {}; TEST_F(TestMemoizeLruThreadSafe, Threads) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + using V = IntValue; Callable c; diff --git a/cpp/src/arrow/util/cancel_test.cc b/cpp/src/arrow/util/cancel_test.cc index 45f6cde4f55..713418f15a0 100644 --- a/cpp/src/arrow/util/cancel_test.cc +++ b/cpp/src/arrow/util/cancel_test.cc @@ -232,6 +232,10 @@ class SignalCancelTest : public CancelTest { }; TEST_F(SignalCancelTest, Register) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); TriggerSignal(); @@ -239,6 +243,10 @@ TEST_F(SignalCancelTest, Register) { } TEST_F(SignalCancelTest, RegisterUnregister) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + // The signal stop 
source was set up but no handler was registered, // so the token shouldn't be signalled. TriggerSignal(); @@ -261,6 +269,10 @@ TEST_F(SignalCancelTest, RegisterUnregister) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RunInChild([&]() { // Child TriggerSignal(); @@ -284,6 +296,10 @@ TEST_F(SignalCancelTest, ForkSafetyUnregisteredHandlers) { } TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + RegisterHandler(); RunInChild([&]() { @@ -307,6 +323,10 @@ TEST_F(SignalCancelTest, ForkSafetyRegisteredHandlers) { #endif TEST_F(CancelTest, ThreadedPollSuccess) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); @@ -339,6 +359,10 @@ TEST_F(CancelTest, ThreadedPollSuccess) { } TEST_F(CancelTest, ThreadedPollCancel) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumThreads = 10; std::vector results(kNumThreads); diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index a5fa9f6bde8..e5e129636c1 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -28,6 +28,10 @@ namespace arrow { namespace util { TEST(CountingSemaphore, Basic) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + CountingSemaphore semaphore; std::atomic acquired{false}; std::atomic started{false}; @@ -50,6 +54,10 @@ TEST(CountingSemaphore, Basic) { } TEST(CountingSemaphore, CloseAborts) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading 
support"; +#endif + CountingSemaphore semaphore; std::atomic cleanup{false}; std::thread acquirer([&] { @@ -64,6 +72,10 @@ TEST(CountingSemaphore, CloseAborts) { } TEST(CountingSemaphore, Stress) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr uint32_t NTHREADS = 10; CountingSemaphore semaphore; std::vector max_allowed_cases = {1, 3}; diff --git a/cpp/src/arrow/util/future_test.cc b/cpp/src/arrow/util/future_test.cc index 87891e48efa..2ed2b69aed5 100644 --- a/cpp/src/arrow/util/future_test.cc +++ b/cpp/src/arrow/util/future_test.cc @@ -415,6 +415,10 @@ TEST(FutureRefTest, HeadRemoved) { } TEST(FutureStressTest, Callback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + #ifdef ARROW_VALGRIND const int NITERS = 2; #else @@ -471,6 +475,10 @@ TEST(FutureStressTest, Callback) { } TEST(FutureStressTest, TryAddCallback) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + for (unsigned int n = 0; n < 1; n++) { auto fut = Future<>::Make(); std::atomic callbacks_added(0); @@ -527,6 +535,10 @@ TEST(FutureStressTest, TryAddCallback) { } TEST(FutureStressTest, DeleteAfterWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + constexpr int kNumTasks = 100; for (int i = 0; i < kNumTasks; i++) { { @@ -1543,6 +1555,10 @@ TEST(FnOnceTest, MoveOnlyDataType) { } TEST(FutureTest, MatcherExamples) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + EXPECT_THAT(Future::MakeFinished(Status::Invalid("arbitrary error")), Finishes(Raises(StatusCode::Invalid))); diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index ac92618ff66..1f54809db47 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1476,61 +1476,65 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, } 
Status MemoryAdviseWillNeed(const std::vector& regions) { - const auto page_size = static_cast(GetPageSize()); - DCHECK_GT(page_size, 0); - const size_t page_mask = ~(page_size - 1); - DCHECK_EQ(page_mask & page_size, page_size); - - auto align_region = [=](const MemoryRegion& region) -> MemoryRegion { - const auto addr = reinterpret_cast(region.addr); - const auto aligned_addr = addr & page_mask; - DCHECK_LT(addr - aligned_addr, page_size); - return {reinterpret_cast(aligned_addr), - region.size + static_cast(addr - aligned_addr)}; - }; - -#ifdef _WIN32 - // PrefetchVirtualMemory() is available on Windows 8 or later - struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY - void* VirtualAddress; - size_t NumberOfBytes; - - PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit - : VirtualAddress(region.addr), NumberOfBytes(region.size) {} - }; - using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG); - static const auto prefetch_virtual_memory = reinterpret_cast( - GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory")); - if (prefetch_virtual_memory != nullptr) { - std::vector entries; - entries.reserve(regions.size()); - for (const auto& region : regions) { - if (region.size != 0) { - entries.emplace_back(align_region(region)); +#ifndef EMSCRIPTEN + const auto page_size = static_cast(GetPageSize()); + DCHECK_GT(page_size, 0); + const size_t page_mask = ~(page_size - 1); + DCHECK_EQ(page_mask & page_size, page_size); + + auto align_region = [=](const MemoryRegion& region) -> MemoryRegion { + const auto addr = reinterpret_cast(region.addr); + const auto aligned_addr = addr & page_mask; + DCHECK_LT(addr - aligned_addr, page_size); + return {reinterpret_cast(aligned_addr), + region.size + static_cast(addr - aligned_addr)}; + }; + + #ifdef _WIN32 + // PrefetchVirtualMemory() is available on Windows 8 or later + struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY + void* VirtualAddress; + size_t 
NumberOfBytes; + + PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit + : VirtualAddress(region.addr), NumberOfBytes(region.size) {} + }; + using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG); + static const auto prefetch_virtual_memory = reinterpret_cast( + GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory")); + if (prefetch_virtual_memory != nullptr) { + std::vector entries; + entries.reserve(regions.size()); + for (const auto& region : regions) { + if (region.size != 0) { + entries.emplace_back(align_region(region)); + } + } + if (!entries.empty() && + !prefetch_virtual_memory(GetCurrentProcess(), + static_cast(entries.size()), entries.data(), + 0)) { + return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed"); } } - if (!entries.empty() && - !prefetch_virtual_memory(GetCurrentProcess(), - static_cast(entries.size()), entries.data(), - 0)) { - return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed"); - } - } - return Status::OK(); -#elif defined(POSIX_MADV_WILLNEED) - for (const auto& region : regions) { - if (region.size != 0) { - const auto aligned = align_region(region); - int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED); - // EBADF can be returned on Linux in the following cases: - // - the kernel version is older than 3.9 - // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577) - if (err != 0 && err != EBADF) { - return IOErrorFromErrno(err, "posix_madvise failed"); + return Status::OK(); + #elif defined(POSIX_MADV_WILLNEED) + for (const auto& region : regions) { + if (region.size != 0) { + const auto aligned = align_region(region); + int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED); + // EBADF can be returned on Linux in the following cases: + // - the kernel version is older than 3.9 + // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577) + if (err != 0 && err != EBADF) { + 
return IOErrorFromErrno(err, "posix_madvise failed"); + } } } - } - return Status::OK(); + return Status::OK(); + #else + return Status::OK(); + #endif #else return Status::OK(); #endif @@ -2058,7 +2062,9 @@ Status SendSignal(int signum) { } Status SendSignalToThread(int signum, uint64_t thread_id) { -#ifdef _WIN32 +#if !ARROW_ENABLE_THREADING + return Status::NotImplemented("Can't send signal with no threads"); +#elif _WIN32 return Status::NotImplemented("Cannot send signal to specific thread on Windows"); #else // Have to use a C-style cast because pthread_t can be a pointer *or* integer type diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 2599c92d821..ca8380942a6 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -368,6 +368,10 @@ TestSelfPipe* TestSelfPipe::instance_; TEST_F(TestSelfPipe, MakeAndShutdown) {} TEST_F(TestSelfPipe, WaitAndSend) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); AssertPayloadsEventually({}); @@ -380,6 +384,10 @@ TEST_F(TestSelfPipe, WaitAndSend) { } TEST_F(TestSelfPipe, SendAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); StartReading(); SleepABit(); @@ -390,6 +398,10 @@ TEST_F(TestSelfPipe, SendAndWait) { } TEST_F(TestSelfPipe, WaitAndShutdown) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + StartReading(); SleepABit(); ASSERT_OK(self_pipe_->Shutdown()); @@ -401,6 +413,9 @@ TEST_F(TestSelfPipe, WaitAndShutdown) { } TEST_F(TestSelfPipe, ShutdownAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif self_pipe_->Send(123456789123456789ULL); ASSERT_OK(self_pipe_->Shutdown()); StartReading(); @@ -413,6 +428,10 @@ TEST_F(TestSelfPipe, ShutdownAndWait) { } TEST_F(TestSelfPipe, 
WaitAndSendFromSignal) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -431,6 +450,10 @@ TEST_F(TestSelfPipe, WaitAndSendFromSignal) { } TEST_F(TestSelfPipe, SendFromSignalAndWait) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + signal_received_.store(0); SignalHandlerGuard guard(SIGINT, &HandleSignal); @@ -450,6 +473,10 @@ TEST_F(TestSelfPipe, SendFromSignalAndWait) { #if !(defined(_WIN32) || defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || \ defined(THREAD_SANITIZER)) TEST_F(TestSelfPipe, ForkSafety) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif + self_pipe_->Send(123456789123456789ULL); auto child_pid = fork(); @@ -1025,6 +1052,9 @@ TEST_F(TestSendSignal, Generic) { } TEST_F(TestSendSignal, ToThread) { + #ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "SendSignalToThread requires threading"; + #endif #ifdef _WIN32 uint64_t dummy_thread_id = 42; ASSERT_RAISES(NotImplemented, SendSignalToThread(SIGINT, dummy_thread_id)); diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 9f82ad45b07..48cce34abc3 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -35,7 +35,9 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { +#if !EMSCRIPTEN || ARROW_ENABLE_THREADING DCHECK(!locked->impl_->mutex_.try_lock()); +#endif locked->impl_->mutex_.unlock(); }) {} From 527e47756beea3ca11621918ab10f7f9b40d9df5 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 18 Oct 2023 14:39:42 +0100 Subject: [PATCH 42/95] fixes to tests for emscripten --- cpp/build-support/emscripten-test-init.js | 4 +++- .../compute/kernels/scalar_string_test.cc | 20 ++++++++++++++++--- cpp/src/arrow/io/file.cc | 10 ++++++++-- cpp/src/arrow/io/file_test.cc | 8 ++++++++ 
cpp/src/arrow/util/io_util_test.cc | 4 ++-- 5 files changed, 38 insertions(+), 8 deletions(-) diff --git a/cpp/build-support/emscripten-test-init.js b/cpp/build-support/emscripten-test-init.js index e60d8dcdc50..6de62a3d234 100644 --- a/cpp/build-support/emscripten-test-init.js +++ b/cpp/build-support/emscripten-test-init.js @@ -2,4 +2,6 @@ var Module = { }; // make sure tests can access the current parquet test data files -Module.preRun = () => {ENV.PARQUET_TEST_DATA = process.env.PARQUET_TEST_DATA}; \ No newline at end of file +Module.preRun = () => {ENV.PARQUET_TEST_DATA = process.env.PARQUET_TEST_DATA; + ENV.ARROW_TEST_DATA = process.env.ARROW_TEST_DATA; +}; \ No newline at end of file diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index ff14f5e7a5c..20b419a1688 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -22,6 +22,7 @@ #include #include +#include #ifdef ARROW_WITH_UTF8PROC #include @@ -1878,17 +1879,29 @@ TYPED_TEST(TestStringKernels, Strptime) { this->CheckUnary("strptime", input4, unit, output4, &options); options.format = "%m/%d/%Y %%z"; - this->CheckUnary("strptime", input5, unit, output1, &options); + #ifndef EMSCRIPTEN + // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 + this->CheckUnary("strptime", input5, unit, output1, &options); - options.error_is_null = false; - this->CheckUnary("strptime", input5, unit, output1, &options); + options.error_is_null = false; + this->CheckUnary("strptime", input5, unit, output1, &options); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, testing::HasSubstr("Invalid: Failed to parse string: '5/1/2020'"), Strptime(ArrayFromJSON(this->type(), input1), options)); + + #else + GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20466"; + #endif + + } TYPED_TEST(TestStringKernels, 
StrptimeZoneOffset) { +#ifdef EMSCRIPTEN + GTEST_SKIP() << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; +#else + if (!arrow::internal::kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } @@ -1906,6 +1919,7 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, /*error_is_null=*/true); this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), output, &options2); +#endif } TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) { diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 543fa90a86e..39e55773df3 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -397,8 +397,14 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { - int result = munmap(data(), static_cast(size_)); - ARROW_CHECK_EQ(result, 0) << "munmap failed"; + #ifndef __EMSCRIPTEN__ + int result = munmap(data(), static_cast(size_)); + // emscripten erroneously reports failures in munmap + // https://github.com/emscripten-core/emscripten/issues/20459 + ARROW_CHECK_EQ(result, 0) << "munmap failed"; + #else + munmap(data(), static_cast(size_)); + #endif } } diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index a3e39b91430..5444496ca84 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -544,6 +544,9 @@ class TestPipeIO : public ::testing::Test { }; TEST_F(TestPipeIO, TestWrite) { + #ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; + #endif std::string data1 = "test", data2 = "data!"; std::shared_ptr file; uint8_t buffer[10]; @@ -574,6 +577,9 @@ TEST_F(TestPipeIO, TestWrite) { } TEST_F(TestPipeIO, ReadableFileFails) { + #ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; + #endif // ReadableFile fails on non-seekable fd ASSERT_RAISES(IOError, ReadableFile::Open(pipe_.rfd.fd())); } @@ 
-995,6 +1001,8 @@ TEST_F(TestMemoryMappedFile, LARGE_MEMORY_TEST(ReadWriteOver4GbFile)) { } TEST_F(TestMemoryMappedFile, RetainMemoryMapReference) { + + // ARROW-494 const int64_t buffer_size = 1024; diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index ca8380942a6..0614ef5210b 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -146,8 +146,8 @@ TEST(MemoryAdviseWillNeed, Basics) { ASSERT_OK(MemoryAdviseWillNeed({{addr1, 0}, {addr2 + 1, 0}})); // Should probably fail - // (but on Windows, MemoryAdviseWillNeed can be a no-op) -#ifndef _WIN32 + // (but on Windows or Emscripten, MemoryAdviseWillNeed can be a no-op) +#if !defined(_WIN32) && !defined(EMSCRIPTEN) ASSERT_RAISES(IOError, MemoryAdviseWillNeed({{nullptr, std::numeric_limits::max()}})); #endif From a96f79386c3ac148dfc5fd8f958165a84eb17299 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 18 Oct 2023 15:31:24 +0000 Subject: [PATCH 43/95] emscripten test fixes --- cpp/src/arrow/util/value_parsing_test.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc index 30c5e6aae74..7a3e13d73b4 100644 --- a/cpp/src/arrow/util/value_parsing_test.cc +++ b/cpp/src/arrow/util/value_parsing_test.cc @@ -794,6 +794,10 @@ TEST(TimestampParser, StrptimeZoneOffset) { if (!kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } + #ifdef EMSCRIPTEN + GTEST_SKIP() << "Test temporarily disabled due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; + #endif + std::string format = "%Y-%d-%m %H:%M:%S%z"; auto parser = TimestampParser::MakeStrptime(format); From 5e8ee705dacd2cbc4f36ad7eccde9419d39da2df Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 18 Oct 2023 15:37:35 +0000 Subject: [PATCH 44/95] more test fixes --- cpp/src/arrow/ipc/read_write_test.cc | 7 +++++++ 1 file changed, 7 insertions(+) 
diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 3ae007c20ef..44790a358e0 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1035,6 +1035,9 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { }; TEST_F(RecursionLimits, WriteLimit) { + #ifdef EMSCRIPTEN + GTEST_SKIP() << "This crashes the Emscripten runtime."; + #endif int32_t metadata_length = -1; int64_t body_length = -1; std::shared_ptr schema; @@ -1067,6 +1070,10 @@ TEST_F(RecursionLimits, ReadLimit) { // Test fails with a structured exception on Windows + Debug #if !defined(_WIN32) || defined(NDEBUG) TEST_F(RecursionLimits, StressLimit) { + #ifdef EMSCRIPTEN + GTEST_SKIP() << "This crashes the Emscripten runtime."; + #endif + auto CheckDepth = [this](int recursion_depth, bool* it_works) { int32_t metadata_length = -1; int64_t body_length = -1; From ae1c579d3bd32a75d2602a38a54e7bcf7866be0a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 18 Oct 2023 15:59:48 +0000 Subject: [PATCH 45/95] fix to tests for emscripten --- cpp/src/arrow/util/rle_encoding_test.cc | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 01d1ffd767f..83116aa1b60 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -33,6 +33,8 @@ #include "arrow/util/io_util.h" #include "arrow/util/rle_encoding.h" +#include + namespace arrow { namespace util { @@ -214,7 +216,12 @@ TEST(BitUtil, RoundTripIntValues) { void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; - uint8_t buffer[len]; + #ifdef EMSCRIPTEN + // on Emscripten, this buffer won't fit in the stack + static uint8_t buffer[len]; + #else + uint8_t buffer[len]; + #endif EXPECT_LE(expected_len, len); RleEncoder 
encoder(buffer, len, bit_width); @@ -227,10 +234,9 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_len != -1) { EXPECT_EQ(encoded_len, expected_len); } - if (expected_encoding != NULL) { + if (expected_encoding != NULL && encoded_len==expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } - // Verify read { RleDecoder decoder(buffer, len, bit_width); @@ -256,8 +262,13 @@ void ValidateRle(const std::vector& values, int bit_width, // the returned values are not all the same bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; - uint8_t buffer[len]; - RleEncoder encoder(buffer, len, bit_width); + #ifdef EMSCRIPTEN + // on Emscripten, this buffer won't fit in the stack + static uint8_t buffer[len]; + #else + uint8_t buffer[len]; + #endif + RleEncoder encoder(buffer, len, bit_width); for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); if (!result) { @@ -300,6 +311,7 @@ TEST(Rle, SpecificSequences) { std::vector values; // Test 50 0' followed by 50 1's + values.resize(100); for (int i = 0; i < 50; ++i) { values[i] = 0; @@ -307,7 +319,7 @@ TEST(Rle, SpecificSequences) { for (int i = 50; i < 100; ++i) { values[i] = 1; } - + // expected_buffer valid for bit width <= 1 byte expected_buffer[0] = (50 << 1); expected_buffer[1] = 0; @@ -321,7 +333,7 @@ TEST(Rle, SpecificSequences) { ValidateRle(values, width, nullptr, 2 * (1 + static_cast(bit_util::CeilDiv(width, 8)))); } - + // Test 100 0's and 1's alternating for (int i = 0; i < 100; ++i) { values[i] = i % 2; @@ -331,6 +343,7 @@ TEST(Rle, SpecificSequences) { for (int i = 1; i <= 100 / 8; ++i) { expected_buffer[i] = 0xAA /* 0b10101010 */; } + // Values for the last 4 0 and 1's. The upper 4 bits should be padded to 0. 
expected_buffer[100 / 8 + 1] = 0x0A /* 0b00001010 */; @@ -341,6 +354,7 @@ TEST(Rle, SpecificSequences) { ValidateRle(values, width, nullptr, 1 + static_cast(bit_util::CeilDiv(width * num_values, 8))); } + // Test 16-bit values to confirm encoded values are stored in little endian values.resize(28); @@ -358,6 +372,7 @@ TEST(Rle, SpecificSequences) { expected_buffer[5] = 0xaa; ValidateRle(values, 16, expected_buffer, 6); + // Test 32-bit values to confirm encoded values are stored in little endian values.resize(28); @@ -367,6 +382,7 @@ TEST(Rle, SpecificSequences) { for (int i = 16; i < 28; ++i) { values[i] = 0x5aaaa555; } + expected_buffer[0] = (16 << 1); expected_buffer[1] = 0xa5; expected_buffer[2] = 0xaa; From 688d1abd307628bafeb3b41a6482cd396d9415a8 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 11:53:58 +0100 Subject: [PATCH 46/95] lint --- cpp/cmake_modules/BuildUtils.cmake | 11 +- cpp/cmake_modules/SetupCxxFlags.cmake | 12 +- .../compute/kernels/scalar_string_test.cc | 26 ++--- cpp/src/arrow/io/file_test.cc | 14 +-- cpp/src/arrow/util/io_util.cc | 108 +++++++++--------- cpp/src/arrow/util/rle_encoding_test.cc | 42 ++++--- 6 files changed, 107 insertions(+), 106 deletions(-) diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 86dc951ffe7..3930faa814b 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -744,11 +744,12 @@ function(ADD_TEST_CASE REL_TEST_NAME) add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS}) elseif(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") add_test(${TEST_NAME} - ${BUILD_SUPPORT_DIR}/run-test.sh - ${CMAKE_BINARY_DIR} - test - ${CMAKE_CROSSCOMPILING_EMULATOR} ${TEST_PATH} - ${ARG_TEST_ARGUMENTS}) + ${BUILD_SUPPORT_DIR}/run-test.sh + ${CMAKE_BINARY_DIR} + test + ${CMAKE_CROSSCOMPILING_EMULATOR} + ${TEST_PATH} + ${ARG_TEST_ARGUMENTS}) else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh diff --git 
a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 43f905239cc..6d47a9720a7 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -732,14 +732,18 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 4) Skip linker flags error which happens with -soname parameter set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") - set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") - set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS + "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") + set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS + "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") if(ARROW_TESTING) # flags for building test executables for use in node - set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js") + set(CMAKE_EXE_LINKER_FLAGS + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + ) else() - set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") + set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") endif() # limit debug info because building with DWARF debug info requires diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 20b419a1688..96a4302c25c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -21,8 +21,8 @@ #include #include -#include #include +#include #ifdef ARROW_WITH_UTF8PROC #include @@ -1879,27 +1879,27 @@ 
TYPED_TEST(TestStringKernels, Strptime) { this->CheckUnary("strptime", input4, unit, output4, &options); options.format = "%m/%d/%Y %%z"; - #ifndef EMSCRIPTEN - // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 - this->CheckUnary("strptime", input5, unit, output1, &options); +#ifndef EMSCRIPTEN + // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 + this->CheckUnary("strptime", input5, unit, output1, &options); - options.error_is_null = false; - this->CheckUnary("strptime", input5, unit, output1, &options); + options.error_is_null = false; + this->CheckUnary("strptime", input5, unit, output1, &options); EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, testing::HasSubstr("Invalid: Failed to parse string: '5/1/2020'"), Strptime(ArrayFromJSON(this->type(), input1), options)); - #else - GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20466"; - #endif - - +#else + GTEST_SKIP() << "Skipping some strptime tests due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20466"; +#endif } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { #ifdef EMSCRIPTEN - GTEST_SKIP() << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; + GTEST_SKIP() + << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; #else if (!arrow::internal::kStrptimeSupportsZone) { @@ -1919,7 +1919,7 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, /*error_is_null=*/true); this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), output, &options2); -#endif +#endif } TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) { diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index 5444496ca84..3111539867f 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -544,9 +544,9 @@ 
class TestPipeIO : public ::testing::Test { }; TEST_F(TestPipeIO, TestWrite) { - #ifdef __EMSCRIPTEN__ - GTEST_SKIP() << "Pipes not supported on Emscripten"; - #endif +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif std::string data1 = "test", data2 = "data!"; std::shared_ptr file; uint8_t buffer[10]; @@ -577,9 +577,9 @@ TEST_F(TestPipeIO, TestWrite) { } TEST_F(TestPipeIO, ReadableFileFails) { - #ifdef __EMSCRIPTEN__ - GTEST_SKIP() << "Pipes not supported on Emscripten"; - #endif +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Pipes not supported on Emscripten"; +#endif // ReadableFile fails on non-seekable fd ASSERT_RAISES(IOError, ReadableFile::Open(pipe_.rfd.fd())); } @@ -1001,8 +1001,6 @@ TEST_F(TestMemoryMappedFile, LARGE_MEMORY_TEST(ReadWriteOver4GbFile)) { } TEST_F(TestMemoryMappedFile, RetainMemoryMapReference) { - - // ARROW-494 const int64_t buffer_size = 1024; diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 1f54809db47..9819f9516c0 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1477,64 +1477,64 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, Status MemoryAdviseWillNeed(const std::vector& regions) { #ifndef EMSCRIPTEN - const auto page_size = static_cast(GetPageSize()); - DCHECK_GT(page_size, 0); - const size_t page_mask = ~(page_size - 1); - DCHECK_EQ(page_mask & page_size, page_size); - - auto align_region = [=](const MemoryRegion& region) -> MemoryRegion { - const auto addr = reinterpret_cast(region.addr); - const auto aligned_addr = addr & page_mask; - DCHECK_LT(addr - aligned_addr, page_size); - return {reinterpret_cast(aligned_addr), - region.size + static_cast(addr - aligned_addr)}; - }; - - #ifdef _WIN32 - // PrefetchVirtualMemory() is available on Windows 8 or later - struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY - void* VirtualAddress; - size_t NumberOfBytes; - - PrefetchEntry(const MemoryRegion& 
region) // NOLINT runtime/explicit - : VirtualAddress(region.addr), NumberOfBytes(region.size) {} - }; - using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG); - static const auto prefetch_virtual_memory = reinterpret_cast( - GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory")); - if (prefetch_virtual_memory != nullptr) { - std::vector entries; - entries.reserve(regions.size()); - for (const auto& region : regions) { - if (region.size != 0) { - entries.emplace_back(align_region(region)); - } - } - if (!entries.empty() && - !prefetch_virtual_memory(GetCurrentProcess(), - static_cast(entries.size()), entries.data(), - 0)) { - return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed"); - } - } - return Status::OK(); - #elif defined(POSIX_MADV_WILLNEED) + const auto page_size = static_cast(GetPageSize()); + DCHECK_GT(page_size, 0); + const size_t page_mask = ~(page_size - 1); + DCHECK_EQ(page_mask & page_size, page_size); + + auto align_region = [=](const MemoryRegion& region) -> MemoryRegion { + const auto addr = reinterpret_cast(region.addr); + const auto aligned_addr = addr & page_mask; + DCHECK_LT(addr - aligned_addr, page_size); + return {reinterpret_cast(aligned_addr), + region.size + static_cast(addr - aligned_addr)}; + }; + +#ifdef _WIN32 + // PrefetchVirtualMemory() is available on Windows 8 or later + struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY + void* VirtualAddress; + size_t NumberOfBytes; + + PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit + : VirtualAddress(region.addr), NumberOfBytes(region.size) {} + }; + using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG); + static const auto prefetch_virtual_memory = reinterpret_cast( + GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory")); + if (prefetch_virtual_memory != nullptr) { + std::vector entries; + entries.reserve(regions.size()); for (const auto& 
region : regions) { if (region.size != 0) { - const auto aligned = align_region(region); - int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED); - // EBADF can be returned on Linux in the following cases: - // - the kernel version is older than 3.9 - // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577) - if (err != 0 && err != EBADF) { - return IOErrorFromErrno(err, "posix_madvise failed"); - } + entries.emplace_back(align_region(region)); } } - return Status::OK(); - #else - return Status::OK(); - #endif + if (!entries.empty() && + !prefetch_virtual_memory(GetCurrentProcess(), + static_cast(entries.size()), entries.data(), + 0)) { + return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed"); + } + } + return Status::OK(); +#elif defined(POSIX_MADV_WILLNEED) + for (const auto& region : regions) { + if (region.size != 0) { + const auto aligned = align_region(region); + int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED); + // EBADF can be returned on Linux in the following cases: + // - the kernel version is older than 3.9 + // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577) + if (err != 0 && err != EBADF) { + return IOErrorFromErrno(err, "posix_madvise failed"); + } + } + } + return Status::OK(); +#else + return Status::OK(); +#endif #else return Status::OK(); #endif diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 83116aa1b60..b273170b261 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -33,7 +33,7 @@ #include "arrow/util/io_util.h" #include "arrow/util/rle_encoding.h" -#include +#include namespace arrow { namespace util { @@ -216,12 +216,12 @@ TEST(BitUtil, RoundTripIntValues) { void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; - #ifdef EMSCRIPTEN - // on Emscripten, this buffer won't fit 
in the stack - static uint8_t buffer[len]; - #else - uint8_t buffer[len]; - #endif +#ifdef EMSCRIPTEN + // on Emscripten, this buffer won't fit in the stack + static uint8_t buffer[len]; +#else + uint8_t buffer[len]; +#endif EXPECT_LE(expected_len, len); RleEncoder encoder(buffer, len, bit_width); @@ -234,7 +234,7 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_len != -1) { EXPECT_EQ(encoded_len, expected_len); } - if (expected_encoding != NULL && encoded_len==expected_len) { + if (expected_encoding != NULL && encoded_len == expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } // Verify read @@ -262,13 +262,13 @@ void ValidateRle(const std::vector& values, int bit_width, // the returned values are not all the same bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; - #ifdef EMSCRIPTEN - // on Emscripten, this buffer won't fit in the stack - static uint8_t buffer[len]; - #else - uint8_t buffer[len]; - #endif - RleEncoder encoder(buffer, len, bit_width); +#ifdef EMSCRIPTEN + // on Emscripten, this buffer won't fit in the stack + static uint8_t buffer[len]; +#else + uint8_t buffer[len]; +#endif + RleEncoder encoder(buffer, len, bit_width); for (size_t i = 0; i < values.size(); ++i) { bool result = encoder.Put(values[i]); if (!result) { @@ -311,7 +311,7 @@ TEST(Rle, SpecificSequences) { std::vector values; // Test 50 0' followed by 50 1's - + values.resize(100); for (int i = 0; i < 50; ++i) { values[i] = 0; @@ -319,7 +319,7 @@ TEST(Rle, SpecificSequences) { for (int i = 50; i < 100; ++i) { values[i] = 1; } - + // expected_buffer valid for bit width <= 1 byte expected_buffer[0] = (50 << 1); expected_buffer[1] = 0; @@ -333,7 +333,7 @@ TEST(Rle, SpecificSequences) { ValidateRle(values, width, nullptr, 2 * (1 + static_cast(bit_util::CeilDiv(width, 8)))); } - + // Test 100 0's and 1's alternating for (int i = 0; i < 100; ++i) { values[i] = i % 2; @@ -343,7 +343,7 @@ TEST(Rle, 
SpecificSequences) { for (int i = 1; i <= 100 / 8; ++i) { expected_buffer[i] = 0xAA /* 0b10101010 */; } - + // Values for the last 4 0 and 1's. The upper 4 bits should be padded to 0. expected_buffer[100 / 8 + 1] = 0x0A /* 0b00001010 */; @@ -354,7 +354,6 @@ TEST(Rle, SpecificSequences) { ValidateRle(values, width, nullptr, 1 + static_cast(bit_util::CeilDiv(width * num_values, 8))); } - // Test 16-bit values to confirm encoded values are stored in little endian values.resize(28); @@ -372,7 +371,6 @@ TEST(Rle, SpecificSequences) { expected_buffer[5] = 0xaa; ValidateRle(values, 16, expected_buffer, 6); - // Test 32-bit values to confirm encoded values are stored in little endian values.resize(28); @@ -382,7 +380,7 @@ TEST(Rle, SpecificSequences) { for (int i = 16; i < 28; ++i) { values[i] = 0x5aaaa555; } - + expected_buffer[0] = (16 << 1); expected_buffer[1] = 0xa5; expected_buffer[2] = 0xaa; From da548e60baee6d86ed5fd6fe26aa7f3a806bea58 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 11:57:52 +0100 Subject: [PATCH 47/95] formatting --- cpp/src/arrow/io/file.cc | 16 ++++++++-------- cpp/src/arrow/ipc/read_write_test.cc | 12 ++++++------ cpp/src/arrow/util/atfork_test.cc | 1 - cpp/src/arrow/util/io_util_test.cc | 8 ++++---- cpp/src/arrow/util/mutex.cc | 5 +++-- cpp/src/arrow/util/value_parsing_test.cc | 7 ++++--- 6 files changed, 25 insertions(+), 24 deletions(-) diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 39e55773df3..239091d6cd9 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -397,14 +397,14 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { - #ifndef __EMSCRIPTEN__ - int result = munmap(data(), static_cast(size_)); - // emscripten erroneously reports failures in munmap - // https://github.com/emscripten-core/emscripten/issues/20459 - ARROW_CHECK_EQ(result, 0) << "munmap failed"; - #else - munmap(data(), static_cast(size_)); - #endif +#ifndef __EMSCRIPTEN__ + int result = 
munmap(data(), static_cast(size_)); + // emscripten erroneously reports failures in munmap + // https://github.com/emscripten-core/emscripten/issues/20459 + ARROW_CHECK_EQ(result, 0) << "munmap failed"; +#else + munmap(data(), static_cast(size_)); +#endif } } diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 44790a358e0..ded9d07ffd6 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1035,9 +1035,9 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { }; TEST_F(RecursionLimits, WriteLimit) { - #ifdef EMSCRIPTEN - GTEST_SKIP() << "This crashes the Emscripten runtime."; - #endif +#ifdef EMSCRIPTEN + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif int32_t metadata_length = -1; int64_t body_length = -1; std::shared_ptr schema; @@ -1070,9 +1070,9 @@ TEST_F(RecursionLimits, ReadLimit) { // Test fails with a structured exception on Windows + Debug #if !defined(_WIN32) || defined(NDEBUG) TEST_F(RecursionLimits, StressLimit) { - #ifdef EMSCRIPTEN - GTEST_SKIP() << "This crashes the Emscripten runtime."; - #endif +#ifdef EMSCRIPTEN + GTEST_SKIP() << "This crashes the Emscripten runtime."; +#endif auto CheckDepth = [this](int recursion_depth, bool* it_works) { int32_t metadata_length = -1; diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index 9000ab15a73..a757394db1d 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -261,7 +261,6 @@ TEST_F(TestAtFork, NestedChild) { GTEST_SKIP() << "Test requires threading support"; #endif - auto handlers1 = std::make_shared(PushBefore(1), PushParentAfter(11), PushChildAfter(21)); auto handlers2 = std::make_shared(PushBefore(2), PushParentAfter(12), diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 0614ef5210b..c84c2ab3700 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ 
b/cpp/src/arrow/util/io_util_test.cc @@ -371,7 +371,7 @@ TEST_F(TestSelfPipe, WaitAndSend) { #ifndef ARROW_ENABLE_THREADING GTEST_SKIP() << "Test requires threading support"; #endif - + StartReading(); SleepABit(); AssertPayloadsEventually({}); @@ -1052,9 +1052,9 @@ TEST_F(TestSendSignal, Generic) { } TEST_F(TestSendSignal, ToThread) { - #ifndef ARROW_ENABLE_THREADING - GTEST_SKIP() << "SendSignalToThread requires threading"; - #endif +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "SendSignalToThread requires threading"; +#endif #ifdef _WIN32 uint64_t dummy_thread_id = 42; ASSERT_RAISES(NotImplemented, SendSignalToThread(SIGINT, dummy_thread_id)); diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 48cce34abc3..c03b275efad 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -37,9 +37,10 @@ Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { #if !EMSCRIPTEN || ARROW_ENABLE_THREADING DCHECK(!locked->impl_->mutex_.try_lock()); -#endif +#endif locked->impl_->mutex_.unlock(); - }) {} + }) { +} Mutex::Guard Mutex::TryLock() { DCHECK_NE(impl_, nullptr); diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc index 7a3e13d73b4..53d77a5e700 100644 --- a/cpp/src/arrow/util/value_parsing_test.cc +++ b/cpp/src/arrow/util/value_parsing_test.cc @@ -794,9 +794,10 @@ TEST(TimestampParser, StrptimeZoneOffset) { if (!kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; } - #ifdef EMSCRIPTEN - GTEST_SKIP() << "Test temporarily disabled due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; - #endif +#ifdef EMSCRIPTEN + GTEST_SKIP() << "Test temporarily disabled due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20467 "; +#endif std::string format = "%Y-%d-%m %H:%M:%S%z"; auto parser = TimestampParser::MakeStrptime(format); From b457d7c06e639fcfef73acf17cb159c5757727c6 Mon Sep 
17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 12:15:42 +0100 Subject: [PATCH 48/95] dockerfile fixes in code review Co-authored-by: Sutou Kouhei --- ci/docker/ubuntu-22.04-cpp.dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index 2068a44e9fb..18142b25775 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -69,7 +69,7 @@ RUN apt-get update -y -q && \ ca-certificates \ ccache \ cmake \ - curl \ + curl \ gdb \ git \ libbenchmark-dev \ @@ -116,7 +116,7 @@ RUN apt-get update -y -q && \ rsync \ tzdata \ wget \ - xz-utils && \ + xz-utils && \ apt-get clean && \ rm -rf /var/lib/apt/lists* From 92ead7c59c6c1cef586eb34308ee9b6d825aa3e8 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 12:39:40 +0100 Subject: [PATCH 49/95] review updates --- ci/scripts/cpp_build.sh | 11 +--- cpp/CMakeLists.txt | 4 +- cpp/cmake_modules/SetupCxxFlags.cmake | 19 ++++-- cpp/cmake_modules/ThirdpartyToolchain.cmake | 59 ++++++++++--------- .../compute/kernels/scalar_string_test.cc | 12 ++-- .../compute/kernels/scalar_temporal_test.cc | 6 +- cpp/src/arrow/flight/CMakeLists.txt | 3 +- 7 files changed, 55 insertions(+), 59 deletions(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 7d360ce870b..982437d5549 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -91,16 +91,7 @@ esac mkdir -p ${build_dir} pushd ${build_dir} -if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then - if [ "${ARROW_BUILD_TYPE:-debug}" = "debug" ]; then - echo "Forcing non-parallel build for emscripten debug" - export CMAKE_BUILD_PARALLEL_LEVEL=1 - # emscripten debug linking takes *tons* of memory - # https://github.com/WebAssembly/binaryen/issues/4261 - # so stop parallel builds for debug build - # or else crossbow CI runs out of memory - fi - +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then if [ "${UBUNTU}" = 
"20.04" ]; then echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8566508406b..3ff5e93c582 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -587,8 +587,8 @@ include_directories(src/generated) if(PARQUET_BUILD_SHARED) set_target_properties(arrow_shared PROPERTIES C_VISIBILITY_PRESET hidden - CXX_VISIBILITY_PRESET hidden - VISIBILITY_INLINES_HIDDEN 1) + CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN + 1) endif() # diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 6d47a9720a7..504421f33e9 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -319,7 +319,12 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32") + else() + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wshorten-64-to-32") + endif() set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-missing-braces") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-unused-parameter") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-constant-logical-operand") @@ -674,7 +679,12 @@ if(NOT MSVC) if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") string(APPEND CXX_DEBUG_FLAGS " -O0") endif() - if(ARROW_GGDB_DEBUG) + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + string(APPEND C_DEBUG_FLAGS " -g2") + string(APPEND CXX_DEBUG_FLAGS " -g2") + string(APPEND C_RELWITHDEBINFO_FLAGS " -g2") + string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2") + elseif(ARROW_GGDB_DEBUG) string(APPEND C_DEBUG_FLAGS " -ggdb") string(APPEND CXX_DEBUG_FLAGS " -ggdb") 
string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") @@ -730,8 +740,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 2) Tell it to use javascript / webassembly 64 bit number support. # 3) Tell it to build with support for C++ exceptions # 4) Skip linker flags error which happens with -soname parameter - set(ARROW_EMSCRIPTEN_LINKER_FLAGS - "-sUSE_ZLIB=1 -sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS @@ -740,7 +749,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(ARROW_TESTING) # flags for building test executables for use in node set(CMAKE_EXE_LINKER_FLAGS - "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" ) else() set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index f71f09cbe09..12bac630ba9 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1420,9 +1420,18 @@ macro(build_brotli) ) set(BROTLI_CMAKE_ARGS ${EP_COMMON_CMAKE_ARGS} "-DCMAKE_INSTALL_PREFIX=${BROTLI_PREFIX}") + set(BROTLI_EP_OPTIONS) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - # cmake install is disabled for brotli on emscripten, so we have - # to manually copy the libraries to our install directory + # "cmake install" is disabled for Brotli on Emscripten, so the + # default INSTALL_COMMAND fails. We need to disable the default + # INSTALL_COMMAND. 
+ list(APPEND + BROTLI_EP_OPTIONS + INSTALL_COMMAND + ${CMAKE_COMMAND} + -E + true) + set(BROTLI_BUILD_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep-build) set(BROTLI_BUILD_LIBS "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlienc-static${CMAKE_STATIC_LIBRARY_SUFFIX}" @@ -1430,20 +1439,23 @@ macro(build_brotli) "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" ) + endif() + + externalproject_add(brotli_ep + ${EP_COMMON_OPTIONS} + URL ${BROTLI_SOURCE_URL} + URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" + BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" + "${BROTLI_STATIC_LIBRARY_DEC}" + "${BROTLI_STATIC_LIBRARY_COMMON}" + ${BROTLI_BUILD_BYPRODUCTS} + CMAKE_ARGS ${BROTLI_CMAKE_ARGS} + STEP_TARGETS headers_copy ${BROTLI_EP_OPTIONS}) + + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # Copy the libraries to our install directory manually. set(BROTLI_BUILD_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/brotli_ep-prefix/src/brotli_ep/c/include/brotli) - - externalproject_add(brotli_ep - ${EP_COMMON_OPTIONS} - URL ${BROTLI_SOURCE_URL} - URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" - "${BROTLI_STATIC_LIBRARY_DEC}" - "${BROTLI_STATIC_LIBRARY_COMMON}" - ${BROTLI_BUILD_BYPRODUCTS} - CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy - INSTALL_COMMAND "") add_custom_command(TARGET brotli_ep POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different @@ -1451,17 +1463,6 @@ macro(build_brotli) ${BROTLI_LIB_DIR} COMMAND ${CMAKE_COMMAND} -E copy_directory ${BROTLI_BUILD_INCLUDE_DIR} ${BROTLI_INCLUDE_DIR}/brotli) - else() # not emscripten - just behave as normal - externalproject_add(brotli_ep - ${EP_COMMON_OPTIONS} - URL ${BROTLI_SOURCE_URL} - URL_HASH "SHA256=${ARROW_BROTLI_BUILD_SHA256_CHECKSUM}" - BUILD_BYPRODUCTS "${BROTLI_STATIC_LIBRARY_ENC}" - "${BROTLI_STATIC_LIBRARY_DEC}" - "${BROTLI_STATIC_LIBRARY_COMMON}" - 
${BROTLI_BUILD_BYPRODUCTS} - CMAKE_ARGS ${BROTLI_CMAKE_ARGS} - STEP_TARGETS headers_copy) endif() add_dependencies(toolchain brotli_ep) @@ -2239,12 +2240,12 @@ function(build_gtest) endif() # If we're building static libs for Emscripten, we need to build *everything* as # static libs. - if(NOT (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") OR ARROW_BUILD_SHARED) - set(BUILD_SHARED_LIBS ON) - set(BUILD_STATIC_LIBS OFF) - else() + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(BUILD_SHARED_LIBS OFF) set(BUILD_STATIC_LIBS ON) + else() + set(BUILD_SHARED_LIBS ON) + set(BUILD_STATIC_LIBS OFF) endif() # We need to use "cache" variable to override the default # INSTALL_GTEST option by this value. See also: diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 96a4302c25c..1ced0c9ffd1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1859,6 +1859,10 @@ TYPED_TEST(TestBaseBinaryKernels, ExtractRegexInvalid) { #endif TYPED_TEST(TestStringKernels, Strptime) { + #ifdef EMSCRIPTEN + GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20466"; + #endif + std::string input1 = R"(["5/1/2020", null, null, "12/13/1900", null])"; std::string input2 = R"(["5-1-2020", "12/13/1900"])"; std::string input3 = R"(["5/1/2020", "AA/BB/CCCC"])"; @@ -1879,7 +1883,6 @@ TYPED_TEST(TestStringKernels, Strptime) { this->CheckUnary("strptime", input4, unit, output4, &options); options.format = "%m/%d/%Y %%z"; -#ifndef EMSCRIPTEN // emscripten bug https://github.com/emscripten-core/emscripten/issues/20466 this->CheckUnary("strptime", input5, unit, output1, &options); @@ -1890,17 +1893,13 @@ TYPED_TEST(TestStringKernels, Strptime) { Invalid, testing::HasSubstr("Invalid: Failed to parse string: '5/1/2020'"), Strptime(ArrayFromJSON(this->type(), input1), options)); -#else - GTEST_SKIP() << 
"Skipping some strptime tests due to emscripten bug " - "https://github.com/emscripten-core/emscripten/issues/20466"; -#endif } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { #ifdef EMSCRIPTEN GTEST_SKIP() << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; -#else +#endif if (!arrow::internal::kStrptimeSupportsZone) { GTEST_SKIP() << "strptime does not support %z on this platform"; @@ -1919,7 +1918,6 @@ TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { StrptimeOptions options2("%Y-%m-%dT%H:%M%z", TimeUnit::MICRO, /*error_is_null=*/true); this->CheckUnary("strptime", input2, timestamp(TimeUnit::MICRO, "UTC"), output, &options2); -#endif } TYPED_TEST(TestStringKernels, StrptimeDoesNotProvideDefaultOptions) { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index f6f023f15ba..13512c9d6b1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2145,7 +2145,7 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; #elif defined(EMSCRIPTEN) GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; -#else +#endif if (!LocaleExists("fr_FR.UTF-8")) { GTEST_SKIP() << "locale 'fr_FR.UTF-8' doesn't exist on this system"; @@ -2158,13 +2158,12 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { ["01 janvier 1970 00:00:59,123", "18 août 2021 15:11:50,456", null])"; CheckScalarUnary("strftime", timestamp(TimeUnit::MILLI, "UTC"), milliseconds, utf8(), expected, &options); -#endif } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { #ifdef EMSCRIPTEN GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; -#else +#endif auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); const char* seconds = R"(["1970-01-01T00:00:59", null])"; auto arr = 
ArrayFromJSON(timestamp(TimeUnit::SECOND, "UTC"), seconds); @@ -2172,7 +2171,6 @@ TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, testing::HasSubstr("Cannot find locale 'non-existent'"), Strftime(arr, options)); -#endif } TEST_F(ScalarTemporalTest, TestTemporalDifferenceZoned) { diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index c37d2c56700..a26ddefada3 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -163,8 +163,7 @@ if(MSVC) "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.h") # Suppress missing dll-interface warning set_source_files_properties("${GENERATED_SOURCE}" - PROPERTIES COMPILE_OPTIONS "/wd4251" - GENERATED TRUE + PROPERTIES COMPILE_OPTIONS "/wd4251" GENERATED TRUE SKIP_UNITY_BUILD_INCLUSION TRUE) endforeach() endif() From 58fbdfe6b18d2c7260e10f22287aac403368da96 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 14:39:30 +0100 Subject: [PATCH 50/95] cmake-format updated version --- cpp/CMakeLists.txt | 4 ++-- cpp/src/arrow/flight/CMakeLists.txt | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3ff5e93c582..8566508406b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -587,8 +587,8 @@ include_directories(src/generated) if(PARQUET_BUILD_SHARED) set_target_properties(arrow_shared PROPERTIES C_VISIBILITY_PRESET hidden - CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN - 1) + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1) endif() # diff --git a/cpp/src/arrow/flight/CMakeLists.txt b/cpp/src/arrow/flight/CMakeLists.txt index a26ddefada3..c37d2c56700 100644 --- a/cpp/src/arrow/flight/CMakeLists.txt +++ b/cpp/src/arrow/flight/CMakeLists.txt @@ -163,7 +163,8 @@ if(MSVC) "${CMAKE_CURRENT_BINARY_DIR}/Flight.pb.h") # Suppress missing dll-interface warning set_source_files_properties("${GENERATED_SOURCE}" - PROPERTIES COMPILE_OPTIONS 
"/wd4251" GENERATED TRUE + PROPERTIES COMPILE_OPTIONS "/wd4251" + GENERATED TRUE SKIP_UNITY_BUILD_INCLUSION TRUE) endforeach() endif() From e9e1988702fb6d0bfbe35b773bb022b222217065 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 14:40:41 +0100 Subject: [PATCH 51/95] lint --- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 1ced0c9ffd1..0496dbe9f81 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1859,9 +1859,10 @@ TYPED_TEST(TestBaseBinaryKernels, ExtractRegexInvalid) { #endif TYPED_TEST(TestStringKernels, Strptime) { - #ifdef EMSCRIPTEN - GTEST_SKIP()<< "Skipping some strptime tests due to emscripten bug https://github.com/emscripten-core/emscripten/issues/20466"; - #endif +#ifdef EMSCRIPTEN + GTEST_SKIP() << "Skipping some strptime tests due to emscripten bug " + "https://github.com/emscripten-core/emscripten/issues/20466"; +#endif std::string input1 = R"(["5/1/2020", null, null, "12/13/1900", null])"; std::string input2 = R"(["5-1-2020", "12/13/1900"])"; @@ -1892,7 +1893,6 @@ TYPED_TEST(TestStringKernels, Strptime) { EXPECT_RAISES_WITH_MESSAGE_THAT( Invalid, testing::HasSubstr("Invalid: Failed to parse string: '5/1/2020'"), Strptime(ArrayFromJSON(this->type(), input1), options)); - } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { From ddbcf564f080622726480fea339ec61f799f7074 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 14:54:26 +0100 Subject: [PATCH 52/95] licence on test init js --- cpp/build-support/emscripten-test-init.js | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cpp/build-support/emscripten-test-init.js b/cpp/build-support/emscripten-test-init.js index 6de62a3d234..bbb542a29f0 100644 --- 
a/cpp/build-support/emscripten-test-init.js +++ b/cpp/build-support/emscripten-test-init.js @@ -1,3 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + var Module = { }; From 1d4cc2cad7f19abffdbd4930a7794b12dd415332 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 19 Oct 2023 15:06:06 +0100 Subject: [PATCH 53/95] fix to preprocessor directive check --- cpp/src/arrow/util/mutex.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index c03b275efad..5a3f459b2fc 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -35,7 +35,7 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { -#if !EMSCRIPTEN || ARROW_ENABLE_THREADING +#if !EMSCRIPTEN || defined(ARROW_ENABLE_THREADING) DCHECK(!locked->impl_->mutex_.try_lock()); #endif locked->impl_->mutex_.unlock(); From 8b8f8d0a9b1d03363bd171eb2bc4272d7ac6c4dd Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 27 Oct 2023 15:57:42 +0100 Subject: [PATCH 54/95] fix #defines --- cpp/src/arrow/io/file.cc | 2 +- cpp/src/arrow/util/mutex.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/io/file.cc 
b/cpp/src/arrow/io/file.cc index 239091d6cd9..29803f90e26 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -397,7 +397,7 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { -#ifndef __EMSCRIPTEN__ +#ifndef EMSCRIPTEN int result = munmap(data(), static_cast(size_)); // emscripten erroneously reports failures in munmap // https://github.com/emscripten-core/emscripten/issues/20459 diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 5a3f459b2fc..ff1bfd7b5ac 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -35,7 +35,7 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { -#if !EMSCRIPTEN || defined(ARROW_ENABLE_THREADING) +#if !defined(EMSCRIPTEN) || defined(ARROW_ENABLE_THREADING) DCHECK(!locked->impl_->mutex_.try_lock()); #endif locked->impl_->mutex_.unlock(); From 6124d53860c501b5d22addf3548b00e606565545 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Tue, 7 Nov 2023 14:14:47 +0000 Subject: [PATCH 55/95] review fixes --- cpp/CMakePresets.json | 1 - cpp/cmake_modules/SetupCxxFlags.cmake | 7 ------- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 + cpp/src/arrow/util/rle_encoding_test.cc | 17 ++++++++--------- 4 files changed, 9 insertions(+), 17 deletions(-) diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index fe2f405e82a..7973ec7f969 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -53,7 +53,6 @@ "ARROW_ACERO": "ON", "ARROW_BUILD_SHARED": "OFF", "ARROW_BUILD_STATIC": "ON", - "ARROW_BUILD_TESTS": "ON", "ARROW_CUDA": "OFF", "ARROW_DEPENDENCY_SOURCE": "BUNDLED", "ARROW_DEPENDENCY_USE_SHARED": "OFF", diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 81888092194..56cc0f2db27 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -750,11 +750,4 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") 
set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") endif() - # limit debug info because building with DWARF debug info requires - # absolutely tons of memory - # https://github.com/WebAssembly/binaryen/issues/4261 - if(${UPPERCASE_BUILD_TYPE} STREQUAL "DEBUG") - string(APPEND CMAKE_CXX_FLAGS_DEBUG " -g2") - endif() - endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 819d1fa2e15..9a5b397d8ca 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1826,6 +1826,7 @@ macro(build_protobuf) endif() set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR "cmake") + externalproject_add(protobuf_ep ${EP_COMMON_OPTIONS} ${PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_COMPILER}" diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index b273170b261..153d34bcdcb 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -33,8 +33,6 @@ #include "arrow/util/io_util.h" #include "arrow/util/rle_encoding.h" -#include - namespace arrow { namespace util { @@ -217,8 +215,10 @@ void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; #ifdef EMSCRIPTEN - // on Emscripten, this buffer won't fit in the stack - static uint8_t buffer[len]; + // don't make this on the stack as it is + // too big for emscripten + std::vector buffer_vec((size_t)len); + uint8_t *buffer = buffer_vec.data(); #else uint8_t buffer[len]; #endif @@ -263,8 +263,10 @@ void ValidateRle(const std::vector& values, int bit_width, bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; #ifdef EMSCRIPTEN - // on Emscripten, this buffer won't fit in the stack - static uint8_t buffer[len]; + // don't make 
this on the stack as it is + // too big for emscripten + std::vector buffer_vec((size_t)len); + uint8_t *buffer = buffer_vec.data(); #else uint8_t buffer[len]; #endif @@ -311,7 +313,6 @@ TEST(Rle, SpecificSequences) { std::vector values; // Test 50 0' followed by 50 1's - values.resize(100); for (int i = 0; i < 50; ++i) { values[i] = 0; @@ -343,7 +344,6 @@ TEST(Rle, SpecificSequences) { for (int i = 1; i <= 100 / 8; ++i) { expected_buffer[i] = 0xAA /* 0b10101010 */; } - // Values for the last 4 0 and 1's. The upper 4 bits should be padded to 0. expected_buffer[100 / 8 + 1] = 0x0A /* 0b00001010 */; @@ -380,7 +380,6 @@ TEST(Rle, SpecificSequences) { for (int i = 16; i < 28; ++i) { values[i] = 0x5aaaa555; } - expected_buffer[0] = (16 << 1); expected_buffer[1] = 0xa5; expected_buffer[2] = 0xaa; From b10b41524c40338a8fc42700b0b3e8984f183119 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Tue, 7 Nov 2023 14:18:46 +0000 Subject: [PATCH 56/95] use __EMSCRIPTEN__ because emscripten docs say so --- cpp/src/arrow/array/array_dict_test.cc | 2 +- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 4 ++-- cpp/src/arrow/compute/kernels/scalar_temporal_test.cc | 4 ++-- cpp/src/arrow/io/file.cc | 2 +- cpp/src/arrow/ipc/read_write_test.cc | 4 ++-- cpp/src/arrow/util/io_util.cc | 2 +- cpp/src/arrow/util/io_util_test.cc | 2 +- cpp/src/arrow/util/mutex.cc | 2 +- cpp/src/arrow/util/rle_encoding_test.cc | 4 ++-- cpp/src/arrow/util/value_parsing_test.cc | 2 +- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/array/array_dict_test.cc b/cpp/src/arrow/array/array_dict_test.cc index 18c0d1d05d6..c7e132295fd 100644 --- a/cpp/src/arrow/array/array_dict_test.cc +++ b/cpp/src/arrow/array/array_dict_test.cc @@ -1129,7 +1129,7 @@ TEST(TestDictionary, Validate) { arr = std::make_shared(dict_type, indices, MakeArray(invalid_data)); ASSERT_RAISES(Invalid, arr->ValidateFull()); -#if !defined(__APPLE__) && !defined(EMSCRIPTEN) +#if !defined(__APPLE__) && 
!defined(__EMSCRIPTEN__) // GH-35712: ASSERT_DEATH would make testing slow on MacOS. ASSERT_DEATH( { diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 0496dbe9f81..53d284f8b65 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1859,7 +1859,7 @@ TYPED_TEST(TestBaseBinaryKernels, ExtractRegexInvalid) { #endif TYPED_TEST(TestStringKernels, Strptime) { -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "Skipping some strptime tests due to emscripten bug " "https://github.com/emscripten-core/emscripten/issues/20466"; #endif @@ -1896,7 +1896,7 @@ TYPED_TEST(TestStringKernels, Strptime) { } TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; #endif diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 13512c9d6b1..36c1d31409c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -2143,7 +2143,7 @@ TEST_F(ScalarTemporalTest, StrftimeCLocale) { TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { #ifdef _WIN32 GTEST_SKIP() << "There is a known bug in strftime for locales on Windows (ARROW-15922)"; -#elif defined(EMSCRIPTEN) +#elif defined(__EMSCRIPTEN__) GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; #endif @@ -2161,7 +2161,7 @@ TEST_F(ScalarTemporalTest, StrftimeOtherLocale) { } TEST_F(ScalarTemporalTest, StrftimeInvalidLocale) { -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "Emscripten doesn't build with multiple locales as default"; #endif auto options = StrftimeOptions("%d %B %Y %H:%M:%S", "non-existent"); diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index 
29803f90e26..239091d6cd9 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -397,7 +397,7 @@ class MemoryMappedFile::MemoryMap ~Region() { if (data_ != nullptr) { -#ifndef EMSCRIPTEN +#ifndef __EMSCRIPTEN__ int result = munmap(data(), static_cast(size_)); // emscripten erroneously reports failures in munmap // https://github.com/emscripten-core/emscripten/issues/20459 diff --git a/cpp/src/arrow/ipc/read_write_test.cc b/cpp/src/arrow/ipc/read_write_test.cc index 2337e1380cb..3b9d5243c8d 100644 --- a/cpp/src/arrow/ipc/read_write_test.cc +++ b/cpp/src/arrow/ipc/read_write_test.cc @@ -1038,7 +1038,7 @@ class RecursionLimits : public ::testing::Test, public io::MemoryMapFixture { }; TEST_F(RecursionLimits, WriteLimit) { -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "This crashes the Emscripten runtime."; #endif int32_t metadata_length = -1; @@ -1073,7 +1073,7 @@ TEST_F(RecursionLimits, ReadLimit) { // Test fails with a structured exception on Windows + Debug #if !defined(_WIN32) || defined(NDEBUG) TEST_F(RecursionLimits, StressLimit) { -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "This crashes the Emscripten runtime."; #endif diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 9819f9516c0..5307cc0d477 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1476,7 +1476,7 @@ Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes, } Status MemoryAdviseWillNeed(const std::vector& regions) { -#ifndef EMSCRIPTEN +#ifndef __EMSCRIPTEN__ const auto page_size = static_cast(GetPageSize()); DCHECK_GT(page_size, 0); const size_t page_mask = ~(page_size - 1); diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index c84c2ab3700..fd4b94ef0ed 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -147,7 +147,7 @@ TEST(MemoryAdviseWillNeed, Basics) { // Should probably fail // (but on Windows 
or Emscripten, MemoryAdviseWillNeed can be a no-op) -#if !defined(_WIN32) && !defined(EMSCRIPTEN) +#if !defined(_WIN32) && !defined(__EMSCRIPTEN__) ASSERT_RAISES(IOError, MemoryAdviseWillNeed({{nullptr, std::numeric_limits::max()}})); #endif diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index ff1bfd7b5ac..4643dd0e548 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -35,7 +35,7 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { -#if !defined(EMSCRIPTEN) || defined(ARROW_ENABLE_THREADING) +#if !defined(__EMSCRIPTEN__) || defined(ARROW_ENABLE_THREADING) DCHECK(!locked->impl_->mutex_.try_lock()); #endif locked->impl_->mutex_.unlock(); diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 153d34bcdcb..ce5bc798f4e 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -214,7 +214,7 @@ TEST(BitUtil, RoundTripIntValues) { void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ // don't make this on the stack as it is // too big for emscripten std::vector buffer_vec((size_t)len); @@ -262,7 +262,7 @@ void ValidateRle(const std::vector& values, int bit_width, // the returned values are not all the same bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ // don't make this on the stack as it is // too big for emscripten std::vector buffer_vec((size_t)len); diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc index 53d77a5e700..92d727019aa 100644 --- a/cpp/src/arrow/util/value_parsing_test.cc +++ b/cpp/src/arrow/util/value_parsing_test.cc @@ -794,7 +794,7 @@ TEST(TimestampParser, StrptimeZoneOffset) { if (!kStrptimeSupportsZone) { GTEST_SKIP() << 
"strptime does not support %z on this platform"; } -#ifdef EMSCRIPTEN +#ifdef __EMSCRIPTEN__ GTEST_SKIP() << "Test temporarily disabled due to emscripten bug " "https://github.com/emscripten-core/emscripten/issues/20467 "; #endif From c9a79cc38a5fdbbd973ea77c31fe76fcb787791f Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Tue, 7 Nov 2023 14:28:26 +0000 Subject: [PATCH 57/95] whitespace --- cpp/src/arrow/util/rle_encoding_test.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index ce5bc798f4e..823aa708954 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -237,6 +237,7 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_encoding != NULL && encoded_len == expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } + // Verify read { RleDecoder decoder(buffer, len, bit_width); From 4980d482986fdef049cc7e9e0f58573f89bb5b50 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Tue, 7 Nov 2023 18:29:23 +0000 Subject: [PATCH 58/95] build fixes after review --- cpp/cmake_modules/SetupCxxFlags.cmake | 18 ++++++------ cpp/cmake_modules/ThirdpartyToolchain.cmake | 30 +++++++++++--------- cpp/src/arrow/compute/kernels/CMakeLists.txt | 13 +++++++-- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 56cc0f2db27..54d7e2c4cc3 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -717,24 +717,22 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # flags are: - # 1) We're using zlib from Emscripten ports - # 2) We are building library code - # 3) We force *everything* to build as position independent - # 4) And with support for C++ exceptions - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions") + # 1) We are building library code + # 2) We force 
*everything* to build as position independent + # 3) And with support for C++ exceptions + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") # size_t is 32 bit in emscripten wasm32 - ignore conversion errors # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -sUSE_ZLIB=1 -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator" + "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator" ) # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) # flags are: - # 1) Tell it to use zlib from Emscripten ports - # 2) Tell it to use javascript / webassembly 64 bit number support. - # 3) Tell it to build with support for C++ exceptions - # 4) Skip linker flags error which happens with -soname parameter + # 1) Tell it to use javascript / webassembly 64 bit number support. + # 2) Tell it to build with support for C++ exceptions + # 3) Skip linker flags error which happens with -soname parameter set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9a5b397d8ca..b570316337d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1826,7 +1826,7 @@ macro(build_protobuf) endif() set(PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS CMAKE_ARGS ${PROTOBUF_CMAKE_ARGS} SOURCE_SUBDIR "cmake") - + externalproject_add(protobuf_ep ${EP_COMMON_OPTIONS} ${PROTOBUF_EXTERNAL_PROJECT_ADD_ARGS} BUILD_BYPRODUCTS "${PROTOBUF_STATIC_LIB}" "${PROTOBUF_COMPILER}" @@ -2521,16 +2521,18 @@ macro(build_zlib) if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) execute_process(COMMAND embuilder --pic --force 
build zlib) endif() - set(ZLIB_STATIC_LIB ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) - set(ZLIB_LIBRARIES ${ZLIB_LIBRARY}) - # set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") - - add_library(ZLIB::ZLIB STATIC IMPORTED) - set(ZLIB_LIBRARIES ${ZLIB_STATIC_LIB}) - set(ZLIB_INCLUDE_DIRS "${ZLIB_PREFIX}/include") - set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) - # target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") - + add_library(ZLIB::ZLIB INTERFACE IMPORTED) + # We need -sMAIN_MODULE=1 (for executable) or -sSIDE_MODULE=1 (for + # library) in target_compile_options() too. If we don't have + # -sMAIN_MODULE=1 nor -sSIDE_MODULE=1 here, Emscripten tries + # finding no-PIC libz.a. We can use -sRELOCATABLE=1 for this but + # it seems that RELOCATABLE isn't public setting. Emscripten + # document doesn't mention RELOCATABLE. (MAIN_MODULE/SIDE_MODULE + # are mentioned.) + target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 + "$,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>") + target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 + "$,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>") else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") if(MSVC) @@ -2562,10 +2564,10 @@ macro(build_zlib) add_dependencies(toolchain zlib_ep) add_dependencies(ZLIB::ZLIB zlib_ep) - endif() + list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) + endif() - list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) - set(ZLIB_VENDORED TRUE) + set(ZLIB_VENDORED TRUE) endmacro() if(ARROW_WITH_ZLIB) diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 4350cd57ff0..2c10cd0b423 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -21,9 +21,16 @@ # Define arrow_compute_testing object library for common test files if(ARROW_TESTING) add_library(arrow_compute_kernels_testing OBJECT test_util.cc) - # 
Even though this is still just an object library we still need to "link" our - # dependencies so that include paths are configured correctly - target_link_libraries(arrow_compute_kernels_testing PRIVATE ${ARROW_GTEST_GMOCK}) + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # because gtest is statically linked on emscripten + # so we can't link to it twice or else duplicate symbols happen + get_target_property(MOCK_INCLUDES ${ARROW_GTEST_GMOCK} INCLUDE_DIRECTORIES) + target_include_directories(arrow_compute_kernels_testing PRIVATE ${MOCK_INCLUDES}) + else() + # Even though this is still just an object library we still need to "link" our + # dependencies so that include paths are configured correctly + target_link_libraries(arrow_compute_kernels_testing PRIVATE ${ARROW_GTEST_GMOCK}) + endif() endif() add_arrow_test(scalar_cast_test From d6026f9a8ce7739e10c07824323d77ca6102053d Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Tue, 7 Nov 2023 19:47:54 +0000 Subject: [PATCH 59/95] fix zlib on wasm --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 15 ++++----------- cpp/src/arrow/compute/kernels/CMakeLists.txt | 13 +++---------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index b570316337d..bf51419a5b5 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2522,17 +2522,10 @@ macro(build_zlib) execute_process(COMMAND embuilder --pic --force build zlib) endif() add_library(ZLIB::ZLIB INTERFACE IMPORTED) - # We need -sMAIN_MODULE=1 (for executable) or -sSIDE_MODULE=1 (for - # library) in target_compile_options() too. If we don't have - # -sMAIN_MODULE=1 nor -sSIDE_MODULE=1 here, Emscripten tries - # finding no-PIC libz.a. We can use -sRELOCATABLE=1 for this but - # it seems that RELOCATABLE isn't public setting. Emscripten - # document doesn't mention RELOCATABLE. (MAIN_MODULE/SIDE_MODULE - # are mentioned.) 
- target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 - "$,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>") - target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 - "$,EXECUTABLE>,-sMAIN_MODULE=1,-sSIDE_MODULE=1>") + # We need -fPIC in target_compile_options() too + # to stop it using non-PIC libz.a in linking. + target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -fPIC) + target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -fPIC) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") if(MSVC) diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 2c10cd0b423..4350cd57ff0 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -21,16 +21,9 @@ # Define arrow_compute_testing object library for common test files if(ARROW_TESTING) add_library(arrow_compute_kernels_testing OBJECT test_util.cc) - if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - # because gtest is statically linked on emscripten - # so we can't link to it twice or else duplicate symbols happen - get_target_property(MOCK_INCLUDES ${ARROW_GTEST_GMOCK} INCLUDE_DIRECTORIES) - target_include_directories(arrow_compute_kernels_testing PRIVATE ${MOCK_INCLUDES}) - else() - # Even though this is still just an object library we still need to "link" our - # dependencies so that include paths are configured correctly - target_link_libraries(arrow_compute_kernels_testing PRIVATE ${ARROW_GTEST_GMOCK}) - endif() + # Even though this is still just an object library we still need to "link" our + # dependencies so that include paths are configured correctly + target_link_libraries(arrow_compute_kernels_testing PRIVATE ${ARROW_GTEST_GMOCK}) endif() add_arrow_test(scalar_cast_test From fed5439d30b3bf0bb4c9f889c10b3b2232853c66 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 11:13:49 +0000 Subject: [PATCH 60/95] fix to csv test --- python/pyarrow/tests/test_csv.py | 
46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index afc5380b755..63a36f94178 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1405,36 +1405,33 @@ def signal_from_thread(): print("workload size:", workload_size) large_csv = b"a,b,c\n" + b"1,2,3\n" * workload_size exc_info = None - + # the signal above may either be posted as a KeyboardInterrupt + # or handled by arrow and passed on as type pyarrow.ArrowCancelled try: - # We use a signal fd to reliably ensure that the signal - # has been delivered to Python, regardless of how exactly - # it was caught. - with util.signal_wakeup_fd() as sigfd: - try: - t = threading.Thread(target=signal_from_thread) - t.start() - t1 = time.time() - try: - self.read_bytes(large_csv) - except KeyboardInterrupt as e: - exc_info = e - last_duration = time.time() - t1 - finally: - # Wait for signal to arrive if it didn't already, - # to avoid getting a KeyboardInterrupt after the - # `except` block below. - select.select([sigfd], [], [sigfd], 10.0) - + try: + t = threading.Thread(target=signal_from_thread) + t.start() + t1 = time.time() + self.read_bytes(large_csv) + except BaseException as e: + exc_info = e + finally: + last_duration = time.time() - t1 + # make sure signal is sent before we exit the + # keyboardinterrupt handler below + t.join() except KeyboardInterrupt: # KeyboardInterrupt didn't interrupt `read_bytes` above. + # because it didn't take long enough pass if exc_info is not None: - # We managed to get `self.read_bytes` interrupted, see if it + # If we managed to get `self.read_bytes` interrupted, see if it # was actually interrupted inside Arrow C++ or in the Python # scaffolding. 
- if exc_info.__context__ is not None: + if not isinstance(exc_info,KeyboardInterrupt) and not isinstance(exc_info, pa.ArrowCancelled): + pytest.fail(f"Unexpected exception {exc_info} ({type(exc_info)}) thrown in read csv") + elif exc_info.__context__ is not None or type(exc_info)==pa.ArrowCancelled: # Interrupted inside Arrow C++, we're satisfied now break @@ -1446,7 +1443,10 @@ def signal_from_thread(): # Interruption should have arrived timely assert last_duration <= 1.0 - e = exc_info.__context__ + if isinstance(exc_info,pa.ArrowCancelled): + e= exc_info + else: + e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum From 48c795b68150bd5f586f6aba93c561e3d5008f71 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 11:18:39 +0000 Subject: [PATCH 61/95] typo #if! instead of ifndef --- cpp/src/arrow/util/io_util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 5307cc0d477..41342d5c710 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -2062,7 +2062,7 @@ Status SendSignal(int signum) { } Status SendSignalToThread(int signum, uint64_t thread_id) { -#if !ARROW_ENABLE_THREADING +#ifndef ARROW_ENABLE_THREADING return Status::NotImplemented("Can't send signal with no threads"); #elif _WIN32 return Status::NotImplemented("Cannot send signal to specific thread on Windows"); From 7b0171747d7fe51b4ec2bffadc5d20913b1c1633 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 11:34:09 +0000 Subject: [PATCH 62/95] revert test changes --- python/pyarrow/tests/test_csv.py | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index 63a36f94178..afc5380b755 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1405,33 +1405,36 @@ def signal_from_thread(): 
print("workload size:", workload_size) large_csv = b"a,b,c\n" + b"1,2,3\n" * workload_size exc_info = None - # the signal above may either be posted as a KeyboardInterrupt - # or handled by arrow and passed on as type pyarrow.ArrowCancelled + try: - try: - t = threading.Thread(target=signal_from_thread) - t.start() - t1 = time.time() - self.read_bytes(large_csv) - except BaseException as e: - exc_info = e - finally: - last_duration = time.time() - t1 - # make sure signal is sent before we exit the - # keyboardinterrupt handler below - t.join() + # We use a signal fd to reliably ensure that the signal + # has been delivered to Python, regardless of how exactly + # it was caught. + with util.signal_wakeup_fd() as sigfd: + try: + t = threading.Thread(target=signal_from_thread) + t.start() + t1 = time.time() + try: + self.read_bytes(large_csv) + except KeyboardInterrupt as e: + exc_info = e + last_duration = time.time() - t1 + finally: + # Wait for signal to arrive if it didn't already, + # to avoid getting a KeyboardInterrupt after the + # `except` block below. + select.select([sigfd], [], [sigfd], 10.0) + except KeyboardInterrupt: # KeyboardInterrupt didn't interrupt `read_bytes` above. - # because it didn't take long enough pass if exc_info is not None: - # If we managed to get `self.read_bytes` interrupted, see if it + # We managed to get `self.read_bytes` interrupted, see if it # was actually interrupted inside Arrow C++ or in the Python # scaffolding. 
- if not isinstance(exc_info,KeyboardInterrupt) and not isinstance(exc_info, pa.ArrowCancelled): - pytest.fail(f"Unexpected exception {exc_info} ({type(exc_info)}) thrown in read csv") - elif exc_info.__context__ is not None or type(exc_info)==pa.ArrowCancelled: + if exc_info.__context__ is not None: # Interrupted inside Arrow C++, we're satisfied now break @@ -1443,10 +1446,7 @@ def signal_from_thread(): # Interruption should have arrived timely assert last_duration <= 1.0 - if isinstance(exc_info,pa.ArrowCancelled): - e= exc_info - else: - e = exc_info.__context__ + e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum From f780784f65fa43ef81d9fddbae8b4c451e0098f6 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 11:46:16 +0000 Subject: [PATCH 63/95] include config.h for threading define --- cpp/src/arrow/util/io_util.cc | 1 + cpp/src/arrow/util/mutex.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index 41342d5c710..b7d38a4c34e 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -95,6 +95,7 @@ #include "arrow/result.h" #include "arrow/util/atfork_internal.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/mutex.h" diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index 4643dd0e548..eb12acfc6f1 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -24,6 +24,7 @@ #include #endif +#include "arrow/util/config.h" #include "arrow/util/logging.h" namespace arrow { From d23f1a195caf8d94289a39952b1e034e4572fd5a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 13:43:24 +0000 Subject: [PATCH 64/95] lint --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 4 ++-- cpp/src/arrow/util/rle_encoding_test.cc | 10 +++++----- 2 files changed, 7 insertions(+), 7 
deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index bf51419a5b5..584284db2ee 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2558,9 +2558,9 @@ macro(build_zlib) add_dependencies(toolchain zlib_ep) add_dependencies(ZLIB::ZLIB zlib_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) - endif() + endif() - set(ZLIB_VENDORED TRUE) + set(ZLIB_VENDORED TRUE) endmacro() if(ARROW_WITH_ZLIB) diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 823aa708954..627dbd8a63d 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -215,10 +215,10 @@ void ValidateRle(const std::vector& values, int bit_width, uint8_t* expected_encoding, int expected_len) { const int len = 64 * 1024; #ifdef __EMSCRIPTEN__ - // don't make this on the stack as it is + // don't make this on the stack as it is // too big for emscripten std::vector buffer_vec((size_t)len); - uint8_t *buffer = buffer_vec.data(); + uint8_t* buffer = buffer_vec.data(); #else uint8_t buffer[len]; #endif @@ -237,7 +237,7 @@ void ValidateRle(const std::vector& values, int bit_width, if (expected_encoding != NULL && encoded_len == expected_len) { EXPECT_EQ(memcmp(buffer, expected_encoding, encoded_len), 0); } - + // Verify read { RleDecoder decoder(buffer, len, bit_width); @@ -264,10 +264,10 @@ void ValidateRle(const std::vector& values, int bit_width, bool CheckRoundTrip(const std::vector& values, int bit_width) { const int len = 64 * 1024; #ifdef __EMSCRIPTEN__ - // don't make this on the stack as it is + // don't make this on the stack as it is // too big for emscripten std::vector buffer_vec((size_t)len); - uint8_t *buffer = buffer_vec.data(); + uint8_t* buffer = buffer_vec.data(); #else uint8_t buffer[len]; #endif From f79e49fff6fe7ecaf314f0d6500af9cef7807fa5 Mon Sep 17 00:00:00 2001 From: Joe 
Marshall Date: Wed, 8 Nov 2023 20:29:39 +0000 Subject: [PATCH 65/95] dead code elimination in test executables --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 54d7e2c4cc3..f0abd61771d 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -742,7 +742,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(ARROW_TESTING) # flags for building test executables for use in node set(CMAKE_EXE_LINKER_FLAGS - "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -sMAIN_MODULE=2 -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" ) else() set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") From 232d7f53740e042cbdf61b8656226e0bb04c2943 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 8 Nov 2023 20:53:03 +0000 Subject: [PATCH 66/95] revert main_module change (because it didn't improve code size) --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index f0abd61771d..54d7e2c4cc3 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -742,7 +742,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(ARROW_TESTING) # flags for building test executables for use in node set(CMAKE_EXE_LINKER_FLAGS - "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -sMAIN_MODULE=2 -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} 
-sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" ) else() set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") From 5b65a366a2cc852e8bfbe5b12127ec873b42fecf Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 9 Nov 2023 14:22:32 +0000 Subject: [PATCH 67/95] default to -O1 for emscripten --- cpp/cmake_modules/SetupCxxFlags.cmake | 38 +++++++++++++++++++-------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 54d7e2c4cc3..f2be63ec819 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -668,23 +668,39 @@ if(NOT MSVC) set(C_DEBUG_FLAGS "") set(CXX_DEBUG_FLAGS "") if(NOT MSVC) - if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") - string(APPEND C_DEBUG_FLAGS " -O0") - endif() - if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") - string(APPEND CXX_DEBUG_FLAGS " -O0") - endif() if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + # with -g it uses DWARF debug info, which is really slow to build + # on emscripten (and uses tons of memory) + string(REPLACE "-g" " " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} ) + string(REPLACE "-g" " " CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG} ) string(APPEND C_DEBUG_FLAGS " -g2") string(APPEND CXX_DEBUG_FLAGS " -g2") string(APPEND C_RELWITHDEBINFO_FLAGS " -g2") string(APPEND CXX_RELWITHDEBINFO_FLAGS " -g2") - elseif(ARROW_GGDB_DEBUG) - string(APPEND C_DEBUG_FLAGS " -ggdb") - string(APPEND CXX_DEBUG_FLAGS " -ggdb") - string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") - string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb") + # without -O1, emscripten executables are *MASSIVE*. 
Don't use -O0 + if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") + string(APPEND C_DEBUG_FLAGS " -O1") + endif() + if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") + string(APPEND CXX_DEBUG_FLAGS " -O1") + endif() + else() + if(NOT CMAKE_C_FLAGS_DEBUG MATCHES "-O") + string(APPEND C_DEBUG_FLAGS " -O0") + endif() + if(NOT CMAKE_CXX_FLAGS_DEBUG MATCHES "-O") + string(APPEND CXX_DEBUG_FLAGS " -O0") + endif() + + if(ARROW_GGDB_DEBUG) + string(APPEND C_DEBUG_FLAGS " -ggdb") + string(APPEND CXX_DEBUG_FLAGS " -ggdb") + string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") + string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb") + endif() + endif() + endif() string(APPEND CMAKE_C_FLAGS_RELEASE "${C_RELEASE_FLAGS} ${ARROW_C_FLAGS_RELEASE}") From 72481cf8a6b364984d81ef80096d10236ae0fd9c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 9 Nov 2023 14:39:50 +0000 Subject: [PATCH 68/95] lint --- cpp/cmake_modules/SetupCxxFlags.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index f2be63ec819..439b246f307 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -671,8 +671,8 @@ if(NOT MSVC) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # with -g it uses DWARF debug info, which is really slow to build # on emscripten (and uses tons of memory) - string(REPLACE "-g" " " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG} ) - string(REPLACE "-g" " " CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG} ) + string(REPLACE "-g" " " CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "-g" " " CMAKE_C_FLAGS_DEBUG ${CMAKE_C_FLAGS_DEBUG}) string(APPEND C_DEBUG_FLAGS " -g2") string(APPEND CXX_DEBUG_FLAGS " -g2") string(APPEND C_RELWITHDEBINFO_FLAGS " -g2") @@ -700,7 +700,7 @@ if(NOT MSVC) endif() endif() - + endif() string(APPEND CMAKE_C_FLAGS_RELEASE "${C_RELEASE_FLAGS} ${ARROW_C_FLAGS_RELEASE}") From 57e1b65bf3b595a34f352583a956eade0634a510 Mon Sep 17 00:00:00 2001 
From: Joe Marshall Date: Wed, 22 Nov 2023 14:37:43 +0000 Subject: [PATCH 69/95] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 - cpp/src/arrow/compute/kernels/scalar_string_test.cc | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 439b246f307..ca6ce73a85c 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -746,7 +746,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) # flags are: - # 1) Tell it to use javascript / webassembly 64 bit number support. + # 1) Tell it to use JavaScript / WebAssembly 64 bit number support. # 2) Tell it to build with support for C++ exceptions # 3) Skip linker flags error which happens with -soname parameter set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 616e1d960b9..936eecf9804 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1449,7 +1449,6 @@ macro(build_brotli) "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlidec-static${CMAKE_STATIC_LIBRARY_SUFFIX}" "${BROTLI_BUILD_DIR}/${CMAKE_STATIC_LIBRARY_PREFIX}brotlicommon-static${CMAKE_STATIC_LIBRARY_SUFFIX}" ) - endif() externalproject_add(brotli_ep diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index 53d284f8b65..2d0c35cb9cd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -1898,7 +1898,7 @@ TYPED_TEST(TestStringKernels, Strptime) { TYPED_TEST(TestStringKernels, 
StrptimeZoneOffset) { #ifdef __EMSCRIPTEN__ GTEST_SKIP() - << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467 "; + << "Emscripten bug https://github.com/emscripten-core/emscripten/issues/20467"; #endif if (!arrow::internal::kStrptimeSupportsZone) { From 9506ca54eb5058fc0f0d25bbe07dbad7d5923f35 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 22 Nov 2023 14:44:38 +0000 Subject: [PATCH 70/95] use -sRELOCATABLE instead of -fPIC Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 936eecf9804..96559fff9fe 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2524,10 +2524,10 @@ macro(build_zlib) execute_process(COMMAND embuilder --pic --force build zlib) endif() add_library(ZLIB::ZLIB INTERFACE IMPORTED) - # We need -fPIC in target_compile_options() too + # We need -sRELOCATABLE=1 in target_compile_options() too # to stop it using non-PIC libz.a in linking. 
- target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -fPIC) - target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -fPIC) + target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -sRELOCATABLE=1) + target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -sRELOCATABLE=1) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") if(MSVC) From 617ba512a649df3517971321d91b2ea631358bb0 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 22 Nov 2023 14:50:16 +0000 Subject: [PATCH 71/95] remove emscriptenoverrides --- cpp/CMakePresets.json | 6 +---- cpp/cmake_modules/EmscriptenOverrides.cmake | 25 --------------------- 2 files changed, 1 insertion(+), 30 deletions(-) delete mode 100644 cpp/cmake_modules/EmscriptenOverrides.cmake diff --git a/cpp/CMakePresets.json b/cpp/CMakePresets.json index 7973ec7f969..05154e23115 100644 --- a/cpp/CMakePresets.json +++ b/cpp/CMakePresets.json @@ -69,11 +69,7 @@ "ARROW_WITH_BROTLI": "ON", "ARROW_WITH_OPENTELEMETRY": "OFF", "ARROW_WITH_SNAPPY": "ON", - "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN", - "CMAKE_PROJECT_INCLUDE": { - "type": "PATH", - "value": "${sourceDir}/cmake_modules/EmscriptenOverrides.cmake" - } + "CMAKE_C_BYTE_ORDER": "LITTLE_ENDIAN" } }, { diff --git a/cpp/cmake_modules/EmscriptenOverrides.cmake b/cpp/cmake_modules/EmscriptenOverrides.cmake deleted file mode 100644 index 1b82978c182..00000000000 --- a/cpp/cmake_modules/EmscriptenOverrides.cmake +++ /dev/null @@ -1,25 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Force some variables for Emscripten -# to disable things that won't work there - -# # override default in Emscripten which is to not use shared libs -set_property(GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS TRUE) - -# stripping doesn't work on emscripten -set(CMAKE_STRIP FALSE) From dd2c66223bab37cc7e36a5fa40f51fa703535565 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 22 Nov 2023 14:54:33 +0000 Subject: [PATCH 72/95] review changes --- ci/scripts/cpp_build.sh | 1 + cpp/cmake_modules/SetupCxxFlags.cmake | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 982437d5549..ce54756ccd0 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -96,6 +96,7 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1 fi + n_jobs=2 # emscripten build fails on docker otherwise source ~/emsdk/emsdk_env.sh emcmake cmake \ --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index ca6ce73a85c..7912a1c1d2f 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -733,14 +733,13 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # flags are: - # 1) We are building library code - # 2) We force *everything* to build as position independent - # 3) And with support for C++ exceptions + # 1) We force *everything* to build as position independent + # 2) And with 
support for C++ exceptions set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") # size_t is 32 bit in emscripten wasm32 - ignore conversion errors # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=shorten-64-to-32 -Wno-error=deprecated-literal-operator" + "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator" ) # flags for creating shared libraries (only used in pyarrow, because From b1050f5f7cc685c6991acf3ed6e7955f73872c50 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 22 Nov 2023 15:03:53 +0000 Subject: [PATCH 73/95] Update cpp/src/arrow/util/io_util.cc Co-authored-by: Joris Van den Bossche --- cpp/src/arrow/util/io_util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index b7d38a4c34e..8557625d212 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -2065,7 +2065,7 @@ Status SendSignal(int signum) { Status SendSignalToThread(int signum, uint64_t thread_id) { #ifndef ARROW_ENABLE_THREADING return Status::NotImplemented("Can't send signal with no threads"); -#elif _WIN32 +#elif defined(_WIN32) return Status::NotImplemented("Cannot send signal to specific thread on Windows"); #else // Have to use a C-style cast because pthread_t can be a pointer *or* integer type From 145426fd6ba9e4c1570086dc6dd7fc7d91a84f79 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Wed, 22 Nov 2023 16:11:18 +0000 Subject: [PATCH 74/95] restrict number of concurrent processes in emscripten --- ci/scripts/cpp_build.sh | 2 +- ci/scripts/cpp_test.sh | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index ce54756ccd0..f7577fede68 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -96,7 +96,7 @@ if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then echo "arrow 
emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1 fi - n_jobs=2 # emscripten build fails on docker otherwise + n_jobs=2 # Emscripten build fails on docker unless this is set really low source ~/emsdk/emsdk_env.sh emcmake cmake \ --preset=ninja-${ARROW_BUILD_TYPE:-debug}-emscripten \ diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 0c6e1c6ef70..0082dbdd22d 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -75,6 +75,10 @@ case "$(uname)" in ;; esac +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then + n_jobs=1 # avoid spurious fails on emscripten due to loading too many big executables +fi + pushd ${build_dir} if [ -z "${PYTHON}" ] && ! which python > /dev/null 2>&1; then From 8cc5ae3686405c457163f012f124ead312483d4c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 24 Nov 2023 16:25:33 +0000 Subject: [PATCH 75/95] lint fixes --- cpp/cmake_modules/SetupCxxFlags.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 7912a1c1d2f..fe846c62d3a 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -739,8 +739,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # size_t is 32 bit in emscripten wasm32 - ignore conversion errors # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator" - ) + "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) From 2429261e721cc22f76f54ae51cacc63d5e50996f Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:04:20 +0000 Subject: [PATCH 76/95] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/src/arrow/util/mutex.cc | 2 +- 
cpp/src/arrow/util/rle_encoding_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/mutex.cc b/cpp/src/arrow/util/mutex.cc index eb12acfc6f1..bbf2a9a93e6 100644 --- a/cpp/src/arrow/util/mutex.cc +++ b/cpp/src/arrow/util/mutex.cc @@ -36,7 +36,7 @@ struct Mutex::Impl { Mutex::Guard::Guard(Mutex* locked) : locked_(locked, [](Mutex* locked) { -#if !defined(__EMSCRIPTEN__) || defined(ARROW_ENABLE_THREADING) +#ifdef ARROW_ENABLE_THREADING DCHECK(!locked->impl_->mutex_.try_lock()); #endif locked->impl_->mutex_.unlock(); diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc index 627dbd8a63d..26984e5f773 100644 --- a/cpp/src/arrow/util/rle_encoding_test.cc +++ b/cpp/src/arrow/util/rle_encoding_test.cc @@ -217,7 +217,7 @@ void ValidateRle(const std::vector& values, int bit_width, #ifdef __EMSCRIPTEN__ // don't make this on the stack as it is // too big for emscripten - std::vector buffer_vec((size_t)len); + std::vector buffer_vec(static_cast(len)); uint8_t* buffer = buffer_vec.data(); #else uint8_t buffer[len]; @@ -266,7 +266,7 @@ bool CheckRoundTrip(const std::vector& values, int bit_width) { #ifdef __EMSCRIPTEN__ // don't make this on the stack as it is // too big for emscripten - std::vector buffer_vec((size_t)len); + std::vector buffer_vec(static_cast(len)); uint8_t* buffer = buffer_vec.data(); #else uint8_t buffer[len]; From 370b1ff9a45470f95995dc58a81081b11b617cc8 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:05:20 +0000 Subject: [PATCH 77/95] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/SetupCxxFlags.cmake | 3 +-- cpp/cmake_modules/ThirdpartyToolchain.cmake | 8 +++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index fe846c62d3a..49f6d7f8b6c 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ 
b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -759,7 +759,6 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" ) else() - set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") + set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") endif() - endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 96559fff9fe..c6fa0ba5223 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1367,10 +1367,10 @@ macro(build_snappy) endif() if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - # ignore linker flag errors, as snappy sets - # -Werror -Wall, and emscripten doesn't support -soname + # ignore linker flag errors, as Snappy sets + # -Werror -Wall, and Emscripten doesn't support -soname list(APPEND SNAPPY_CMAKE_ARGS - "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS} -Wno-error=linkflags") + "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}" "-Wno-error=linkflags") endif() externalproject_add(snappy_ep @@ -1888,9 +1888,7 @@ macro(build_protobuf) PROPERTIES IMPORTED_LOCATION "${PROTOBUF_HOST_COMPILER}") add_dependencies(arrow::protobuf::host_protoc protobuf_ep_host) - endif() - endmacro() if(ARROW_WITH_PROTOBUF) From d0fa881e77167fc15f57e5ca9f169b28bf97a801 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:06:18 +0000 Subject: [PATCH 78/95] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 49f6d7f8b6c..424a2085e78 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ 
b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -756,7 +756,7 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") if(ARROW_TESTING) # flags for building test executables for use in node set(CMAKE_EXE_LINKER_FLAGS - "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" ) else() set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") From da176ad3bbd894bde344cb931bb8f66103a8805a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:11:26 +0000 Subject: [PATCH 79/95] documentation consistency --- docs/source/developers/cpp/emscripten.rst | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index 07eb2561e24..b4c563aae1a 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -19,13 +19,12 @@ .. highlight:: console .. _developers-cpp-emscripten: -################################################# +=============================================== Cross compiling for WebAssembly with Emscripten -################################################# +=============================================== -*************** - Prerequisites -*************** +Prerequisites +------------- You need CMake and compilers etc. installed as per the normal build instructions. Before building with Emscripten, you also need to install Emscripten and activate it using the commands below (see https://emscripten.org/docs/getting_started/downloads.html for details). @@ -73,9 +72,9 @@ It should make a wheel targeting the currently enabled version of Pyodide (i.e. 
the version corresponding to the currently installed ``pyodide-build``) in the ``dist`` subdirectory. -************** - Manual Build -************** + +Manual Build +------------ If you want to manually build for Emscripten, take a look at the ``CMakePresets.json`` file in the ``arrow/cpp`` directory for a list of things From 5f058c9dac1642293616fca6b4f1f7f78fc18b0c Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:12:49 +0000 Subject: [PATCH 80/95] don't ignore conversion errors any more Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/SetupCxxFlags.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 424a2085e78..abd5952d259 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -736,7 +736,6 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # 1) We force *everything* to build as position independent # 2) And with support for C++ exceptions set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") - # size_t is 32 bit in emscripten wasm32 - ignore conversion errors # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator") From 51705de56ecc61660d269f43e57bce683b92a518 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:15:31 +0000 Subject: [PATCH 81/95] build fixes from review --- cpp/cmake_modules/BuildUtils.cmake | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index 3930faa814b..a26a296b9e3 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -740,16 +740,8 @@ function(ADD_TEST_CASE REL_TEST_NAME) valgrind --suppressions=valgrind.supp --tool=memcheck --gen-suppressions=all \ --num-callers=500 --leak-check=full 
--leak-check-heuristics=stdstring \ --error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}") - elseif(WIN32) - add_test(${TEST_NAME} ${TEST_PATH} ${ARG_TEST_ARGUMENTS}) - elseif(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - add_test(${TEST_NAME} - ${BUILD_SUPPORT_DIR}/run-test.sh - ${CMAKE_BINARY_DIR} - test - ${CMAKE_CROSSCOMPILING_EMULATOR} - ${TEST_PATH} - ${ARG_TEST_ARGUMENTS}) + elseif(WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS} else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh From d3e077099298525250c2f31c0d01f5367eeb3cad Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 10:53:26 +0000 Subject: [PATCH 82/95] typo --- cpp/cmake_modules/BuildUtils.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index cd3045af0bb..e7523add272 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -761,7 +761,7 @@ function(ADD_TEST_CASE REL_TEST_NAME) --num-callers=500 --leak-check=full --leak-check-heuristics=stdstring \ --error-exitcode=1 ${TEST_PATH} ${ARG_TEST_ARGUMENTS}") elseif(WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS} + add_test(NAME ${TEST_NAME} COMMAND ${TEST_NAME} ${ARG_TEST_ARGUMENTS}) else() add_test(${TEST_NAME} ${BUILD_SUPPORT_DIR}/run-test.sh From abcd7dfa4f00ea1c68f6802168a2ede41dedd7e8 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 21 Mar 2024 11:05:41 +0000 Subject: [PATCH 83/95] fix script permissions --- ci/scripts/cpp_build.sh | 0 ci/scripts/go_bench.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 ci/scripts/cpp_build.sh mode change 100644 => 100755 ci/scripts/go_bench.sh diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh old mode 100644 new mode 100755 diff --git a/ci/scripts/go_bench.sh 
b/ci/scripts/go_bench.sh old mode 100644 new mode 100755 From cc0c1d551277c2d1d29abd128baa21d2ba2e2ef9 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 22 Mar 2024 10:35:56 +0000 Subject: [PATCH 84/95] toolchain lint --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 38f4f392c42..4745c7c62fe 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1370,7 +1370,8 @@ macro(build_snappy) # ignore linker flag errors, as Snappy sets # -Werror -Wall, and Emscripten doesn't support -soname list(APPEND SNAPPY_CMAKE_ARGS - "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}" "-Wno-error=linkflags") + "-DCMAKE_SHARED_LINKER_FLAGS=${CMAKE_SHARED_LINKER_FLAGS}" + "-Wno-error=linkflags") endif() externalproject_add(snappy_ep @@ -2552,7 +2553,7 @@ macro(build_zlib) set_target_properties(ZLIB::ZLIB PROPERTIES IMPORTED_LOCATION ${ZLIB_LIBRARIES}) target_include_directories(ZLIB::ZLIB BEFORE INTERFACE "${ZLIB_INCLUDE_DIRS}") - add_dependencies(ZLIB::ZLIB zlib_ep) + add_dependencies(ZLIB::ZLIB zlib_ep) list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) endif() From 701d5afaac63265b0f9d033502d75ba8b7dc6d57 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Mar 2024 11:37:28 +0000 Subject: [PATCH 85/95] test commit: --- test.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 test.txt diff --git a/test.txt b/test.txt new file mode 100644 index 00000000000..01058d844a9 --- /dev/null +++ b/test.txt @@ -0,0 +1 @@ +g From 0064fa0719b85451646b360084b0701851bf96dc Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Mon, 25 Mar 2024 11:39:20 +0000 Subject: [PATCH 86/95] reverted test commit --- test.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 test.txt diff --git a/test.txt b/test.txt deleted file mode 100644 index 01058d844a9..00000000000 
--- a/test.txt +++ /dev/null @@ -1 +0,0 @@ -g From 028682eca04a4cc17fa0f6684c1b0ce87735f343 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Mar 2024 21:19:00 +0000 Subject: [PATCH 87/95] Apply suggestions from code review Co-authored-by: Sutou Kouhei --- cpp/cmake_modules/SetupCxxFlags.cmake | 2 -- cpp/src/arrow/compute/kernels/scalar_string_test.cc | 1 - 2 files changed, 3 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 3135bbb3d90..05c23efdcab 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -729,9 +729,7 @@ if(NOT MSVC) string(APPEND C_RELWITHDEBINFO_FLAGS " -ggdb") string(APPEND CXX_RELWITHDEBINFO_FLAGS " -ggdb") endif() - endif() - endif() string(APPEND CMAKE_C_FLAGS_RELEASE "${C_RELEASE_FLAGS} ${ARROW_C_FLAGS_RELEASE}") diff --git a/cpp/src/arrow/compute/kernels/scalar_string_test.cc b/cpp/src/arrow/compute/kernels/scalar_string_test.cc index ec6cdcccb83..c7dbdef2436 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_test.cc @@ -21,7 +21,6 @@ #include #include -#include #include #include "arrow/compute/api_scalar.h" From d36fca21e92fcaeb6b13eb425dcc63712215a54a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Mar 2024 21:24:30 +0000 Subject: [PATCH 88/95] code review changes --- cpp/src/arrow/util/atfork_test.cc | 1 + cpp/src/arrow/util/cache_test.cc | 1 + cpp/src/arrow/util/counting_semaphore_test.cc | 1 + cpp/src/arrow/util/io_util_test.cc | 1 + 4 files changed, 4 insertions(+) diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index a757394db1d..009ac10e0fd 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -35,6 +35,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/atfork_internal.h" +#include "arrow/util/future.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" 
diff --git a/cpp/src/arrow/util/cache_test.cc b/cpp/src/arrow/util/cache_test.cc index 7c61b3980bf..b389c479ed7 100644 --- a/cpp/src/arrow/util/cache_test.cc +++ b/cpp/src/arrow/util/cache_test.cc @@ -26,6 +26,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/cache_internal.h" +#include "arrow/util/future.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index e5e129636c1..68e124ec414 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -21,6 +21,7 @@ #include #include +#include "arrow/util/future.h" #include "arrow/testing/gtest_util.h" #include "gtest/gtest.h" diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 02d29a3c0d2..94443239f97 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -41,6 +41,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" #include "arrow/util/cpu_info.h" +#include "arrow/util/future.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/windows_compatibility.h" From ee99e5e90c049f5679c50036dbc6a7b3d6c95ff4 Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Mar 2024 22:31:49 +0000 Subject: [PATCH 89/95] toolchain build fix --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 235b0478130..92500f906d6 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1475,7 +1475,6 @@ macro(build_brotli) ${BROTLI_BUILD_INCLUDE_DIR} ${BROTLI_INCLUDE_DIR}/brotli) endif() - add_dependencies(toolchain brotli_ep) file(MAKE_DIRECTORY "${BROTLI_INCLUDE_DIR}") add_library(Brotli::brotlicommon STATIC IMPORTED) From b73740a037a05cb16cdad559c12a5131a03387d0 
Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Thu, 28 Mar 2024 23:11:08 +0000 Subject: [PATCH 90/95] cmake fixes for emscripten --- cpp/cmake_modules/SetupCxxFlags.cmake | 12 +++++++++--- cpp/cmake_modules/ThirdpartyToolchain.cmake | 10 +++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 05c23efdcab..1d709fe98d7 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -783,9 +783,15 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(CMAKE_SHARED_LINKER_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") if(ARROW_TESTING) # flags for building test executables for use in node - set(CMAKE_EXE_LINKER_FLAGS - "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" - ) + if("${CMAKE_BUILD_TYPE}" STREQUAL "RELEASE") + set(CMAKE_EXE_LINKER_FLAGS + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + ) + else() + set(CMAKE_EXE_LINKER_FLAGS + "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sERROR_ON_WASM_CHANGES_AFTER_LINK=1 -sALLOW_MEMORY_GROWTH -lnodefs.js -lnoderawfs.js --pre-js ${BUILD_SUPPORT_DIR}/emscripten-test-init.js" + ) + endif() else() set(CMAKE_EXE_LINKER_FLAGS "${ARROW_EMSCRIPTEN_LINKER_FLAGS} -sALLOW_MEMORY_GROWTH") endif() diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 92500f906d6..4a67eac1d4d 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2520,11 +2520,11 @@ macro(build_zlib) if(NOT EXISTS ${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a) execute_process(COMMAND embuilder --pic --force build zlib) endif() - add_library(ZLIB::ZLIB INTERFACE IMPORTED) - # We need -sRELOCATABLE=1 in 
target_compile_options() too - # to stop it using non-PIC libz.a in linking. - target_compile_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -sRELOCATABLE=1) - target_link_options(ZLIB::ZLIB INTERFACE -sUSE_ZLIB=1 -sRELOCATABLE=1) + add_library(ZLIB::ZLIB STATIC IMPORTED) + set_property(TARGET ZLIB::ZLIB + PROPERTY IMPORTED_LOCATION + "${EMSCRIPTEN_SYSROOT}/lib/wasm32-emscripten/pic/libz.a") + list(APPEND ARROW_BUNDLED_STATIC_LIBS ZLIB::ZLIB) else() set(ZLIB_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/zlib_ep/src/zlib_ep-install") if(MSVC) From 4dc3d03bf95c395301464f743e7cba5d69db40ac Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 29 Mar 2024 06:21:48 +0000 Subject: [PATCH 91/95] alphabetical order oops --- cpp/src/arrow/util/counting_semaphore_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index 68e124ec414..6551c1a9803 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -21,8 +21,8 @@ #include #include -#include "arrow/util/future.h" #include "arrow/testing/gtest_util.h" +#include "arrow/util/future.h" #include "gtest/gtest.h" namespace arrow { From 8b07ebf02438ac6a9ccba8bd5954d5f686fea99f Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 29 Mar 2024 06:53:17 +0000 Subject: [PATCH 92/95] remove dynamic linking test on emsripten --- cpp/src/arrow/filesystem/localfs_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index f90833a88d1..b65bebba4ed 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -138,6 +138,9 @@ TEST(FileSystemFromUri, LinkedRegisteredFactory) { } TEST(FileSystemFromUri, LoadedRegisteredFactory) { +#ifdef __EMSCRIPTEN__ + GTEST_SKIP() << "Emscripten dynamic library testing disabled"; +#endif // Since the registrar's 
definition is in libarrow_filesystem_example.so, // its factory will be registered only after the library is dynamically loaded. std::string path; From c0692dae7971887c1a2652e0a9d010f5edb5240a Mon Sep 17 00:00:00 2001 From: Joe Marshall Date: Fri, 29 Mar 2024 07:02:43 +0000 Subject: [PATCH 93/95] lint --- cpp/src/arrow/filesystem/localfs_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index b65bebba4ed..b76c7ebad45 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -140,7 +140,7 @@ TEST(FileSystemFromUri, LinkedRegisteredFactory) { TEST(FileSystemFromUri, LoadedRegisteredFactory) { #ifdef __EMSCRIPTEN__ GTEST_SKIP() << "Emscripten dynamic library testing disabled"; -#endif +#endif // Since the registrar's definition is in libarrow_filesystem_example.so, // its factory will be registered only after the library is dynamically loaded. 
std::string path; From d3bc3b4f3c765ca33c28c821d01a17509510314c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 4 Apr 2024 23:12:11 +0900 Subject: [PATCH 94/95] Use arrow/util/config.h --- cpp/src/arrow/io/file_test.cc | 1 + cpp/src/arrow/util/atfork_test.cc | 2 +- cpp/src/arrow/util/cache_test.cc | 2 +- cpp/src/arrow/util/counting_semaphore_test.cc | 2 +- cpp/src/arrow/util/io_util_test.cc | 2 +- 5 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/io/file_test.cc b/cpp/src/arrow/io/file_test.cc index 3111539867f..af414891b95 100644 --- a/cpp/src/arrow/io/file_test.cc +++ b/cpp/src/arrow/io/file_test.cc @@ -42,6 +42,7 @@ #include "arrow/status.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/util.h" +#include "arrow/util/config.h" #include "arrow/util/future.h" #include "arrow/util/io_util.h" diff --git a/cpp/src/arrow/util/atfork_test.cc b/cpp/src/arrow/util/atfork_test.cc index 009ac10e0fd..750f4d13879 100644 --- a/cpp/src/arrow/util/atfork_test.cc +++ b/cpp/src/arrow/util/atfork_test.cc @@ -35,7 +35,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/atfork_internal.h" -#include "arrow/util/future.h" +#include "arrow/util/config.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/util/cache_test.cc b/cpp/src/arrow/util/cache_test.cc index b389c479ed7..264bfe68ec5 100644 --- a/cpp/src/arrow/util/cache_test.cc +++ b/cpp/src/arrow/util/cache_test.cc @@ -26,7 +26,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/util/cache_internal.h" -#include "arrow/util/future.h" +#include "arrow/util/config.h" namespace arrow { namespace internal { diff --git a/cpp/src/arrow/util/counting_semaphore_test.cc b/cpp/src/arrow/util/counting_semaphore_test.cc index 6551c1a9803..4de11ce852a 100644 --- a/cpp/src/arrow/util/counting_semaphore_test.cc +++ b/cpp/src/arrow/util/counting_semaphore_test.cc @@ -22,7 +22,7 @@ #include #include "arrow/testing/gtest_util.h" -#include 
"arrow/util/future.h" +#include "arrow/util/config.h" #include "gtest/gtest.h" namespace arrow { diff --git a/cpp/src/arrow/util/io_util_test.cc b/cpp/src/arrow/util/io_util_test.cc index 94443239f97..73213bf9ce4 100644 --- a/cpp/src/arrow/util/io_util_test.cc +++ b/cpp/src/arrow/util/io_util_test.cc @@ -40,8 +40,8 @@ #include "arrow/buffer.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/bit_util.h" +#include "arrow/util/config.h" #include "arrow/util/cpu_info.h" -#include "arrow/util/future.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" #include "arrow/util/windows_compatibility.h" From 1307910cf11e1af1c269c34ea9804c38a7f17799 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 5 Apr 2024 10:25:33 +0900 Subject: [PATCH 95/95] Remove trailing spaces --- ci/scripts/cpp_build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 5bf7e495223..52c89acb9a7 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -92,7 +92,7 @@ esac mkdir -p ${build_dir} pushd ${build_dir} -if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then +if [ "${ARROW_EMSCRIPTEN:-OFF}" = "ON" ]; then if [ "${UBUNTU}" = "20.04" ]; then echo "arrow emscripten build is not supported on Ubuntu 20.04, run with UBUNTU=22.04" exit -1