From 9d4bf5aeb8a54ed49e2807f835016202781717c9 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 02:17:22 +0530 Subject: [PATCH 01/12] Bump to Pyodide 0.28.1 --- ci/docker/conda-python-emscripten.dockerfile | 12 ++++++------ docker-compose.yml | 4 ++-- docs/source/developers/cpp/emscripten.rst | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 47ff550cd59..3c188826b8a 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -17,14 +17,14 @@ ARG repo ARG arch -ARG python="3.12" +ARG python="3.13" FROM ${repo}:${arch}-conda-python-${python} ARG selenium_version="4.15.2" -ARG pyodide_version="0.26.0" +ARG pyodide_version="0.28.1" ARG chrome_version="latest" -ARG required_python_min="(3,12)" -# fail if python version < 3.12 +ARG required_python_min="(3,13)" +# fail if python version < 3.13 RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" # install selenium and recent pyodide-build and recent python @@ -46,9 +46,9 @@ RUN bash /arrow/ci/scripts/install_emscripten.sh ~ /pyodide # make sure zlib is cached in the EMSDK folder RUN source ~/emsdk/emsdk_env.sh && embuilder --pic build zlib -# install node 20 (needed for async call support) +# install node 22 (needed for async call support and JSPI) # and pthread-stubs for build, and unzip needed for chrome build to work -RUN conda install nodejs=20 unzip pthread-stubs make -c conda-forge +RUN conda install nodejs=22 unzip pthread-stubs make -c conda-forge # install chrome for testing browser based runner COPY ci/scripts/install_chromedriver.sh /arrow/ci/scripts/ diff --git a/docker-compose.yml b/docker-compose.yml index 3c2ea37de3e..29ea0491129 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -930,10 +930,10 @@ services: arch: ${ARCH} clang_tools: ${CLANG_TOOLS} llvm: ${LLVM} - pyodide_version: "0.26.0" + pyodide_version: "0.28.1" chrome_version: "134" selenium_version: "4.15.2" - required_python_min: "(3,12)" + required_python_min: "(3,13)" python: ${PYTHON} shm_size: *shm-size volumes: *ubuntu-volumes diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index d41b69aa6f0..785c9b0a67c 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -34,9 +34,9 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st git clone https://github.com/emscripten-core/emsdk.git cd emsdk # replace with the desired EMSDK version. - # e.g. for Pyodide 0.26, you need EMSDK version 3.1.58 + # e.g. for Pyodide 0.28, you need EMSDK version 4.0.9 # the versions can be found in the Makefile.envs file in the Pyodide repo: - # https://github.com/pyodide/pyodide/blob/10b484cfe427e076c929a55dc35cfff01ea8d3bc/Makefile.envs + # https://github.com/pyodide/pyodide/blob/0db5ff79f310694a7d72ae0b3279e9809ab836d6/Makefile.envs ./emsdk install ./emsdk activate source ./emsdk_env.sh From 16f90d93b72fb78d166d033295b7cd04780cdc83 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:03:54 +0530 Subject: [PATCH 02/12] Bump Python version for conda-python-emscripten --- dev/tasks/tasks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index e41d20e4034..93da971f09b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -849,7 +849,7 @@ tasks: params: env: UBUNTU: 22.04 - PYTHON: 3.12 + PYTHON: 3.13 image: conda-python-emscripten test-conda-python-3.11-hypothesis: From eebb7a86201a1c5fa7110ce172ab78c479fdbc02 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 09:29:33 +0530 Subject: [PATCH 03/12] Pass the right flag for WASM exception handling --- cpp/cmake_modules/SetupCxxFlags.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index afc0446a780..1ca20c3ccc9 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -798,18 +798,18 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # flags are: # 1) We force *everything* to build as position independent # 2) And with support for C++ exceptions - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fwasm-exceptions") # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator") + "${CMAKE_CXX_FLAGS} -fPIC -fwasm-exceptions -Wno-error=deprecated-literal-operator") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) # flags are: # 1) Tell it to use JavaScript / WebAssembly 64 bit number support. - # 2) Tell it to build with support for C++ exceptions + # 2) Tell it to build with support for WASM exceptions # 3) Skip linker flags error which happens with -soname parameter - set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fwasm-exceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS From af2e18d28cd031d5fe779a4d623cca0cfac66b4c Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 10:48:50 +0530 Subject: [PATCH 04/12] Install `libatomic1` for Pyodide Dockerfile --- ci/docker/conda-python-emscripten.dockerfile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 3c188826b8a..5b82abf6883 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -27,11 +27,14 @@ ARG required_python_min="(3,13)" # fail if python version < 3.13 RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" -# install selenium and recent pyodide-build and recent python +RUN apt-get update -y -q && \ + apt install -y -q --no-install-recommends \ + libatomic1 # needs to be a login shell so ~/.profile is read SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"] +# install selenium and recent pyodide-build and recent python RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \ python -m pip install --no-cache-dir --upgrade pyodide-build>=${pyodide_version} From 41adbebf943905cd424eff0e7a50d5bfd035a29c Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 11:49:27 +0530 Subject: [PATCH 05/12] Undo WASM EH flags as SjLj is enabled This reverts commit eebb7a86201a1c5fa7110ce172ab78c479fdbc02. --- cpp/cmake_modules/SetupCxxFlags.cmake | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index 1ca20c3ccc9..afc0446a780 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -798,18 +798,18 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # flags are: # 1) We force *everything* to build as position independent # 2) And with support for C++ exceptions - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fwasm-exceptions") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC -fexceptions") # deprecated-literal-operator error is thrown in datetime (vendored lib in arrow) set(CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fPIC -fwasm-exceptions -Wno-error=deprecated-literal-operator") + "${CMAKE_CXX_FLAGS} -fPIC -fexceptions -Wno-error=deprecated-literal-operator") # flags for creating shared libraries (only used in pyarrow, because # Emscripten builds libarrow as static) # flags are: # 1) Tell it to use JavaScript / WebAssembly 64 bit number support. - # 2) Tell it to build with support for WASM exceptions + # 2) Tell it to build with support for C++ exceptions # 3) Skip linker flags error which happens with -soname parameter - set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fwasm-exceptions -Wno-error=linkflags") + set(ARROW_EMSCRIPTEN_LINKER_FLAGS "-sWASM_BIGINT=1 -fexceptions -Wno-error=linkflags") set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-sSIDE_MODULE=1 ${ARROW_EMSCRIPTEN_LINKER_FLAGS}") set(CMAKE_SHARED_LIBRARY_CREATE_CXX_FLAGS From a89f7ecd51c41003ee582cf26c4fa30d62dbed99 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 12:01:19 +0530 Subject: [PATCH 06/12] Fix lint in Emscripten Dockerfile `apt-get` command --- ci/docker/conda-python-emscripten.dockerfile | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 5b82abf6883..f1d0c201e90 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -28,8 +28,10 @@ ARG required_python_min="(3,13)" RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" RUN apt-get update -y -q && \ - apt install -y -q --no-install-recommends \ - libatomic1 + apt-get install -y -q --no-install-recommends \ + libatomic1 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* # needs to be a login shell so ~/.profile is read SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"] From 20de7ad163a606840765c229ae95e9c40422ca72 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Thu, 14 Aug 2025 13:10:14 +0530 Subject: [PATCH 07/12] Build static `libarrow_python` on Emscripten --- python/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index fc26ea71bde..73d97e79c81 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -486,7 +486,11 @@ if(NOT PYARROW_CPP_LINK_LIBS) endif() endif() -add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + add_library(arrow_python STATIC ${PYARROW_CPP_SRCS}) +else() + add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) +endif() target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src) From 781b54a5a7c65fd7dd5cf6c919c561c312c762dc Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 15 Aug 2025 01:36:01 +0530 Subject: [PATCH 08/12] Add Emscripten-specific flags for `_acero` --- python/CMakeLists.txt | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 73d97e79c81..a9f0c7845bf 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -748,7 +748,11 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(ARROW_BUILD_SHARED) + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + set(ACERO_LINK_LIBS) + set_property(TARGET _acero PROPERTY LINK_FLAGS + "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") + elseif(ARROW_BUILD_SHARED) if(PYARROW_BUNDLE_ARROW_CPP) bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) if(MSVC) From f73c495170f88bd529a541252f76a3cb4e300fdf Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 15 Aug 2025 02:00:20 +0530 Subject: [PATCH 09/12] Fix CMake lint --- python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a9f0c7845bf..0fdd09ecd5c 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -750,8 +750,8 @@ endif() if(PYARROW_BUILD_ACERO) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(ACERO_LINK_LIBS) - set_property(TARGET _acero PROPERTY LINK_FLAGS - "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") + set_property(TARGET _acero PROPERTY LINK_FLAGS + "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") elseif(ARROW_BUILD_SHARED) if(PYARROW_BUNDLE_ARROW_CPP) bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) From 6ef7bb3b294565ff628c984398c6903477631429 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 15 Aug 2025 04:36:33 +0530 Subject: [PATCH 10/12] Revert "Fix CMake lint" This reverts commit f73c495170f88bd529a541252f76a3cb4e300fdf. --- python/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 0fdd09ecd5c..a9f0c7845bf 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -750,8 +750,8 @@ endif() if(PYARROW_BUILD_ACERO) if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(ACERO_LINK_LIBS) - set_property(TARGET _acero PROPERTY LINK_FLAGS - "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") + set_property(TARGET _acero PROPERTY LINK_FLAGS + "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") elseif(ARROW_BUILD_SHARED) if(PYARROW_BUNDLE_ARROW_CPP) bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) From 18bf7d50f2f3dd8f6e00794ee4d614e82d6677ee Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 15 Aug 2025 04:36:42 +0530 Subject: [PATCH 11/12] Revert "Add Emscripten-specific flags for `_acero`" This reverts commit 781b54a5a7c65fd7dd5cf6c919c561c312c762dc. --- python/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a9f0c7845bf..73d97e79c81 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -748,11 +748,7 @@ endif() # Acero if(PYARROW_BUILD_ACERO) - if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - set(ACERO_LINK_LIBS) - set_property(TARGET _acero PROPERTY LINK_FLAGS - "-s SIDE_MODULE=1 -s WASM_BIGINT -s EXPORT_ALL=1") - elseif(ARROW_BUILD_SHARED) + if(ARROW_BUILD_SHARED) if(PYARROW_BUNDLE_ARROW_CPP) bundle_arrow_lib(${ARROW_ACERO_SHARED_LIB} SO_VERSION ${ARROW_SO_VERSION}) if(MSVC) From eae25bec20fda7a7d8f8f50f141e8e5da6aa7f1b Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 15 Aug 2025 06:33:01 +0530 Subject: [PATCH 12/12] Revert "Build static `libarrow_python` on Emscripten" This reverts commit 20de7ad163a606840765c229ae95e9c40422ca72. --- python/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 73d97e79c81..fc26ea71bde 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -486,11 +486,7 @@ if(NOT PYARROW_CPP_LINK_LIBS) endif() endif() -if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - add_library(arrow_python STATIC ${PYARROW_CPP_SRCS}) -else() - add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) -endif() +add_library(arrow_python SHARED ${PYARROW_CPP_SRCS}) target_include_directories(arrow_python PUBLIC ${PYARROW_CPP_ROOT_DIR} ${CMAKE_CURRENT_BINARY_DIR}/pyarrow/src)