From a3027852dc684ca2c93c1bf1e0cf410b18e6043a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 2 Sep 2024 14:14:25 +0200 Subject: [PATCH 1/5] GH-43905: [CI][Python] Add required PARQUET_TEST_DATA env to python_test_emscripten.sh --- ci/scripts/python_test_emscripten.sh | 7 +++++-- docker-compose.yml | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ci/scripts/python_test_emscripten.sh b/ci/scripts/python_test_emscripten.sh index 4029722568b..eb650320088 100755 --- a/ci/scripts/python_test_emscripten.sh +++ b/ci/scripts/python_test_emscripten.sh @@ -22,8 +22,11 @@ set -ex -build_dir=${1}/python -pyodide_dist_dir=${2} +arrow_dir=${1} +build_dir=${2}/python +pyodide_dist_dir=${3} + +export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data cd ${build_dir} diff --git a/docker-compose.yml b/docker-compose.yml index 66607157318..66c350fd8aa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -907,7 +907,7 @@ services: command: [" /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build_emscripten.sh /arrow /build && - /arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"] + /arrow/ci/scripts/python_test_emscripten.sh /arrow /build /pyodide"] ubuntu-cuda-python: # Usage: From 9a5e8a1e3365a716a2f8a6406ef173032c0293d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 3 Sep 2024 10:42:29 +0200 Subject: [PATCH 2/5] Try suggestion about loading environment variable to Pyodide --- python/scripts/run_emscripten_tests.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index 53d3dd52bd8..440da4975dd 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -19,6 +19,7 @@ import argparse import contextlib import http.server +import json import os import queue import shutil @@ -153,7 +154,7 @@ def load_pyodide(self, dist_dir): self.execute_js( f""" const {{ loadPyodide }} = require('{dist_dir}/pyodide.js'); - let pyodide = await loadPyodide(); + let pyodide = await loadPyodide({json.dumps({"env": {"PARQUET_TEST_DATA": os.environ.get("PARQUET_TEST_DATA")}})}); """ ) From 7b54dd42774b2cfa5edca208a7b5f8b153bdb9f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 3 Sep 2024 12:09:28 +0200 Subject: [PATCH 3/5] Skip test that requires parquet_test_datadir on emscripten --- ci/scripts/python_test_emscripten.sh | 7 ++----- docker-compose.yml | 2 +- python/pyarrow/tests/parquet/conftest.py | 3 +++ python/scripts/run_emscripten_tests.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/ci/scripts/python_test_emscripten.sh b/ci/scripts/python_test_emscripten.sh index eb650320088..4029722568b 100755 --- a/ci/scripts/python_test_emscripten.sh +++ b/ci/scripts/python_test_emscripten.sh @@ -22,11 +22,8 @@ set -ex -arrow_dir=${1} -build_dir=${2}/python -pyodide_dist_dir=${3} - -export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data +build_dir=${1}/python +pyodide_dist_dir=${2} cd ${build_dir} diff --git a/docker-compose.yml b/docker-compose.yml index 66c350fd8aa..66607157318 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -907,7 +907,7 @@ services: command: [" /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build_emscripten.sh /arrow /build && - /arrow/ci/scripts/python_test_emscripten.sh /arrow /build /pyodide"] + /arrow/ci/scripts/python_test_emscripten.sh /build /pyodide"] ubuntu-cuda-python: # Usage: diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py index 80605e973cd..70c59db8e9d 100644 --- a/python/pyarrow/tests/parquet/conftest.py +++ b/python/pyarrow/tests/parquet/conftest.py @@ -17,6 +17,7 @@ import os import pathlib +import sys import pytest @@ -30,6 +31,8 @@ def datadir(base_datadir): @pytest.fixture(scope='module') def parquet_test_datadir(): + if sys.platform in ('emscripten', ): + pytest.skip("needs PARQUET_TEST_DATA files access") result = os.environ.get('PARQUET_TEST_DATA') if not result: raise RuntimeError('Please point the PARQUET_TEST_DATA environment ' diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index 440da4975dd..ca28068336d 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -154,7 +154,7 @@ def load_pyodide(self, dist_dir): self.execute_js( f""" const {{ loadPyodide }} = require('{dist_dir}/pyodide.js'); - let pyodide = await loadPyodide({json.dumps({"env": {"PARQUET_TEST_DATA": os.environ.get("PARQUET_TEST_DATA")}})}); + let pyodide = await loadPyodide(); """ ) From 13ebce99e9ce9a15adacadf54dde1b815c10ce74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 3 Sep 2024 12:30:07 +0200 Subject: [PATCH 4/5] Remove stray import from previous commit --- python/scripts/run_emscripten_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/scripts/run_emscripten_tests.py b/python/scripts/run_emscripten_tests.py index ca28068336d..53d3dd52bd8 100644 --- a/python/scripts/run_emscripten_tests.py +++ b/python/scripts/run_emscripten_tests.py @@ -19,7 +19,6 @@ import argparse import contextlib import http.server -import json import os import queue import shutil From e918ffa2fbdd9472646f1170d6748b05f1a5387d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 3 Sep 2024 15:52:33 +0200 Subject: [PATCH 5/5] Update python/pyarrow/tests/parquet/conftest.py Co-authored-by: Joris Van den Bossche --- python/pyarrow/tests/parquet/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/parquet/conftest.py b/python/pyarrow/tests/parquet/conftest.py index 70c59db8e9d..94b3058fa02 100644 --- a/python/pyarrow/tests/parquet/conftest.py +++ b/python/pyarrow/tests/parquet/conftest.py @@ -31,7 +31,7 @@ def datadir(base_datadir): @pytest.fixture(scope='module') def parquet_test_datadir(): - if sys.platform in ('emscripten', ): + if sys.platform == 'emscripten': pytest.skip("needs PARQUET_TEST_DATA files access") result = os.environ.get('PARQUET_TEST_DATA') if not result: