From 11cfcb6c4f1151a2f9b0d6092001b1b7e7d0b0c1 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:36:22 +0000 Subject: [PATCH 01/11] Add missing export that prevented any pyarrow tests from running --- ci/scripts/python_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 20ca3300c05..7b803518494 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -52,6 +52,7 @@ fi : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO +export PYARROW_TEST_AZURE export PYARROW_TEST_CUDA export PYARROW_TEST_DATASET export PYARROW_TEST_FLIGHT From 80644996f1a6ed362084da26feba28166df2120b Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:36:49 +0000 Subject: [PATCH 02/11] Enable pyarrow tests on sdist_test --- ci/scripts/python_sdist_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/python_sdist_test.sh b/ci/scripts/python_sdist_test.sh index d3c6f0e6ade..1cd1000aa39 100755 --- a/ci/scripts/python_sdist_test.sh +++ b/ci/scripts/python_sdist_test.sh @@ -28,6 +28,7 @@ export PARQUET_TEST_DATA=${arrow_dir}/cpp/submodules/parquet-testing/data export PYARROW_CMAKE_GENERATOR=${CMAKE_GENERATOR:-Ninja} export PYARROW_BUILD_TYPE=${CMAKE_BUILD_TYPE:-debug} export PYARROW_WITH_ACERO=${ARROW_ACERO:-ON} +export PYARROW_WITH_AZURE=${ARROW_AZURE:-OFF} export PYARROW_WITH_S3=${ARROW_S3:-OFF} export PYARROW_WITH_ORC=${ARROW_ORC:-OFF} export PYARROW_WITH_CUDA=${ARROW_CUDA:-OFF} From 1f455b305183751d6f7808afd06f97f56143b614 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:37:11 +0000 Subject: [PATCH 03/11] Enable Azure on python macos builds --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 6e3797b29c2..25d918bcc25 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -132,6 +132,7 @@ jobs: timeout-minutes: 60 env: ARROW_HOME: /usr/local + ARROW_AZURE: ON ARROW_DATASET: ON ARROW_FLIGHT: ON ARROW_GANDIVA: ON From cf8ce6ae0b994f928e7a09caaad05349ca680609 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 13:37:43 +0000 Subject: [PATCH 04/11] Enable azure in conda builds and install dependencies --- ci/conda_env_cpp.txt | 5 +++++ ci/docker/conda-cpp.dockerfile | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index ef00f7cf475..753950e796f 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,6 +16,11 @@ # under the License. aws-sdk-cpp=1.11.68 +azure-core-cpp>=1.10.3 +azure-identity-cpp>=1.6.0 +azure-storage-blobs-cpp>=12.10.0 +azure-storage-common-cpp>=12.5.0 +azure-storage-files-datalake-cpp>=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 7a54dcc86f8..5fa7a82fe38 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -22,6 +22,16 @@ FROM ${repo}:${arch}-conda COPY ci/scripts/install_minio.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda +# Azurite requires npm +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update -y -q && \ + apt-get install -y -q npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + # Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. ARG python=3.10 @@ -50,6 +60,7 @@ COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/ RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin ENV ARROW_ACERO=ON \ + ARROW_AZURE=ON \ ARROW_BUILD_TESTS=ON \ ARROW_DATASET=ON \ ARROW_DEPENDENCY_SOURCE=CONDA \ From 5f1e50b81103f9c87bc2cf42d79927ec90b6c1b3 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 14 Feb 2024 22:58:52 +0000 Subject: [PATCH 05/11] Install node from conda instead of apt --- ci/conda_env_cpp.txt | 1 + ci/docker/conda-cpp.dockerfile | 16 ++++++---------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 753950e796f..b8c792008a9 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -39,6 +39,7 @@ libutf8proc lz4-c make ninja +nodejs orc pkg-config python diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 5fa7a82fe38..bb17871e016 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -22,16 +22,6 @@ FROM ${repo}:${arch}-conda COPY ci/scripts/install_minio.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_minio.sh latest /opt/conda -# Azurite requires npm -RUN export DEBIAN_FRONTEND=noninteractive && \ - apt-get update -y -q && \ - apt-get install -y -q npm \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ -RUN /arrow/ci/scripts/install_azurite.sh - # Unless overridden use Python 3.10 # Google GCS fails building with Python 3.11 at the moment. ARG python=3.10 @@ -52,6 +42,12 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all +# Ensure nvm and node are on path. npm is required to install azurite. +ENV PATH=/opt/conda/envs/arrow/bin:$PATH + +COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ +RUN /arrow/ci/scripts/install_azurite.sh + # We want to install the GCS testbench using the same Python binary that the Conda code will use. COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts RUN /arrow/ci/scripts/install_gcs_testbench.sh default From 79b1c5d780c2a554fa3b52a9adda0f1d912c047b Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Thu, 15 Feb 2024 08:56:48 +0000 Subject: [PATCH 06/11] Update comment --- ci/docker/conda-cpp.dockerfile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index bb17871e016..2b96dc5ea17 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,7 +42,8 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all -# Ensure nvm and node are on path. npm is required to install azurite. +# Ensure nvm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/ From 2e3e51c9df3cab55466c620313605477b4c62558 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sat, 17 Feb 2024 16:46:24 +0000 Subject: [PATCH 07/11] Pin specific versions to avoid segfault --- ci/conda_env_cpp.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index b8c792008a9..f7e7c3ce1c1 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,11 +16,11 @@ # under the License. aws-sdk-cpp=1.11.68 -azure-core-cpp>=1.10.3 -azure-identity-cpp>=1.6.0 -azure-storage-blobs-cpp>=12.10.0 -azure-storage-common-cpp>=12.5.0 -azure-storage-files-datalake-cpp>=12.9.0 +azure-core-cpp=1.10.3 +azure-identity-cpp=1.6.0 +azure-storage-blobs-cpp=12.10.0 +azure-storage-common-cpp=12.5.0 +azure-storage-files-datalake-cpp=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli From cbe5c46aef245df4935168b5b11fd9236bee971a Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sun, 18 Feb 2024 00:46:58 +0000 Subject: [PATCH 08/11] Less restrictive pin and add a comment about the issue --- ci/conda_env_cpp.txt | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index f7e7c3ce1c1..2e7b568fc53 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,11 +16,12 @@ # under the License. aws-sdk-cpp=1.11.68 -azure-core-cpp=1.10.3 -azure-identity-cpp=1.6.0 -azure-storage-blobs-cpp=12.10.0 -azure-storage-common-cpp=12.5.0 -azure-storage-files-datalake-cpp=12.9.0 +# There is a problem with the 1.11.0 conda release of azure-core-cpp https://github.com/conda-forge/admin-requests/pull/911 +azure-core-cpp>=1.10.3,<1.11.0 +azure-identity-cpp>=1.6.0 +azure-storage-blobs-cpp>=12.10.0 +azure-storage-common-cpp>=12.5.0 +azure-storage-files-datalake-cpp>=12.9.0 benchmark>=1.6.0 boost-cpp>=1.68.0 brotli From 7c27978677b767306ecc52e5de96c243522e2446 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Sun, 18 Feb 2024 20:42:18 +0000 Subject: [PATCH 09/11] Correct comment typo --- ci/docker/conda-cpp.dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/docker/conda-cpp.dockerfile b/ci/docker/conda-cpp.dockerfile index 2b96dc5ea17..dff1f222480 100644 --- a/ci/docker/conda-cpp.dockerfile +++ b/ci/docker/conda-cpp.dockerfile @@ -42,7 +42,7 @@ RUN mamba install -q -y \ valgrind && \ mamba clean --all -# Ensure nvm, node and azurite are on path. npm and node are required to install azurite, which will then need to +# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to # be on the path for the tests to run. ENV PATH=/opt/conda/envs/arrow/bin:$PATH From 5e3f8469aa7b85da3e9510c7b29ad7f79b634093 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Mon, 19 Feb 2024 19:23:47 +0000 Subject: [PATCH 10/11] Add retry mitigation suggested for GH-40121 --- ci/scripts/cpp_test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh index 1d685c51a93..a23ea8eb1cd 100755 --- a/ci/scripts/cpp_test.sh +++ b/ci/scripts/cpp_test.sh @@ -86,6 +86,7 @@ ctest \ --label-regex unittest \ --output-on-failure \ --parallel ${n_jobs} \ + --repeat until-pass:3 \ --timeout ${ARROW_CTEST_TIMEOUT:-300} \ "${ctest_options[@]}" \ "$@" From 53b9d2cb4d7f4b9e3e3c08a5558d59a06784bf57 Mon Sep 17 00:00:00 2001 From: Thomas Newton Date: Wed, 21 Feb 2024 10:09:03 +0000 Subject: [PATCH 11/11] Remove conda version restriction now that latest release works --- ci/conda_env_cpp.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 2e7b568fc53..b8c792008a9 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -16,8 +16,7 @@ # under the License. aws-sdk-cpp=1.11.68 -# There is a problem with the 1.11.0 conda release of azure-core-cpp https://github.com/conda-forge/admin-requests/pull/911 -azure-core-cpp>=1.10.3,<1.11.0 +azure-core-cpp>=1.10.3 azure-identity-cpp>=1.6.0 azure-storage-blobs-cpp>=12.10.0 azure-storage-common-cpp>=12.5.0