Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -465,15 +465,17 @@ jobs:
chmod +x /usr/local/bin/minio.exe
- name: Set up Python
uses: actions/setup-python@v5.1.1
id: python-install
with:
python-version: 3.9
- name: Install Google Cloud Storage Testbench
shell: bash
shell: msys2 {0}
env:
PIPX_BIN_DIR: /usr/local/bin
PIPX_PYTHON: ${{ steps.python-install.outputs.python-path }}
run: |
ci/scripts/install_gcs_testbench.sh default
echo "PYTHON_BIN_DIR=$(cygpath --windows $(dirname $(which python3.exe)))" >> $GITHUB_ENV
- name: Test
shell: msys2 {0}
run: |
PATH="$(cygpath --unix ${PYTHON_BIN_DIR}):${PATH}"
ci/scripts/cpp_test.sh "$(pwd)" "$(pwd)/build"
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ only_commits:
- appveyor.yml
- ci/appveyor*
- ci/conda*
- ci/scripts/*.bat
- cpp/
- format/
- python/
Expand Down
2 changes: 2 additions & 0 deletions ci/appveyor-cpp-build.bat
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ set ARROW_CMAKE_ARGS=-DARROW_DEPENDENCY_SOURCE=CONDA -DARROW_WITH_BZ2=ON
set ARROW_CXXFLAGS=/WX /MP

@rem Install GCS testbench
set PIPX_BIN_DIR=C:\Windows\
call %CD%\ci\scripts\install_gcs_testbench.bat
storage-testbench -h || exit /B

@rem
@rem Build and test Arrow C++ libraries (including Parquet)
Expand Down
12 changes: 7 additions & 5 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ RUN mamba install -q -y \
valgrind && \
mamba clean --all

# We want to install the GCS testbench using the Conda base environment's Python,
# because the test environment's Python may later change.
ENV PIPX_PYTHON=/opt/conda/bin/python3
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Ensure npm, node and azurite are on path. npm and node are required to install azurite, which will then need to
# be on the path for the tests to run.
# be on the path for the tests to run.
ENV PATH=/opt/conda/envs/arrow/bin:$PATH

COPY ci/scripts/install_azurite.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_azurite.sh

# We want to install the GCS testbench using the same Python binary that the Conda code will use.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

Expand Down
5 changes: 0 additions & 5 deletions ci/docker/conda-python.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,6 @@ RUN mamba install -q -y \
nomkl && \
mamba clean --all

# XXX The GCS testbench was already installed in conda-cpp.dockerfile,
# but we changed the installed Python version above, so we need to reinstall it.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

ENV ARROW_ACERO=ON \
ARROW_BUILD_STATIC=OFF \
ARROW_BUILD_TESTS=OFF \
Expand Down
27 changes: 19 additions & 8 deletions ci/docker/python-wheel-windows-test-vs2019.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,27 @@ RUN setx path "%path%;C:\Program Files\Git\usr\bin"
RUN wmic product where "name like 'python%%'" call uninstall /nointeractive && \
rm -rf Python*

# Install the GCS testbench using a well-known Python version.
# NOTE: cannot use pipx's `--fetch-missing-python` because of
# https://github.com/pypa/pipx/issues/1521, therefore download Python ourselves.
RUN choco install -r -y --pre --no-progress python --version=3.11.9
ENV PIPX_BIN_DIR=C:\\Windows\\
ENV PIPX_PYTHON="C:\Python311\python.exe"
COPY ci/scripts/install_gcs_testbench.bat C:/arrow/ci/scripts/
RUN call "C:\arrow\ci\scripts\install_gcs_testbench.bat" && \
storage-testbench -h

# Define the full version number otherwise choco falls back to patch number 0 (3.8 => 3.8.0)
ARG python=3.8
RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10" && setx PATH "%PATH%;C:\Python38;C:\Python38\Scripts") & \
(if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13" && setx PATH "%PATH%;C:\Python39;C:\Python39\Scripts") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11" && setx PATH "%PATH%;C:\Python310;C:\Python310\Scripts") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9" && setx PATH "%PATH%;C:\Python311;C:\Python311\Scripts") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4" && setx PATH "%PATH%;C:\Python312;C:\Python312\Scripts") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1" && setx PATH "%PATH%;C:\Python313;C:\Python313\Scripts")
RUN (if "%python%"=="3.8" setx PYTHON_VERSION "3.8.10") & \
(if "%python%"=="3.9" setx PYTHON_VERSION "3.9.13") & \
(if "%python%"=="3.10" setx PYTHON_VERSION "3.10.11") & \
(if "%python%"=="3.11" setx PYTHON_VERSION "3.11.9") & \
(if "%python%"=="3.12" setx PYTHON_VERSION "3.12.4") & \
(if "%python%"=="3.13" setx PYTHON_VERSION "3.13.0-rc1")

# Install archiver to extract xz archives
RUN choco install -r -y --pre --no-progress python --version=%PYTHON_VERSION% & \
python -m pip install --no-cache-dir -U pip setuptools & \
RUN choco install -r -y --pre --no-progress --force python --version=%PYTHON_VERSION% && \
choco install --no-progress -r -y archiver

ENV PYTHON=$python
1 change: 1 addition & 0 deletions ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
wget && \
apt-get clean && \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
wget && \
apt-get clean && \
Expand Down
1 change: 1 addition & 0 deletions ci/docker/ubuntu-24.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ RUN apt-get update -y -q && \
libssl-dev \
libcurl4-openssl-dev \
python3-pip \
python3-venv \
tzdata \
tzdata-legacy \
wget && \
Expand Down
13 changes: 11 additions & 2 deletions ci/scripts/install_gcs_testbench.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,18 @@

@echo on

set GCS_TESTBENCH_VERSION="v0.36.0"
set GCS_TESTBENCH_VERSION="v0.40.0"

set PIPX_FLAGS=--verbose
if NOT "%PIPX_PYTHON%"=="" (
set PIPX_FLAGS=--python %PIPX_PYTHON% %PIPX_FLAGS%
)

python -m pip install -U pipx || exit /B 1

@REM Install GCS testbench %GCS_TESTBENCH_VERSION%
python -m pip install ^
pipx install %PIPX_FLAGS% ^
"https://github.com/googleapis/storage-testbench/archive/%GCS_TESTBENCH_VERSION%.tar.gz" ^
|| exit /B 1

pipx list --verbose
20 changes: 12 additions & 8 deletions ci/scripts/install_gcs_testbench.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.

set -e
set -ex

if [ "$#" -ne 1 ]; then
echo "Usage: $0 <storage-testbench version>"
Expand All @@ -34,19 +34,23 @@ case "$(uname -m)" in
;;
esac

# On newer pythons install into the system will fail, so override that
export PIP_BREAK_SYSTEM_PACKAGES=1

version=$1
if [[ "${version}" -eq "default" ]]; then
version="v0.39.0"
# Latests versions of Testbench require newer setuptools
python3 -m pip install --upgrade setuptools
fi

: ${PIPX_PYTHON:=$(which python3)}

export PIP_BREAK_SYSTEM_PACKAGES=1
${PIPX_PYTHON} -m pip install -U pipx

# This script is run with PYTHON undefined in some places,
# but those only use older pythons.
if [[ -z "${PYTHON_VERSION}" ]] || [[ "${PYTHON_VERSION}" != "3.13" ]]; then
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this `if` check be removed now? AFAIK we only added it because we couldn't install grpcio on Python 3.13, but now that this is using pipx (with a fixed Python version), it shouldn't matter that there are no Python 3.13 wheels yet.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I should open a followup issue.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opened #43883

python3 -m pip install \
"https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
pipx_flags=--verbose
if [[ $(id -un) == "root" ]]; then
# Install globally as /root/.local/bin is typically not in $PATH
pipx_flags="${pipx_flags} --global"
fi
${PIPX_PYTHON} -m pipx install ${pipx_flags} "https://github.com/googleapis/storage-testbench/archive/${version}.tar.gz"
fi
40 changes: 22 additions & 18 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -37,28 +37,32 @@ set PYARROW_TEST_TENSORFLOW=ON
set ARROW_TEST_DATA=C:\arrow\testing\data
set PARQUET_TEST_DATA=C:\arrow\cpp\submodules\parquet-testing\data

@REM Install testing dependencies
pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1
@REM List installed Pythons
py -0p

set PYTHON_CMD=py -%PYTHON%

@REM Install GCS testbench
call "C:\arrow\ci\scripts\install_gcs_testbench.bat"
%PYTHON_CMD% -m pip install -U pip setuptools || exit /B 1

@REM Install testing dependencies
%PYTHON_CMD% -m pip install -r C:\arrow\python\requirements-wheel-test.txt || exit /B 1

@REM Install the built wheels
python -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1
%PYTHON_CMD% -m pip install --no-index --find-links=C:\arrow\python\dist\ pyarrow || exit /B 1

@REM Test that the modules are importable
python -c "import pyarrow" || exit /B 1
python -c "import pyarrow._gcsfs" || exit /B 1
python -c "import pyarrow._hdfs" || exit /B 1
python -c "import pyarrow._s3fs" || exit /B 1
python -c "import pyarrow.csv" || exit /B 1
python -c "import pyarrow.dataset" || exit /B 1
python -c "import pyarrow.flight" || exit /B 1
python -c "import pyarrow.fs" || exit /B 1
python -c "import pyarrow.json" || exit /B 1
python -c "import pyarrow.orc" || exit /B 1
python -c "import pyarrow.parquet" || exit /B 1
python -c "import pyarrow.substrait" || exit /B 1
%PYTHON_CMD% -c "import pyarrow" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._gcsfs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._hdfs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow._s3fs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.csv" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.dataset" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.flight" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.fs" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.json" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.orc" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.parquet" || exit /B 1
%PYTHON_CMD% -c "import pyarrow.substrait" || exit /B 1

@rem Download IANA Timezone Database for ORC C++
curl https://cygwin.osuosl.org/noarch/release/tzdata/tzdata-2024a-1.tar.xz --output tzdata.tar.xz || exit /B
Expand All @@ -67,4 +71,4 @@ arc unarchive tzdata.tar.xz %USERPROFILE%\Downloads\test\tzdata
set TZDIR=%USERPROFILE%\Downloads\test\tzdata\usr\share\zoneinfo

@REM Execute unittest
pytest -r s --pyargs pyarrow || exit /B 1
%PYTHON_CMD% -m pytest -r s --pyargs pyarrow || exit /B 1
68 changes: 34 additions & 34 deletions cpp/src/arrow/filesystem/gcsfs_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,52 +95,52 @@ class GcsTestbench : public ::testing::Environment {
if (const auto* env = std::getenv("PYTHON")) {
names = {env};
}
auto error = std::string(
"Could not start GCS emulator."
" Used the following list of python interpreter names:");
for (const auto& interpreter : names) {
auto exe_path = bp::search_path(interpreter);
error += " " + interpreter;
if (exe_path.empty()) {
error += " (exe not found)";
continue;
}
auto error = std::string("Could not start GCS emulator 'storage-testbench'");

bp::ipstream output;
server_process_ = bp::child(exe_path, "-m", "testbench", "--port", port_, group_,
bp::std_err > output);
auto testbench_is_running = [](bp::child& process, bp::ipstream& output) {
// Wait for message: "* Restarting with"
auto testbench_is_running = [&output, this](bp::child& process) {
std::string line;
std::chrono::time_point<std::chrono::steady_clock> end =
std::chrono::steady_clock::now() + std::chrono::seconds(10);
while (server_process_.valid() && server_process_.running() &&
std::chrono::steady_clock::now() < end) {
if (output.peek() && std::getline(output, line)) {
std::cerr << line << std::endl;
if (line.find("* Restarting with") != std::string::npos) return true;
} else {
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
std::string line;
std::chrono::time_point<std::chrono::steady_clock> end =
std::chrono::steady_clock::now() + std::chrono::seconds(10);
while (process.valid() && process.running() &&
std::chrono::steady_clock::now() < end) {
if (output.peek() && std::getline(output, line)) {
std::cerr << line << std::endl;
if (line.find("* Restarting with") != std::string::npos) return true;
} else {
std::this_thread::sleep_for(std::chrono::milliseconds(20));
}
return false;
};
}
return false;
};

if (testbench_is_running(server_process_)) break;
error += " (failed to start)";
server_process_.terminate();
server_process_.wait();
auto exe_path = bp::search_path("storage-testbench");
if (!exe_path.empty()) {
bp::ipstream output;
server_process_ =
bp::child(exe_path, "--port", port_, group_, bp::std_err > output);
if (!testbench_is_running(server_process_, output)) {
error += " (failed to start)";
server_process_.terminate();
server_process_.wait();
}
} else {
error += " (exe not found)";
}
if (!server_process_.valid()) {
error_ = std::move(error);
}
if (server_process_.valid() && server_process_.valid()) return;
error_ = std::move(error);
}

bool running() { return server_process_.running(); }

~GcsTestbench() override {
// Brutal shutdown, kill the full process group because the GCS testbench may launch
// additional children.
group_.terminate();
try {
group_.terminate();
} catch (bp::process_error&) {
}
if (server_process_.valid()) {
server_process_.wait();
}
Expand Down
7 changes: 3 additions & 4 deletions python/pyarrow/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,17 +233,16 @@ def minio_server_health_check(address):
def gcs_server():
port = find_free_port()
env = os.environ.copy()
args = [sys.executable, '-m', 'testbench', '--port', str(port)]
exe = 'storage-testbench'
args = [exe, '--port', str(port)]
proc = None
try:
# check first if testbench module is available
import testbench # noqa:F401
# start server
proc = subprocess.Popen(args, env=env)
# Make sure the server is alive.
if proc.poll() is not None:
pytest.skip(f"Command {args} did not start server successfully!")
except (ModuleNotFoundError, OSError) as e:
except OSError as e:
pytest.skip(f"Command {args} failed to execute: {e}")
else:
yield {
Expand Down
2 changes: 1 addition & 1 deletion python/scripts/run_emscripten_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def _load_pyarrow_in_runner(driver, wheel_name):
"""
import pyarrow,pathlib
pyarrow_dir = pathlib.Path(pyarrow.__file__).parent
pytest.main([pyarrow_dir, '-v'])
pytest.main([pyarrow_dir, '-r', 's'])
""",
wait_for_terminate=False,
)
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/test-gcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ test_that("GcsFileSystem$create() can read json_credentials", {
})

skip_on_cran()
skip_if_not(system('python -c "import testbench"') == 0, message = "googleapis-storage-testbench is not installed.")
skip_if_not(system("storage-testbench -h") == 0, message = "googleapis-storage-testbench is not installed.")
library(dplyr)

testbench_port <- Sys.getenv("TESTBENCH_PORT", "9001")

pid_minio <- sys::exec_background("python", c("-m", "testbench", "--port", testbench_port),
pid_minio <- sys::exec_background("storage-testbench", c("--port", testbench_port),
std_out = FALSE,
std_err = FALSE # TODO: is there a good place to send output?
)
Expand Down