From 51d1cc7440d54528111b2bb3534f1692afb9fe51 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 8 Dec 2025 11:16:14 -0800
Subject: [PATCH 1/3] Don't run parallel random access on GCS

---
 .../python/ci_benchmarks/benchmarks/test_random_access.py   | 3 ++-
 python/python/ci_benchmarks/datasets.py                     | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/python/ci_benchmarks/benchmarks/test_random_access.py b/python/python/ci_benchmarks/benchmarks/test_random_access.py
index e5fea790224..dc86d1c4b5c 100644
--- a/python/python/ci_benchmarks/benchmarks/test_random_access.py
+++ b/python/python/ci_benchmarks/benchmarks/test_random_access.py
@@ -9,7 +9,7 @@
 
 import lance
 import pytest
-from ci_benchmarks.datasets import open_dataset
+from ci_benchmarks.datasets import is_on_google, open_dataset
 
 # POSIX fadvise flag to drop page cache
 POSIX_FADV_DONTNEED = 4
@@ -65,6 +65,7 @@ def setup():
 
 @pytest.mark.parametrize("dataset", DATASETS)
 @pytest.mark.parametrize("rows_per_take", [1, 10, 100])
+@pytest.mark.skipif(is_on_google(), reason="Requires too many IOPS for cloud storage")
 def test_parallel_random_access(benchmark, dataset, rows_per_take):
     TAKES_PER_ITER = 100
 
diff --git a/python/python/ci_benchmarks/datasets.py b/python/python/ci_benchmarks/datasets.py
index 3fc901ff3c8..fa2070a26b1 100644
--- a/python/python/ci_benchmarks/datasets.py
+++ b/python/python/ci_benchmarks/datasets.py
@@ -9,7 +9,7 @@
 from lance.log import LOGGER
 
 
-def _is_on_google() -> bool:
+def is_on_google() -> bool:
     LOGGER.info("Testing if running on Google Cloud")
     try:
         rsp = requests.get("http://metadata.google.internal", timeout=5)
@@ -22,7 +22,7 @@ def _is_on_google() -> bool:
 
 @cache
 def _get_base_uri() -> str:
-    if _is_on_google():
+    if is_on_google():
         LOGGER.info("Running on Google Cloud, using gs://lance-benchmarks-ci-datasets/")
         return "gs://lance-benchmarks-ci-datasets/"
     else:
@@ -38,7 +38,7 @@ def get_dataset_uri(name: str) -> str:
     # This is a custom-built dataset, on a unique bucket, that is too big to reproduce
     # locally
     if name == "image_eda":
-        if not _is_on_google():
+        if not is_on_google():
             raise ValueError("The image_eda dataset is only available on Google Cloud")
         return "gs://lance-benchmarks-ci-datasets/image_eda.lance"
     return f"{_get_base_uri()}{name}"

From 33ea6eed61ade89bc411391559f7ca7662cda829 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 8 Dec 2025 13:04:37 -0800
Subject: [PATCH 2/3] Don't run bitmap use_cache=False on GCS

---
 python/python/ci_benchmarks/benchmarks/test_search.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/python/python/ci_benchmarks/benchmarks/test_search.py b/python/python/ci_benchmarks/benchmarks/test_search.py
index f7ab517701d..203fb97f13c 100644
--- a/python/python/ci_benchmarks/benchmarks/test_search.py
+++ b/python/python/ci_benchmarks/benchmarks/test_search.py
@@ -5,7 +5,7 @@
 
 import lance
 import pytest
-from ci_benchmarks.datasets import get_dataset_uri
+from ci_benchmarks.datasets import get_dataset_uri, is_on_google
 from ci_benchmarks.utils import wipe_os_cache
 
 COLUMN_LABELS = ["bools", "normals"]
@@ -177,6 +177,13 @@ def test_basic_btree_search(
 ]
 
 
+# use_cache values to test; no_cache is skipped on Google Cloud (too costly for now)
+def use_cache_param():
+    if is_on_google():
+        return [True]
+    return [True, False]
+
+
 # Repeats the same test for the basic dataset which is easier to test with locally
 # This benchmark is not part of the CI job as the EDA dataset is better for that
 @pytest.mark.parametrize("filt", BASIC_BITMAP_FILTERS, ids=BASIC_BITMAP_FILTER_LABELS)

From b2466e59683972af5c667848371f70fc4c947e35 Mon Sep 17 00:00:00 2001
From: Weston Pace <weston.pace@gmail.com>
Date: Mon, 8 Dec 2025 13:50:31 -0800
Subject: [PATCH 3/3] Don't run bitmap use_cache=False on GCS (wire params and ids into test)

---
 python/python/ci_benchmarks/benchmarks/test_search.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/python/ci_benchmarks/benchmarks/test_search.py b/python/python/ci_benchmarks/benchmarks/test_search.py
index 203fb97f13c..7f0eb2f84b3 100644
--- a/python/python/ci_benchmarks/benchmarks/test_search.py
+++ b/python/python/ci_benchmarks/benchmarks/test_search.py
@@ -184,11 +184,17 @@ def use_cache_param():
     return [True, False]
 
 
+def use_cache_ids():
+    """Return one pytest id per value of use_cache_param(), in the same order."""
+    labels = {True: "cache", False: "no_cache"}
+    return [labels[flag] for flag in use_cache_param()]
+
+
 # Repeats the same test for the basic dataset which is easier to test with locally
 # This benchmark is not part of the CI job as the EDA dataset is better for that
 @pytest.mark.parametrize("filt", BASIC_BITMAP_FILTERS, ids=BASIC_BITMAP_FILTER_LABELS)
 @pytest.mark.parametrize("payload", [None, "small_strings", "integers"])
-@pytest.mark.parametrize("use_cache", [True, False], ids=["cache", "no_cache"])
+@pytest.mark.parametrize("use_cache", use_cache_param(), ids=use_cache_ids())
 def test_basic_bitmap_search(
     benchmark, filt: str | None, payload: str | None, use_cache: bool
 ):