Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@
"categories_found": [1],
"notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance."
},
"pathfinding": {
"last_inspected": "2026-04-22",
"issue": null,
"severity_max": "MEDIUM",
"categories_found": [1, 6],
"notes": "No CRITICAL/HIGH findings. Cat 1 already well-guarded: _check_memory(h, w) runs before every numpy/cupy _a_star_search allocation and covers the ~65 bytes/pixel footprint (parent_ys/xs int64, g_cost f64, visited i8, three heap arrays h_keys/h_rows/h_cols sized h*w). Auto-radius falls back when full grid exceeds 50% RAM, HPA* kicks in for long paths. Dask path uses sparse dict/set and on-demand chunk cache so no full-grid numpy materialisation. No CUDA kernels (cupy backend transfers to CPU). No file-path I/O from user input (only /proc/meminfo read). MEDIUM (unfixed, Cat 1): multi_stop_search does not cap len(waypoints); _optimize_waypoint_order builds an O(N^2) dist matrix and runs N^2 A* calls, and _nearest_neighbor_2opt is O(N^3), so pathological waypoint lists can cause extreme CPU consumption (DoS). MEDIUM (unfixed, Cat 6): a_star_search and multi_stop_search do not call _validate_raster(surface) -- only ndim==2 is checked, dtype is not; non-numeric dtypes would fail inside numba with confusing errors rather than a clean TypeError. int64 overflow in height*width (line 214 max_heap) is not reachable given the memory guard (~46340x46340 would already raise MemoryError long before 2**63)."
},
"bump": {
"last_inspected": "2026-04-22",
"issue": 1231,
"severity_max": "HIGH",
"categories_found": [1],
"notes": "HIGH (fixed #1231): _finish_bump allocated np.zeros((height, width)) with no memory guard. The existing count guard (added in #1206) only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Fixed by extending the memory budget check to include raster_bytes = w * h * 8 when the backend will materialize the full array; dask paths build per-chunk and are excluded. No other HIGH findings: _bump_dask_numpy/_bump_dask_cupy build output lazily via da.from_delayed, no CUDA kernels (cupy path wraps the numba CPU kernel), no file I/O, no int32 overflow in realistic scenarios. MEDIUM (unfixed, Cat 6): bump() does not call _validate_raster on agg (dtype is not checked; shape unpacking catches wrong-ndim, but a non-numeric DataArray would fail confusingly downstream)."
},
"perlin": {
"last_inspected": "2026-04-22",
"issue": 1232,
Expand Down
43 changes: 36 additions & 7 deletions xrspatial/bump.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ class cupy(object):
except ImportError:
da = None

from xrspatial.utils import ArrayTypeFunctionMapping, _validate_scalar, ngjit
from xrspatial.utils import (
ArrayTypeFunctionMapping,
_validate_scalar,
has_cuda_and_cupy,
is_cupy_array,
ngjit,
)

# Upper bound on bump count to prevent accidental OOM from the default
# w*h//10 heuristic. 16 bytes per bump (int32 loc pair + float64 height),
Expand Down Expand Up @@ -340,15 +346,38 @@ def heights(locations, src, src_range, height = 20):
if count is None:
count = min(w * h // 10, _MAX_DEFAULT_COUNT)

# 16 bytes per bump: 2 x int32 coords + 1 x float64 height
required_bytes = count * 16
# The dask backends build the output lazily per-chunk, so the full
# raster never lives in memory at once. Only guard against raster
# size when we will actually materialize it.
materializes_raster = (
agg is None
or isinstance(agg.data, np.ndarray)
or (has_cuda_and_cupy() and is_cupy_array(agg.data))
)

# Budget: 16 bytes per bump (2 x int32 coord + float64 height) plus
# the full output raster at 8 bytes/cell (float64) when the backend
# will materialize it. Guard both so a huge w*h cannot allocate
# multi-TB arrays silently.
bump_bytes = count * 16
raster_bytes = w * h * 8 if materializes_raster else 0
required_bytes = bump_bytes + raster_bytes
available = _available_memory_bytes()
if required_bytes > 0.5 * available:
if raster_bytes > 0:
detail = (
f"({raster_bytes / 1e9:.1f} GB for the output raster plus "
f"{bump_bytes / 1e9:.1f} GB for location/height arrays)"
)
hint = "Use a smaller raster or pass a dask-backed agg."
else:
detail = "for location/height arrays"
hint = "Pass a smaller count explicitly."
raise MemoryError(
f"bump() with count={count:,} requires ~{required_bytes / 1e9:.1f} GB "
f"for location/height arrays, but only "
f"{available / 1e9:.1f} GB is available. "
f"Pass a smaller count explicitly."
f"bump() with width={w:,}, height={h:,}, count={count:,} "
f"requires ~{required_bytes / 1e9:.1f} GB {detail}, "
f"but only {available / 1e9:.1f} GB is available. "
f"{hint}"
)

if height_func is None:
Expand Down
51 changes: 50 additions & 1 deletion xrspatial/tests/test_bump.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,19 @@ def test_bump_locs_use_int32_not_uint16():
assert out[50, 4464] == 0, "uint16 wrap-around location should be empty"


def test_bump_default_count_capped():
def test_bump_default_count_capped(monkeypatch):
"""Default count should not exceed _MAX_DEFAULT_COUNT (#1206)."""
import sys

from xrspatial.bump import _MAX_DEFAULT_COUNT

# The 20000 x 20000 raster is 3.2 GB, which exceeds the memory guard on
# small CI runners (#1234). Stub available memory so the guard does not
# interfere with this count-cap assertion. xrspatial.__init__ rebinds
# the `bump` name to the function, so reach the module via sys.modules.
bump_mod = sys.modules["xrspatial.bump"]
monkeypatch.setattr(bump_mod, "_available_memory_bytes", lambda: 64 * 1024**3)

# 20000 x 20000 → w*h//10 = 40M, should be capped to 10M
result = bump(width=20000, height=20000, spread=0)
assert result.shape == (20000, 20000)
Expand Down Expand Up @@ -260,3 +269,43 @@ def test_bump_dask_partitioned_matches_numpy():
result_dask = bump(agg=agg_dask, count=50, spread=2)

np.testing.assert_array_equal(result_np.values, result_dask.values)


# --- Issue #1231 regression tests ---

def test_bump_raster_memory_guard_rejects_pathological_size():
    """Regression test for #1231: a pathological ``width * height`` must
    trip the memory guard even when the bump count itself is tiny.

    Before the fix, ``bump(width=1_000_000, height=1_000_000)`` slipped
    past the bump-count guard (default count is capped at 10M ~ 160 MB)
    and then attempted an 8 TB float64 output-raster allocation inside
    ``_finish_bump``.
    """
    import pytest

    side = 1_000_000
    with pytest.raises(MemoryError, match=r"bump.*width=1,000,000"):
        bump(width=side, height=side)


def test_bump_raster_memory_guard_mentions_raster_bytes():
    """Regression test for #1231: when the output raster dominates the
    memory budget, the MemoryError message should name the raster
    allocation as the culprit."""
    import pytest

    with pytest.raises(MemoryError, match="output raster"):
        # count=1 keeps the bump arrays negligible; only the 500k x 500k
        # raster (~2 TB of float64) can trip the guard here.
        bump(width=500_000, height=500_000, count=1)


@dask_array_available
def test_bump_dask_bypasses_raster_guard():
    """Regression test for #1231: the raster-size guard must not fire for
    a huge dask-backed agg, whose output is assembled lazily per chunk."""
    # A 100k x 100k float64 grid would need 80 GB if materialized; the
    # dask backend only ever holds individual chunks in memory.
    shape = (100_000, 100_000)
    lazy = da.zeros(shape, chunks=(1_000, 1_000), dtype=np.float64)
    agg = xr.DataArray(lazy, dims=['y', 'x'])

    result = bump(agg=agg, count=10, spread=0)

    assert result.shape == shape
    assert isinstance(result.data, da.Array)
Loading