Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,20 @@
"categories_found": [1],
"notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance."
},
"pathfinding": {
"last_inspected": "2026-04-22",
"issue": null,
"severity_max": "MEDIUM",
"categories_found": [1, 6],
"notes": "No CRITICAL/HIGH findings. Cat 1 already well-guarded: _check_memory(h, w) runs before every numpy/cupy _a_star_search allocation and covers the ~65 bytes/pixel footprint (parent_ys/xs int64, g_cost f64, visited i8, three heap arrays h_keys/h_rows/h_cols sized h*w). Auto-radius falls back when full grid exceeds 50% RAM, HPA* kicks in for long paths. Dask path uses sparse dict/set and on-demand chunk cache so no full-grid numpy materialisation. No CUDA kernels (cupy backend transfers to CPU). No file-path I/O from user input (only /proc/meminfo read). MEDIUM (unfixed, Cat 1): multi_stop_search does not cap len(waypoints); _optimize_waypoint_order builds an O(N^2) dist matrix and runs N^2 A* calls, and _nearest_neighbor_2opt is O(N^3), so pathological waypoint lists can cause extreme CPU consumption (DoS). MEDIUM (unfixed, Cat 6): a_star_search and multi_stop_search do not call _validate_raster(surface) -- only ndim==2 is checked, dtype is not; non-numeric dtypes would fail inside numba with confusing errors rather than a clean TypeError. int64 overflow in height*width (line 214 max_heap) is not reachable given the memory guard (~46340x46340 would already raise MemoryError long before 2**63)."
},
"bump": {
"last_inspected": "2026-04-22",
"issue": 1231,
"severity_max": "HIGH",
"categories_found": [1],
"notes": "HIGH (fixed #1231): _finish_bump allocated np.zeros((height, width)) with no memory guard. The existing count guard (added in #1206) only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Fixed by extending the memory budget check to include raster_bytes = w * h * 8 when the backend will materialize the full array; dask paths build per-chunk and are excluded. No other HIGH findings: _bump_dask_numpy/_bump_dask_cupy build output lazily via da.from_delayed, no CUDA kernels (cupy path wraps the numba CPU kernel), no file I/O, no int32 overflow in realistic scenarios. MEDIUM (unfixed, Cat 6): bump() does not call _validate_raster on agg (dtype is not checked; shape unpacking catches wrong-ndim, but a non-numeric DataArray would fail confusingly downstream)."
},
"perlin": {
"last_inspected": "2026-04-22",
"issue": 1232,
Expand Down
43 changes: 36 additions & 7 deletions xrspatial/bump.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,13 @@ class cupy(object):
except ImportError:
da = None

from xrspatial.utils import ArrayTypeFunctionMapping, _validate_scalar, ngjit
from xrspatial.utils import (
ArrayTypeFunctionMapping,
_validate_scalar,
has_cuda_and_cupy,
is_cupy_array,
ngjit,
)

# Upper bound on bump count to prevent accidental OOM from the default
# w*h//10 heuristic. 16 bytes per bump (int32 loc pair + float64 height),
Expand Down Expand Up @@ -340,15 +346,38 @@ def heights(locations, src, src_range, height = 20):
if count is None:
count = min(w * h // 10, _MAX_DEFAULT_COUNT)

# 16 bytes per bump: 2 x int32 coords + 1 x float64 height
required_bytes = count * 16
# The dask backends build the output lazily per-chunk, so the full
# raster never lives in memory at once. Only guard against raster
# size when we will actually materialize it.
materializes_raster = (
agg is None
or isinstance(agg.data, np.ndarray)
or (has_cuda_and_cupy() and is_cupy_array(agg.data))
)

# Budget: 16 bytes per bump (2 x int32 coord + float64 height) plus
# the full output raster at 8 bytes/cell (float64) when the backend
# will materialize it. Guard both so a huge w*h cannot allocate
# multi-TB arrays silently.
bump_bytes = count * 16
raster_bytes = w * h * 8 if materializes_raster else 0
required_bytes = bump_bytes + raster_bytes
available = _available_memory_bytes()
if required_bytes > 0.5 * available:
if raster_bytes > 0:
detail = (
f"({raster_bytes / 1e9:.1f} GB for the output raster plus "
f"{bump_bytes / 1e9:.1f} GB for location/height arrays)"
)
hint = "Use a smaller raster or pass a dask-backed agg."
else:
detail = "for location/height arrays"
hint = "Pass a smaller count explicitly."
raise MemoryError(
f"bump() with count={count:,} requires ~{required_bytes / 1e9:.1f} GB "
f"for location/height arrays, but only "
f"{available / 1e9:.1f} GB is available. "
f"Pass a smaller count explicitly."
f"bump() with width={w:,}, height={h:,}, count={count:,} "
f"requires ~{required_bytes / 1e9:.1f} GB {detail}, "
f"but only {available / 1e9:.1f} GB is available. "
f"{hint}"
)

if height_func is None:
Expand Down
51 changes: 50 additions & 1 deletion xrspatial/tests/test_bump.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,10 +196,19 @@ def test_bump_locs_use_int32_not_uint16():
assert out[50, 4464] == 0, "uint16 wrap-around location should be empty"


def test_bump_default_count_capped():
def test_bump_default_count_capped(monkeypatch):
"""Default count should not exceed _MAX_DEFAULT_COUNT (#1206)."""
import sys

from xrspatial.bump import _MAX_DEFAULT_COUNT

# The 20000 x 20000 raster is 3.2 GB, which exceeds the memory guard on
# small CI runners (#1234). Stub available memory so the guard does not
# interfere with this count-cap assertion. xrspatial.__init__ rebinds
# the `bump` name to the function, so reach the module via sys.modules.
bump_mod = sys.modules["xrspatial.bump"]
monkeypatch.setattr(bump_mod, "_available_memory_bytes", lambda: 64 * 1024**3)

# 20000 x 20000 → w*h//10 = 40M, should be capped to 10M
result = bump(width=20000, height=20000, spread=0)
assert result.shape == (20000, 20000)
Expand Down Expand Up @@ -260,3 +269,43 @@ def test_bump_dask_partitioned_matches_numpy():
result_dask = bump(agg=agg_dask, count=50, spread=2)

np.testing.assert_array_equal(result_np.values, result_dask.values)


# --- Issue #1231 regression tests ---

def test_bump_raster_memory_guard_rejects_pathological_size():
    """Regression test for #1231: a pathological ``width * height`` must
    trip the memory guard even when the bump count itself is tiny.

    Before the fix, ``bump(width=1_000_000, height=1_000_000)`` slipped
    past the bump-count guard (default count is capped at 10M ~ 160 MB)
    and then attempted an 8 TB float64 output-raster allocation inside
    ``_finish_bump``.
    """
    import pytest

    side = 1_000_000
    with pytest.raises(MemoryError, match=r"bump.*width=1,000,000"):
        bump(width=side, height=side)


def test_bump_raster_memory_guard_mentions_raster_bytes():
    """Regression test for #1231: when the output raster dominates the
    memory budget, the MemoryError message should name the raster
    allocation as the culprit."""
    import pytest

    with pytest.raises(MemoryError, match="output raster"):
        # count=1 keeps the bump arrays negligible; only the 500k x 500k
        # raster (~2 TB of float64) can trip the guard here.
        bump(width=500_000, height=500_000, count=1)


@dask_array_available
def test_bump_dask_bypasses_raster_guard():
    """Regression test for #1231: the raster-size guard must not fire for
    a huge dask-backed agg, whose output is assembled lazily per chunk."""
    # A 100k x 100k float64 grid would need 80 GB if materialized; the
    # dask backend only ever holds individual chunks in memory.
    shape = (100_000, 100_000)
    lazy = da.zeros(shape, chunks=(1_000, 1_000), dtype=np.float64)
    agg = xr.DataArray(lazy, dims=['y', 'x'])

    result = bump(agg=agg, count=10, spread=0)

    assert result.shape == shape
    assert isinstance(result.data, da.Array)
Loading