diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index 5d474478..a6e6804a 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -41,6 +41,20 @@ "categories_found": [1], "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." }, + "pathfinding": { + "last_inspected": "2026-04-22", + "issue": null, + "severity_max": "MEDIUM", + "categories_found": [1, 6], + "notes": "No CRITICAL/HIGH findings. Cat 1 already well-guarded: _check_memory(h, w) runs before every numpy/cupy _a_star_search allocation and covers the ~65 bytes/pixel footprint (parent_ys/xs int64, g_cost f64, visited i8, three heap arrays h_keys/h_rows/h_cols sized h*w). Auto-radius falls back when full grid exceeds 50% RAM, HPA* kicks in for long paths. Dask path uses sparse dict/set and on-demand chunk cache so no full-grid numpy materialisation. No CUDA kernels (cupy backend transfers to CPU). No file-path I/O from user input (only /proc/meminfo read). MEDIUM (unfixed, Cat 1): multi_stop_search does not cap len(waypoints); _optimize_waypoint_order builds an O(N^2) dist matrix and runs N^2 A* calls, and _nearest_neighbor_2opt is O(N^3), so pathological waypoint lists can cause extreme CPU consumption (DoS). MEDIUM (unfixed, Cat 6): a_star_search and multi_stop_search do not call _validate_raster(surface) -- only ndim==2 is checked, dtype is not; non-numeric dtypes would fail inside numba with confusing errors rather than a clean TypeError. int64 overflow in height*width (line 214 max_heap) is not reachable given the memory guard (~46340x46340 would already raise MemoryError long before 2**63)." + }, + "bump": { + "last_inspected": "2026-04-22", + "issue": 1231, + "severity_max": "HIGH", + "categories_found": [1], + "notes": "HIGH (fixed #1231): _finish_bump allocated np.zeros((height, width)) with no memory guard. The existing count guard (added in #1206) only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Fixed by extending the memory budget check to include raster_bytes = w * h * 8 when the backend will materialize the full array; dask paths build per-chunk and are excluded. No other HIGH findings: _bump_dask_numpy/_bump_dask_cupy build output lazily via da.from_delayed, no CUDA kernels (cupy path wraps the numba CPU kernel), no file I/O, no int32 overflow in realistic scenarios. MEDIUM (unfixed, Cat 6): bump() does not call _validate_raster on agg (dtype is not checked; shape unpacking catches wrong-ndim, but a non-numeric DataArray would fail confusingly downstream)." + }, "perlin": { "last_inspected": "2026-04-22", "issue": 1232, diff --git a/xrspatial/bump.py b/xrspatial/bump.py index cdcea695..c7518b46 100644 --- a/xrspatial/bump.py +++ b/xrspatial/bump.py @@ -15,7 +15,13 @@ class cupy(object): except ImportError: da = None -from xrspatial.utils import ArrayTypeFunctionMapping, _validate_scalar, ngjit +from xrspatial.utils import ( + ArrayTypeFunctionMapping, + _validate_scalar, + has_cuda_and_cupy, + is_cupy_array, + ngjit, +) # Upper bound on bump count to prevent accidental OOM from the default # w*h//10 heuristic. 16 bytes per bump (int32 loc pair + float64 height), @@ -340,15 +346,38 @@ def heights(locations, src, src_range, height = 20): if count is None: count = min(w * h // 10, _MAX_DEFAULT_COUNT) - # 16 bytes per bump: 2 x int32 coords + 1 x float64 height - required_bytes = count * 16 + # The dask backends build the output lazily per-chunk, so the full + # raster never lives in memory at once. Only guard against raster + # size when we will actually materialize it. + materializes_raster = ( + agg is None + or isinstance(agg.data, np.ndarray) + or (has_cuda_and_cupy() and is_cupy_array(agg.data)) + ) + + # Budget: 16 bytes per bump (2 x int32 coord + float64 height) plus + # the full output raster at 8 bytes/cell (float64) when the backend + # will materialize it. Guard both so a huge w*h cannot allocate + # multi-TB arrays silently. + bump_bytes = count * 16 + raster_bytes = w * h * 8 if materializes_raster else 0 + required_bytes = bump_bytes + raster_bytes available = _available_memory_bytes() if required_bytes > 0.5 * available: + if raster_bytes > 0: + detail = ( + f"({raster_bytes / 1e9:.1f} GB for the output raster plus " + f"{bump_bytes / 1e9:.1f} GB for location/height arrays)" + ) + hint = "Use a smaller raster or pass a dask-backed agg." + else: + detail = "for location/height arrays" + hint = "Pass a smaller count explicitly." raise MemoryError( - f"bump() with count={count:,} requires ~{required_bytes / 1e9:.1f} GB " - f"for location/height arrays, but only " - f"{available / 1e9:.1f} GB is available. " - f"Pass a smaller count explicitly." + f"bump() with width={w:,}, height={h:,}, count={count:,} " + f"requires ~{required_bytes / 1e9:.1f} GB {detail}, " + f"but only {available / 1e9:.1f} GB is available. " + f"{hint}" ) if height_func is None: diff --git a/xrspatial/tests/test_bump.py b/xrspatial/tests/test_bump.py index 74f37741..bb9857a3 100644 --- a/xrspatial/tests/test_bump.py +++ b/xrspatial/tests/test_bump.py @@ -196,10 +196,19 @@ def test_bump_locs_use_int32_not_uint16(): assert out[50, 4464] == 0, "uint16 wrap-around location should be empty" -def test_bump_default_count_capped(): +def test_bump_default_count_capped(monkeypatch): """Default count should not exceed _MAX_DEFAULT_COUNT (#1206).""" + import sys + from xrspatial.bump import _MAX_DEFAULT_COUNT + # The 20000 x 20000 raster is 3.2 GB, which exceeds the memory guard on + # small CI runners (#1234). Stub available memory so the guard does not + # interfere with this count-cap assertion. xrspatial.__init__ rebinds + # the `bump` name to the function, so reach the module via sys.modules. + bump_mod = sys.modules["xrspatial.bump"] + monkeypatch.setattr(bump_mod, "_available_memory_bytes", lambda: 64 * 1024**3) + # 20000 x 20000 → w*h//10 = 40M, should be capped to 10M result = bump(width=20000, height=20000, spread=0) assert result.shape == (20000, 20000) @@ -260,3 +269,43 @@ def test_bump_dask_partitioned_matches_numpy(): result_dask = bump(agg=agg_dask, count=50, spread=2) np.testing.assert_array_equal(result_np.values, result_dask.values) + + +# --- Issue #1231 regression tests --- + +def test_bump_raster_memory_guard_rejects_pathological_size(): + """Memory guard should reject huge width*height even with a small + count (#1231). + + Pre-fix: ``bump(width=1_000_000, height=1_000_000)`` would pass the + bump-count guard (default count capped at 10M ~ 160 MB) and then + allocate an 8 TB float64 output raster inside ``_finish_bump``. + """ + import pytest + + with pytest.raises(MemoryError, match=r"bump.*width=1,000,000"): + bump(width=1_000_000, height=1_000_000) + + +def test_bump_raster_memory_guard_mentions_raster_bytes(): + """Error message should name the raster allocation as the culprit + when the raster dominates the budget (#1231).""" + import pytest + + with pytest.raises(MemoryError, match="output raster"): + bump(width=500_000, height=500_000, count=1) + + +@dask_array_available +def test_bump_dask_bypasses_raster_guard(): + """Dask paths build the output lazily, so the raster-size guard + must not reject a huge dask-backed agg (#1231).""" + # 100k x 100k float64 would be 80 GB if materialized, but the dask + # backend never holds the full array in memory. + agg = xr.DataArray( + da.zeros((100_000, 100_000), chunks=(1_000, 1_000), dtype=np.float64), + dims=['y', 'x'], + ) + result = bump(agg=agg, count=10, spread=0) + assert result.shape == (100_000, 100_000) + assert isinstance(result.data, da.Array)