From d0675b884e2b48c79f96953a2f3d4638a05282f5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 16:59:55 -0700 Subject: [PATCH 1/3] Reject oversize bump output rasters before allocation (#1231) _finish_bump allocated np.zeros((height, width)) with no memory check. The existing bump-count guard only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Extend the memory budget check to include the output raster bytes (8 * w * h for float64) when the backend will materialize the full array. Dask paths build the output lazily per chunk, so they are excluded from the raster guard and continue to handle large dimensions. --- xrspatial/bump.py | 43 ++++++++++++++++++++++++++++++------ xrspatial/tests/test_bump.py | 40 +++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) diff --git a/xrspatial/bump.py b/xrspatial/bump.py index cdcea695..c7518b46 100644 --- a/xrspatial/bump.py +++ b/xrspatial/bump.py @@ -15,7 +15,13 @@ class cupy(object): except ImportError: da = None -from xrspatial.utils import ArrayTypeFunctionMapping, _validate_scalar, ngjit +from xrspatial.utils import ( + ArrayTypeFunctionMapping, + _validate_scalar, + has_cuda_and_cupy, + is_cupy_array, + ngjit, +) # Upper bound on bump count to prevent accidental OOM from the default # w*h//10 heuristic. 16 bytes per bump (int32 loc pair + float64 height), @@ -340,15 +346,38 @@ def heights(locations, src, src_range, height = 20): if count is None: count = min(w * h // 10, _MAX_DEFAULT_COUNT) - # 16 bytes per bump: 2 x int32 coords + 1 x float64 height - required_bytes = count * 16 + # The dask backends build the output lazily per-chunk, so the full + # raster never lives in memory at once. Only guard against raster + # size when we will actually materialize it. + materializes_raster = ( + agg is None + or isinstance(agg.data, np.ndarray) + or (has_cuda_and_cupy() and is_cupy_array(agg.data)) + ) + + # Budget: 16 bytes per bump (2 x int32 coord + float64 height) plus + # the full output raster at 8 bytes/cell (float64) when the backend + # will materialize it. Guard both so a huge w*h cannot allocate + # multi-TB arrays silently. + bump_bytes = count * 16 + raster_bytes = w * h * 8 if materializes_raster else 0 + required_bytes = bump_bytes + raster_bytes available = _available_memory_bytes() if required_bytes > 0.5 * available: + if raster_bytes > 0: + detail = ( + f"({raster_bytes / 1e9:.1f} GB for the output raster plus " + f"{bump_bytes / 1e9:.1f} GB for location/height arrays)" + ) + hint = "Use a smaller raster or pass a dask-backed agg." + else: + detail = "for location/height arrays" + hint = "Pass a smaller count explicitly." raise MemoryError( - f"bump() with count={count:,} requires ~{required_bytes / 1e9:.1f} GB " - f"for location/height arrays, but only " - f"{available / 1e9:.1f} GB is available. " - f"Pass a smaller count explicitly." + f"bump() with width={w:,}, height={h:,}, count={count:,} " + f"requires ~{required_bytes / 1e9:.1f} GB {detail}, " + f"but only {available / 1e9:.1f} GB is available. " + f"{hint}" ) if height_func is None: diff --git a/xrspatial/tests/test_bump.py b/xrspatial/tests/test_bump.py index 74f37741..dee77961 100644 --- a/xrspatial/tests/test_bump.py +++ b/xrspatial/tests/test_bump.py @@ -260,3 +260,43 @@ def test_bump_dask_partitioned_matches_numpy(): result_dask = bump(agg=agg_dask, count=50, spread=2) np.testing.assert_array_equal(result_np.values, result_dask.values) + + +# --- Issue #1231 regression tests --- + +def test_bump_raster_memory_guard_rejects_pathological_size(): + """Memory guard should reject huge width*height even with a small + count (#1231). + + Pre-fix: ``bump(width=1_000_000, height=1_000_000)`` would pass the + bump-count guard (default count capped at 10M ~ 160 MB) and then + allocate an 8 TB float64 output raster inside ``_finish_bump``. + """ + import pytest + + with pytest.raises(MemoryError, match=r"bump.*width=1,000,000"): + bump(width=1_000_000, height=1_000_000) + + +def test_bump_raster_memory_guard_mentions_raster_bytes(): + """Error message should name the raster allocation as the culprit + when the raster dominates the budget (#1231).""" + import pytest + + with pytest.raises(MemoryError, match="output raster"): + bump(width=500_000, height=500_000, count=1) + + +@dask_array_available +def test_bump_dask_bypasses_raster_guard(): + """Dask paths build the output lazily, so the raster-size guard + must not reject a huge dask-backed agg (#1231).""" + # 100k x 100k float64 would be 80 GB if materialized, but the dask + # backend never holds the full array in memory. + agg = xr.DataArray( + da.zeros((100_000, 100_000), chunks=(1_000, 1_000), dtype=np.float64), + dims=['y', 'x'], + ) + result = bump(agg=agg, count=10, spread=0) + assert result.shape == (100_000, 100_000) + assert isinstance(result.data, da.Array) From 7ee9607e1b2ecad22855a05df8c34ff256869989 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 17:00:52 -0700 Subject: [PATCH 2/3] Add bump security sweep state (2026-04-22) --- .claude/sweep-security-state.json | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index b49d6393..4768d341 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -40,6 +40,20 @@ "severity_max": "HIGH", "categories_found": [1], "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." + }, + "pathfinding": { + "last_inspected": "2026-04-22", + "issue": null, + "severity_max": "MEDIUM", + "categories_found": [1, 6], + "notes": "No CRITICAL/HIGH findings. Cat 1 already well-guarded: _check_memory(h, w) runs before every numpy/cupy _a_star_search allocation and covers the ~65 bytes/pixel footprint (parent_ys/xs int64, g_cost f64, visited i8, three heap arrays h_keys/h_rows/h_cols sized h*w). Auto-radius falls back when full grid exceeds 50% RAM, HPA* kicks in for long paths. Dask path uses sparse dict/set and on-demand chunk cache so no full-grid numpy materialisation. No CUDA kernels (cupy backend transfers to CPU). No file-path I/O from user input (only /proc/meminfo read). MEDIUM (unfixed, Cat 1): multi_stop_search does not cap len(waypoints); _optimize_waypoint_order builds an O(N^2) dist matrix and runs N^2 A* calls, and _nearest_neighbor_2opt is O(N^3), so pathological waypoint lists can cause extreme CPU consumption (DoS). MEDIUM (unfixed, Cat 6): a_star_search and multi_stop_search do not call _validate_raster(surface) -- only ndim==2 is checked, dtype is not; non-numeric dtypes would fail inside numba with confusing errors rather than a clean TypeError. int64 overflow in height*width (line 214 max_heap) is not reachable given the memory guard (~46340x46340 would already raise MemoryError long before 2**63)." + }, + "bump": { + "last_inspected": "2026-04-22", + "issue": 1231, + "severity_max": "HIGH", + "categories_found": [1], + "notes": "HIGH (fixed #1231): _finish_bump allocated np.zeros((height, width)) with no memory guard. The existing count guard (added in #1206) only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Fixed by extending the memory budget check to include raster_bytes = w * h * 8 when the backend will materialize the full array; dask paths build per-chunk and are excluded. No other HIGH findings: _bump_dask_numpy/_bump_dask_cupy build output lazily via da.from_delayed, no CUDA kernels (cupy path wraps the numba CPU kernel), no file I/O, no int32 overflow in realistic scenarios. MEDIUM (unfixed, Cat 6): bump() does not call _validate_raster on agg (dtype is not checked; shape unpacking catches wrong-ndim, but a non-numeric DataArray would fail confusingly downstream)." } } } From 18d910ba54a1e63ba9595ebeddf74ec2f39468f4 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 19:54:14 -0700 Subject: [PATCH 3/3] Stub _available_memory_bytes in count-cap test The 20000x20000 default count test allocates a 3.2 GB raster, which exceeds the new memory guard on 2 GB CI runners. The test only verifies count-cap semantics, so bypass the guard via monkeypatch instead of changing the guard or the test dimensions. --- xrspatial/tests/test_bump.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/xrspatial/tests/test_bump.py b/xrspatial/tests/test_bump.py index dee77961..bb9857a3 100644 --- a/xrspatial/tests/test_bump.py +++ b/xrspatial/tests/test_bump.py @@ -196,10 +196,19 @@ def test_bump_locs_use_int32_not_uint16(): assert out[50, 4464] == 0, "uint16 wrap-around location should be empty" -def test_bump_default_count_capped(): +def test_bump_default_count_capped(monkeypatch): """Default count should not exceed _MAX_DEFAULT_COUNT (#1206).""" + import sys + from xrspatial.bump import _MAX_DEFAULT_COUNT + # The 20000 x 20000 raster is 3.2 GB, which exceeds the memory guard on + # small CI runners (#1234). Stub available memory so the guard does not + # interfere with this count-cap assertion. xrspatial.__init__ rebinds + # the `bump` name to the function, so reach the module via sys.modules. + bump_mod = sys.modules["xrspatial.bump"] + monkeypatch.setattr(bump_mod, "_available_memory_bytes", lambda: 64 * 1024**3) + # 20000 x 20000 → w*h//10 = 40M, should be capped to 10M result = bump(width=20000, height=20000, spread=0) assert result.shape == (20000, 20000)