From 922cc8990a225f8e93e470e45c2789c041489a78 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 22 Apr 2026 13:21:37 -0700 Subject: [PATCH] Guard viewshed numpy path against oversize rasters (#1229) `_viewshed_cpu` allocated ~500 bytes per pixel with no size check, so a 20000x20000 raster tried to grab ~200 GB before any validation and OOM-killed the process. Add the same style of guard `_viewshed_dask` already has: estimate peak working memory (~500 * H * W) against `_available_memory_bytes()` and raise `MemoryError` pointing users at `max_distance=` when the request would eat more than 50% of RAM. `max_distance` routes through `_viewshed_windowed`, which shrinks the raster before `_viewshed_cpu` sees it, so the guard is not a dead end. Two regression tests: one patches `_available_memory_bytes` to a small value and asserts the guard fires with a message mentioning `max_distance`; the other confirms `max_distance` bypasses it. --- .claude/sweep-security-state.json | 7 ++++++ xrspatial/tests/test_viewshed.py | 42 +++++++++++++++++++++++++++++++ xrspatial/viewshed.py | 16 ++++++++++++ 3 files changed, 65 insertions(+) diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index dfc45825..b49d6393 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -33,6 +33,13 @@ "severity_max": "HIGH", "categories_found": [1, 2, 6], "notes": "HIGH (fixed #1227): _stats_cupy used `if nodata_values:` (truthy) so nodata_values=0 silently skipped the filter on the cupy backend, producing wrong stats vs every other backend. MEDIUM (unfixed): _strides uses np.int32 for stride indices — can wrap for arrays > ~2B elements in the numpy path. MEDIUM (unfixed): hypsometric_integral() skips _validate_raster on zones/values; _regions_numpy has no memory guard (numpy-only path, bounded by caller-allocated input). MEDIUM (unfixed): _stats_numpy return_type='xarray.DataArray' allocates np.full((n_stats, values.size)) with no guard." + }, + "viewshed": { + "last_inspected": "2026-04-22", + "issue": 1229, + "severity_max": "HIGH", + "categories_found": [1], + "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." } } } diff --git a/xrspatial/tests/test_viewshed.py b/xrspatial/tests/test_viewshed.py index 408e18fb..c821a90e 100644 --- a/xrspatial/tests/test_viewshed.py +++ b/xrspatial/tests/test_viewshed.py @@ -417,3 +417,45 @@ def test_viewshed_dask_distance_sweep_target_elev(): np.testing.assert_allclose( result[vis_mask], v_np.values[vis_mask], atol=1.0, err_msg="Tier C angles diverge too far from numpy reference") + + +def test_viewshed_cpu_memory_guard(): + """_viewshed_cpu should refuse rasters that would blow past RAM. + + Regression test for the DoS vulnerability where the numpy path + allocated ~500 bytes/pixel of working memory with no size check. + """ + from unittest.mock import patch + + ny, nx = 100, 100 + terrain = np.zeros((ny, nx)) + xs = np.arange(nx, dtype=float) + ys = np.arange(ny, dtype=float) + raster = xa.DataArray(terrain, coords=dict(x=xs, y=ys), dims=["y", "x"]) + + # 100x100 needs ~5 MB of working memory. Report 1 MB available so + # the 50% threshold (~500 kB) is exceeded and the guard trips. + with patch('xrspatial.viewshed._available_memory_bytes', + return_value=1_000_000): + with pytest.raises(MemoryError, match="max_distance"): + viewshed(raster, x=50.0, y=50.0, observer_elev=5) + + +def test_viewshed_cpu_memory_guard_passes_with_max_distance(): + """max_distance should bypass the memory guard by windowing first.""" + from unittest.mock import patch + + ny, nx = 100, 100 + terrain = np.zeros((ny, nx)) + xs = np.arange(nx, dtype=float) + ys = np.arange(ny, dtype=float) + raster = xa.DataArray(terrain, coords=dict(x=xs, y=ys), dims=["y", "x"]) + + # Same tight memory budget as above — but max_distance=3 shrinks the + # working window to a 7x7 tile, so the guard on the small window + # does not trip. + with patch('xrspatial.viewshed._available_memory_bytes', + return_value=1_000_000): + v = viewshed(raster, x=50.0, y=50.0, observer_elev=5, + max_distance=3.0) + assert v.values[50, 50] == 180.0 diff --git a/xrspatial/viewshed.py b/xrspatial/viewshed.py index 0e282052..b5d21019 100644 --- a/xrspatial/viewshed.py +++ b/xrspatial/viewshed.py @@ -1514,6 +1514,22 @@ def _viewshed_cpu( height, width = raster.shape + # Peak-memory guard. The sweep algorithm allocates an event_list of + # 3*H*W rows (168 bytes/pixel), plus the red-black status structure + # (status_values + status_struct + idle ~= 104 bytes/pixel), + # visibility_grid and raster (~16 bytes/pixel), and a lexsort + # temporary that roughly doubles event_list during sort. + # Round to ~500 bytes/pixel and refuse if it would eat most of RAM. + peak_bytes = 500 * height * width + avail = _available_memory_bytes() + if peak_bytes > 0.5 * avail: + raise MemoryError( + f"viewshed CPU sweep would need ~{peak_bytes / 1e9:.1f} GB of " + f"working memory for a {height}x{width} raster, which exceeds " + f"50% of available RAM ({avail / 1e9:.1f} GB). " + f"Pass max_distance= to limit the analysis area." + ) + y_coords = raster.indexes.get('y').values x_coords = raster.indexes.get('x').values