diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index dfc45825..b49d6393 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -33,6 +33,13 @@ "severity_max": "HIGH", "categories_found": [1, 2, 6], "notes": "HIGH (fixed #1227): _stats_cupy used `if nodata_values:` (truthy) so nodata_values=0 silently skipped the filter on the cupy backend, producing wrong stats vs every other backend. MEDIUM (unfixed): _strides uses np.int32 for stride indices — can wrap for arrays > ~2B elements in the numpy path. MEDIUM (unfixed): hypsometric_integral() skips _validate_raster on zones/values; _regions_numpy has no memory guard (numpy-only path, bounded by caller-allocated input). MEDIUM (unfixed): _stats_numpy return_type='xarray.DataArray' allocates np.full((n_stats, values.size)) with no guard." + }, + "viewshed": { + "last_inspected": "2026-04-22", + "issue": 1229, + "severity_max": "HIGH", + "categories_found": [1], + "notes": "HIGH (fixed #1229): _viewshed_cpu allocated ~500 bytes/pixel of working memory (event_list 3*H*W*7*8 bytes + status_values/status_struct/idle + visibility_grid + lexsort temporary) with no guard. A 20000x20000 raster tried to allocate ~200 GB. Fixed by adding peak-memory guard mirroring the _viewshed_dask pattern (_available_memory_bytes() check, raises MemoryError with max_distance= hint). No other HIGH findings: dask path already guarded, _validate_raster is called, distance-sweep uses dtype=float64, _calc_dist_n_grad guards zero distance." } } } diff --git a/xrspatial/tests/test_viewshed.py b/xrspatial/tests/test_viewshed.py index 408e18fb..c821a90e 100644 --- a/xrspatial/tests/test_viewshed.py +++ b/xrspatial/tests/test_viewshed.py @@ -417,3 +417,45 @@ def test_viewshed_dask_distance_sweep_target_elev(): np.testing.assert_allclose( result[vis_mask], v_np.values[vis_mask], atol=1.0, err_msg="Tier C angles diverge too far from numpy reference") + + +def test_viewshed_cpu_memory_guard(): + """_viewshed_cpu should refuse rasters that would blow past RAM. + + Regression test for the DoS vulnerability where the numpy path + allocated ~500 bytes/pixel of working memory with no size check. + """ + from unittest.mock import patch + + ny, nx = 100, 100 + terrain = np.zeros((ny, nx)) + xs = np.arange(nx, dtype=float) + ys = np.arange(ny, dtype=float) + raster = xa.DataArray(terrain, coords=dict(x=xs, y=ys), dims=["y", "x"]) + + # 100x100 needs ~5 MB of working memory. Report 1 MB available so + # the 50% threshold (~500 kB) is exceeded and the guard trips. + with patch('xrspatial.viewshed._available_memory_bytes', + return_value=1_000_000): + with pytest.raises(MemoryError, match="max_distance"): + viewshed(raster, x=50.0, y=50.0, observer_elev=5) + + +def test_viewshed_cpu_memory_guard_passes_with_max_distance(): + """max_distance should bypass the memory guard by windowing first.""" + from unittest.mock import patch + + ny, nx = 100, 100 + terrain = np.zeros((ny, nx)) + xs = np.arange(nx, dtype=float) + ys = np.arange(ny, dtype=float) + raster = xa.DataArray(terrain, coords=dict(x=xs, y=ys), dims=["y", "x"]) + + # Same tight memory budget as above — but max_distance=3 shrinks the + # working window to a 7x7 tile, so the guard on the small window + # does not trip. + with patch('xrspatial.viewshed._available_memory_bytes', + return_value=1_000_000): + v = viewshed(raster, x=50.0, y=50.0, observer_elev=5, + max_distance=3.0) + assert v.values[50, 50] == 180.0 diff --git a/xrspatial/viewshed.py b/xrspatial/viewshed.py index 0e282052..b5d21019 100644 --- a/xrspatial/viewshed.py +++ b/xrspatial/viewshed.py @@ -1514,6 +1514,22 @@ def _viewshed_cpu( height, width = raster.shape + # Peak-memory guard. The sweep algorithm allocates an event_list of + # 3*H*W rows (168 bytes/pixel), plus the red-black status structure + # (status_values + status_struct + idle ~= 104 bytes/pixel), + # visibility_grid and raster (~16 bytes/pixel), and a lexsort + # temporary that roughly doubles event_list during sort. + # Round to ~500 bytes/pixel and refuse if it would eat most of RAM. + peak_bytes = 500 * height * width + avail = _available_memory_bytes() + if peak_bytes > 0.5 * avail: + raise MemoryError( + f"viewshed CPU sweep would need ~{peak_bytes / 1e9:.1f} GB of " + f"working memory for a {height}x{width} raster, which exceeds " + f"50% of available RAM ({avail / 1e9:.1f} GB). " + f"Pass max_distance= to limit the analysis area." + ) + y_coords = raster.indexes.get('y').values x_coords = raster.indexes.get('x').values