From a7f02cfb1bc67c5728ec27ed494aaded264da875 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 23 Apr 2026 11:47:43 -0700 Subject: [PATCH] Clamp bilateral kernel radius to raster extent (#1236) bilateral() validated sigma_spatial only as > 0, with no upper bound. The derived kernel radius = ceil(2 * sigma_spatial) fed _pad_array (allocating H+2r by W+2r when boundary != 'nan') and the dask map_overlap depth, so a single float from the caller could exhaust host memory. sigma_spatial=1e9 on a 100x100 raster maps to radius=2e9 and ~128 EB of float64 padding. The numba and CUDA inner loops are already clamped to rows/cols, so the filter output is unchanged once the radius is at least max(rows, cols). Clamp radius to max(rows, cols) in bilateral() before dispatch; this closes the DoS without raising on realistic-but-large inputs (a 1x1 raster with sigma_spatial=1 still works). Adds test_bilateral_clamps_oversize_sigma_spatial covering both the numba path (boundary='nan') and the padding path (boundary='nearest'), plus a 3D input case. Also records the finding in .claude/sweep-security-state.json. --- .claude/sweep-security-state.json | 7 ++++++ xrspatial/bilateral.py | 13 ++++++++++ xrspatial/tests/test_bilateral.py | 40 +++++++++++++++++++++++++++++++ 3 files changed, 60 insertions(+) diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index 69972b6a..6188a253 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -75,6 +75,13 @@ "severity_max": "HIGH", "categories_found": [1], "notes": "HIGH (fixed #1231): _finish_bump allocated np.zeros((height, width)) with no memory guard. The existing count guard (added in #1206) only protected the locs/heights arrays, so bump(width=1_000_000, height=1_000_000) passed the guard (count capped at 10M ~ 160 MB) and then tried to allocate an 8 TB float64 raster. Fixed by extending the memory budget check to include raster_bytes = w * h * 8 when the backend will materialize the full array; dask paths build per-chunk and are excluded. No other HIGH findings: _bump_dask_numpy/_bump_dask_cupy build output lazily via da.from_delayed, no CUDA kernels (cupy path wraps the numba CPU kernel), no file I/O, no int32 overflow in realistic scenarios. MEDIUM (unfixed, Cat 6): bump() does not call _validate_raster on agg (dtype is not checked; shape unpacking catches wrong-ndim, but a non-numeric DataArray would fail confusingly downstream)." + }, + "bilateral": { + "last_inspected": "2026-04-23", + "issue": 1236, + "severity_max": "HIGH", + "categories_found": [1], + "notes": "HIGH (fixed #1236): bilateral() validated sigma_spatial only as > 0, with no upper bound. The derived kernel radius = ceil(2*sigma_spatial) drove the _pad_array allocation (H+2r, W+2r) when boundary != 'nan' and the dask map_overlap depth on every backend. sigma_spatial=1e9 on a 100x100 raster -> radius=2e9 -> ~128 EB padded float64 allocation. sigma_spatial=1e5 -> ~320 GB. Fixed by clamping radius to max(rows, cols) in bilateral() before dispatch; inner numba/CUDA loops were already clamped to rows/cols so the output is unchanged for realistic inputs. No other HIGH findings: GPU kernel has bounds guard (if 0 <= x < cols and 0 <= y < rows), _validate_raster is called on agg, agg.data.astype(float) is applied before dispatch, NaN propagation is explicit (center NaN -> NaN out, neighbor NaN skipped), division by w_sum is guarded (w_sum > 0.0). MEDIUM (unfixed, Cat 3): sigma_spatial underflow (e.g. 1e-200) makes inv_2_ss = inf and can propagate NaN through exp() at the center pixel, but not safety-critical." } } } diff --git a/xrspatial/bilateral.py b/xrspatial/bilateral.py index 474e6b6c..17ab7cd7 100644 --- a/xrspatial/bilateral.py +++ b/xrspatial/bilateral.py @@ -309,6 +309,19 @@ def bilateral(agg, sigma_spatial=1.0, sigma_range=10.0, sigma_range = float(sigma_range) radius = _kernel_radius(sigma_spatial) + # Clamp the kernel radius to the raster extent. A radius larger than + # max(rows, cols) already covers the whole raster, so the filter + # output is unchanged by letting it grow further. Without this clamp + # an oversized sigma_spatial drives a quadratic allocation in + # _pad_array (when boundary != 'nan') and a quadratic overlap depth + # in the dask paths, so one float from the caller can DoS the host + # (e.g. sigma_spatial=1e9 -> radius=2e9 -> exabyte-scale padding). + # The numba/CUDA inner loops are already clamped to rows/cols. + rows, cols = agg.shape[-2], agg.shape[-1] + max_radius = max(rows, cols) + if radius > max_radius: + radius = max_radius + if agg.ndim == 3: from xrspatial.focal import _apply_per_band return _apply_per_band( diff --git a/xrspatial/tests/test_bilateral.py b/xrspatial/tests/test_bilateral.py index 35fb808e..e9810275 100644 --- a/xrspatial/tests/test_bilateral.py +++ b/xrspatial/tests/test_bilateral.py @@ -135,6 +135,46 @@ def test_bilateral_validation_errors(): bilateral(agg, boundary='invalid') +def test_bilateral_clamps_oversize_sigma_spatial(): + """An oversized sigma_spatial should be clamped internally so that the + derived kernel radius never exceeds max(rows, cols). + + Without this clamp, _pad_array would allocate (H + 2*radius, + W + 2*radius) and the dask map_overlap depth would grow unbounded, so + a single float from the caller could exhaust host memory (DoS). + """ + # 10x20 raster -> max_radius=20. sigma_spatial=1e9 would normally + # derive radius ~ 2e9, which would blow up the padded allocation. + # The clamp bounds internal work to max(rows, cols), so the call must + # return quickly with a finite result. + rng = np.random.default_rng(1236) + data = rng.standard_normal((10, 20)).astype(np.float64) + agg = xr.DataArray(data) + + # Large sigma with boundary='nearest' exercises the padding path. + result = bilateral( + agg, sigma_spatial=1e9, sigma_range=1000.0, boundary='nearest', + ) + assert result.shape == agg.shape + assert np.all(np.isfinite(result.data)) + + # Large sigma with boundary='nan' exercises the pure numba path. + result_nan = bilateral( + agg, sigma_spatial=1e9, sigma_range=1000.0, boundary='nan', + ) + assert result_nan.shape == agg.shape + assert np.all(np.isfinite(result_nan.data)) + + # 3D input: clamp should use the last two dims. + agg3d = xr.DataArray( + rng.standard_normal((2, 8, 8)).astype(np.float64), + dims=['band', 'y', 'x'], + ) + result3d = bilateral(agg3d, sigma_spatial=1e9, sigma_range=1000.0) + assert result3d.shape == agg3d.shape + assert np.all(np.isfinite(result3d.data)) + + def test_bilateral_known_values(): """Verify output against hand-computed bilateral filter on a 3x3 raster.""" data = np.array([