diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json
index d6010095..1daad95f 100644
--- a/.claude/sweep-security-state.json
+++ b/.claude/sweep-security-state.json
@@ -20,11 +20,12 @@
       "categories_found": [1, 4],
       "notes": "Follow-up #1220: GPU predictor=2 kernel over-indexed d_decomp for multi-sample tiled TIFFs (silent OOB GPU write). Fixed by passing width=tile_width instead of tile_width*samples."
     },
-    "hydro": {
-      "last_inspected": "2026-04-17",
-      "issue": null,
-      "severity_max": "MEDIUM",
-      "categories_found": [1, 3, 6]
+    "rasterize": {
+      "last_inspected": "2026-04-21",
+      "issue": 1223,
+      "severity_max": "HIGH",
+      "categories_found": [1, 2],
+      "notes": "HIGH: unbounded out/written allocation in _run_numpy/_run_cupy driven by user-supplied width/height/resolution (no cap). MEDIUM (unfixed): _build_row_csr_numba total=row_ptr[height] is int32 and can wrap for very tall rasters with many long edges."
     }
   }
 }
diff --git a/xrspatial/rasterize.py b/xrspatial/rasterize.py
index 8c29d023..259e2a96 100644
--- a/xrspatial/rasterize.py
+++ b/xrspatial/rasterize.py
@@ -37,6 +37,33 @@
     _HAS_SHAPELY2 = False
 
 
+# ---------------------------------------------------------------------------
+# Allocation guard: reject output dimensions that would exhaust memory
+# ---------------------------------------------------------------------------
+
+#: Default maximum total output pixel count (width * height).
+#: ~1 billion pixels: ~8 GB for a float64 single-band output plus ~1 GB
+#: for the int8 ``written`` mask. Override per call via ``max_pixels``.
+MAX_PIXELS_DEFAULT = 1_000_000_000
+
+
+def _check_output_dimensions(width, height, max_pixels):
+    """Raise ValueError if the requested output raster exceeds *max_pixels*.
+
+    Called before any host or device allocation so a hostile ``width``,
+    ``height``, or ``resolution`` cannot trigger a multi-gigabyte
+    ``np.full`` / ``cupy.full`` before the error surfaces.
+    """
+    total = int(width) * int(height)
+    if total > max_pixels:
+        raise ValueError(
+            f"rasterize output dimensions ({width} x {height} = "
+            f"{total:,} pixels) exceed the safety limit of "
+            f"{max_pixels:,} pixels. Pass a larger max_pixels value to "
+            f"rasterize() if this size is intentional."
+        )
+
+
 # ---------------------------------------------------------------------------
 # Merge functions (CPU, numba-jitted)
 #
@@ -1948,6 +1975,7 @@ def rasterize(
     like: Optional[xr.DataArray] = None,
     merge='last',
     chunks: Optional[Union[int, Tuple[int, int]]] = None,
+    max_pixels: int = MAX_PIXELS_DEFAULT,
 ) -> xr.DataArray:
     """Rasterize vector geometries into a 2D DataArray.
 
@@ -2034,6 +2062,11 @@ def rasterize(
         tiles of this size ``(row_chunk, col_chunk)``. A single int uses
         the same chunk size for both axes. Combined with ``use_cuda``
         to select dask+numpy vs dask+cupy.
+    max_pixels : int, default 1_000_000_000
+        Safety cap on the resolved output size (``width * height``). The
+        function raises ``ValueError`` before any host or device
+        allocation if the cap is exceeded. Raise this explicitly when
+        rasterizing a legitimately large grid.
 
     Returns
     -------
@@ -2136,6 +2169,10 @@ def rasterize(
             f"width and height must be >= 1, got width={final_width}, "
             f"height={final_height}")
 
+    # Reject oversize outputs before any host or device allocation. Covers
+    # the explicit width/height path and the resolution-derived path.
+    _check_output_dimensions(final_width, final_height, max_pixels)
+
     # Resolve dtype: explicit > like > default
     if dtype is not None:
         final_dtype = dtype
diff --git a/xrspatial/tests/test_rasterize.py b/xrspatial/tests/test_rasterize.py
index d0ee3c56..b92f29a3 100644
--- a/xrspatial/tests/test_rasterize.py
+++ b/xrspatial/tests/test_rasterize.py
@@ -293,6 +293,61 @@ def test_no_bounds_no_geom(self):
         with pytest.raises(ValueError, match="bounds must be provided"):
             rasterize([], width=10, height=10)
 
+    def test_oversize_explicit_dimensions_rejected(self):
+        # width * height = 4e18, far above the 1e9 default cap. Must
+        # raise before any np.full allocation runs.
+        with pytest.raises(ValueError, match="exceed the safety limit"):
+            rasterize([(box(0, 0, 1, 1), 1.0)],
+                      width=2_000_000_000, height=2_000_000_000,
+                      bounds=(0, 0, 1, 1))
+
+    def test_oversize_resolution_rejected(self):
+        # resolution=1e-6 with 10x10 bounds resolves to 10M x 10M =
+        # 1e14 pixels, well above the default cap.
+        with pytest.raises(ValueError, match="exceed the safety limit"):
+            rasterize([(box(0, 0, 1, 1), 1.0)],
+                      resolution=1e-6, bounds=(0, 0, 10, 10))
+
+    def test_moderate_oversize_explicit_rejected(self):
+        # Realistic attack: 2.5e9 pixels = ~20 GB float64 + 2.5 GB int8.
+        # Still above the default cap, still rejected.
+        with pytest.raises(ValueError, match="exceed the safety limit"):
+            rasterize([(box(0, 0, 1, 1), 1.0)],
+                      width=50_000, height=50_000,
+                      bounds=(0, 0, 1, 1))
+
+    def test_max_pixels_override_permits_larger(self):
+        # Allocating past the 1e9 default is impractical in a unit test,
+        # so scale the scenario down: the same 2000 x 2000 request is
+        # rejected under a 1M-pixel cap and accepted once the caller
+        # raises max_pixels above width * height.
+        geoms = [(box(0, 0, 1, 1), 1.0)]
+        with pytest.raises(ValueError, match="exceed the safety limit"):
+            rasterize(geoms, width=2000, height=2000, bounds=(0, 0, 1, 1),
+                      max_pixels=1_000_000)
+        result = rasterize(
+            geoms,
+            width=2000, height=2000, bounds=(0, 0, 1, 1),
+            max_pixels=10_000_000,
+        )
+        assert result.shape == (2000, 2000)
+
+    def test_max_pixels_at_boundary_permitted(self):
+        # width * height == max_pixels must pass (strict >).
+        result = rasterize(
+            [(box(0, 0, 1, 1), 1.0)],
+            width=100, height=100, bounds=(0, 0, 1, 1),
+            max_pixels=10_000,
+        )
+        assert result.shape == (100, 100)
+
+    def test_max_pixels_one_over_rejected(self):
+        # width * height = 10_001, cap = 10_000 -> reject.
+        with pytest.raises(ValueError, match="exceed the safety limit"):
+            rasterize([(box(0, 0, 1, 1), 1.0)],
+                      width=10_001, height=1, bounds=(0, 0, 1, 1),
+                      max_pixels=10_000)
+
 
 # ---------------------------------------------------------------------------
 # all_touched mode