Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@
"categories_found": [1, 4],
"notes": "Follow-up #1220: GPU predictor=2 kernel over-indexed d_decomp for multi-sample tiled TIFFs (silent OOB GPU write). Fixed by passing width=tile_width instead of tile_width*samples."
},
"hydro": {
"last_inspected": "2026-04-17",
"issue": null,
"severity_max": "MEDIUM",
"categories_found": [1, 3, 6]
"rasterize": {
"last_inspected": "2026-04-21",
"issue": 1223,
"severity_max": "HIGH",
"categories_found": [1, 2],
"notes": "HIGH: unbounded out/written allocation in _run_numpy/_run_cupy driven by user-supplied width/height/resolution (no cap). MEDIUM (unfixed): _build_row_csr_numba total=row_ptr[height] is int32 and can wrap for very tall rasters with many long edges."
}
}
}
37 changes: 37 additions & 0 deletions xrspatial/rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,33 @@
_HAS_SHAPELY2 = False


# ---------------------------------------------------------------------------
# Allocation guard: reject output dimensions that would exhaust memory
# ---------------------------------------------------------------------------

#: Default maximum total output pixel count (width * height).
#: ~1 billion pixels: ~8 GB for a single-band float64 output, plus ~1 GB
#: for the int8 ``written`` mask. Override per call via the ``max_pixels``
#: keyword.
MAX_PIXELS_DEFAULT = 1_000_000_000


def _check_output_dimensions(width, height, max_pixels):
"""Raise ValueError if the requested output raster exceeds *max_pixels*.

Called before any host or device allocation so a hostile ``width``,
``height``, or ``resolution`` cannot trigger a multi-gigabyte
``np.full`` / ``cupy.full`` before the error surfaces.
"""
total = int(width) * int(height)
if total > max_pixels:
raise ValueError(
f"rasterize output dimensions ({width} x {height} = "
f"{total:,} pixels) exceed the safety limit of "
f"{max_pixels:,} pixels. Pass a larger max_pixels value to "
f"rasterize() if this size is intentional."
)


# ---------------------------------------------------------------------------
# Merge functions (CPU, numba-jitted)
#
Expand Down Expand Up @@ -1948,6 +1975,7 @@ def rasterize(
like: Optional[xr.DataArray] = None,
merge='last',
chunks: Optional[Union[int, Tuple[int, int]]] = None,
max_pixels: int = MAX_PIXELS_DEFAULT,
) -> xr.DataArray:
"""Rasterize vector geometries into a 2D DataArray.

Expand Down Expand Up @@ -2034,6 +2062,11 @@ def rasterize(
tiles of this size ``(row_chunk, col_chunk)``. A single int
uses the same chunk size for both axes. Combined with
``use_cuda`` to select dask+numpy vs dask+cupy.
max_pixels : int, default 1_000_000_000
Safety cap on the resolved output size (``width * height``). The
function raises ``ValueError`` before any host or device
allocation if the cap is exceeded. Raise this explicitly when
rasterizing a legitimately large grid.

Returns
-------
Expand Down Expand Up @@ -2136,6 +2169,10 @@ def rasterize(
f"width and height must be >= 1, got width={final_width}, "
f"height={final_height}")

# Reject oversize outputs before any host or device allocation. Covers
# the explicit width/height path and the resolution-derived path.
_check_output_dimensions(final_width, final_height, max_pixels)

# Resolve dtype: explicit > like > default
if dtype is not None:
final_dtype = dtype
Expand Down
49 changes: 49 additions & 0 deletions xrspatial/tests/test_rasterize.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,55 @@ def test_no_bounds_no_geom(self):
with pytest.raises(ValueError, match="bounds must be provided"):
rasterize([], width=10, height=10)

def test_oversize_explicit_dimensions_rejected(self):
    """A 2e9 x 2e9 request (4e18 pixels) trips the default 1e9 cap.

    The ValueError must surface before any np.full allocation runs.
    """
    side = 2_000_000_000
    shapes = [(box(0, 0, 1, 1), 1.0)]
    with pytest.raises(ValueError, match="exceed the safety limit"):
        rasterize(shapes, width=side, height=side, bounds=(0, 0, 1, 1))

def test_oversize_resolution_rejected(self):
    """A tiny resolution that resolves to an oversize grid is rejected.

    resolution=1e-6 over 10 x 10 bounds resolves to 10M x 10M = 1e14
    pixels, well above the default cap.
    """
    shapes = [(box(0, 0, 1, 1), 1.0)]
    tiny_resolution = 1e-6
    with pytest.raises(ValueError, match="exceed the safety limit"):
        rasterize(shapes, resolution=tiny_resolution, bounds=(0, 0, 10, 10))

def test_moderate_oversize_explicit_rejected(self):
    """A realistic oversize request is rejected by the default cap.

    50_000 x 50_000 = 2.5e9 pixels (~20 GB as float64 plus ~2.5 GB for
    an int8 mask): moderate compared with the extreme cases, but still
    above the default limit.
    """
    side = 50_000
    shapes = [(box(0, 0, 1, 1), 1.0)]
    with pytest.raises(ValueError, match="exceed the safety limit"):
        rasterize(shapes, width=side, height=side, bounds=(0, 0, 1, 1))

def test_max_pixels_override_permits_larger(self):
    """Raising ``max_pixels`` admits a grid that a lower cap rejects.

    Allocating a raster above the 1e9 default is impractical in a unit
    test, so a 2000 x 2000 grid (4e6 pixels) stands in: it must be
    rejected under a 1e6 cap and accepted once the cap is raised to
    1e7. Checking both sides proves the keyword is what lets the call
    through; asserting only the success case would also pass if
    ``max_pixels`` were ignored.
    """
    shapes = [(box(0, 0, 1, 1), 1.0)]
    grid = dict(width=2000, height=2000, bounds=(0, 0, 1, 1))

    # Cap below the grid size: the request is refused.
    with pytest.raises(ValueError, match="exceed the safety limit"):
        rasterize(shapes, max_pixels=1_000_000, **grid)

    # Same request with a raised cap: allowed, and it really allocates.
    result = rasterize(shapes, max_pixels=10_000_000, **grid)
    assert result.shape == (2000, 2000)

def test_max_pixels_at_boundary_permitted(self):
    """A grid whose pixel count equals ``max_pixels`` is accepted.

    The guard uses a strict ``>`` comparison, so width * height ==
    max_pixels must pass.
    """
    shapes = [(box(0, 0, 1, 1), 1.0)]
    out = rasterize(
        shapes,
        width=100,
        height=100,
        bounds=(0, 0, 1, 1),
        max_pixels=10_000,
    )
    assert out.shape == (100, 100)

def test_max_pixels_one_over_rejected(self):
    """A grid exactly one pixel over ``max_pixels`` is rejected.

    101 x 100 = 10_100 pixels, so the cap is derived as the product
    minus one. A fixed cap of 10_000 would leave the request 100 pixels
    over and would not exercise the boundary.
    """
    width, height = 101, 100
    cap = width * height - 1  # 10_099: exactly one pixel short
    with pytest.raises(ValueError, match="exceed the safety limit"):
        rasterize([(box(0, 0, 1, 1), 1.0)],
                  width=width, height=height, bounds=(0, 0, 1, 1),
                  max_pixels=cap)


# ---------------------------------------------------------------------------
# all_touched mode
Expand Down
Loading