From bb3ecbc875680b7a354af049bdec930aff050f01 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Wed, 13 May 2026 06:34:40 -0700 Subject: [PATCH] geotiff: validate VRT SrcRect dimensions and offsets (#1784) The DstRect validation added for #1737 covered only one half of the SimpleSource rectangle pair. A malformed `` (or negative offset) reached `read_to_array` as a bad window, raised `ValueError` for the out-of-range window, and was then swallowed by the lenient source-read try/except meant to handle missing or unreadable source files. Result: malformed XML produced a single warning plus a zero-filled hole, and strict mode surfaced the swallowed error inside the try rather than at the malformed-XML site. Reject `sr.x_size < 0`, `sr.y_size < 0`, `sr.x_off < 0`, and `sr.y_off < 0` up front, before the overlap math and before the source-read try, so the malformed VRT always reaches the caller. Error messages name the offending field and value, matching the existing DstRect error. Upper-bound validation against the source raster extent requires reading the source header and is left for a future change; this commit addresses the negatives only. --- xrspatial/geotiff/_vrt.py | 21 +++ ...est_geotiff_vrt_srcrect_validation_1784.py | 160 ++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 2e0cb7e1..e50c9c2b 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -647,6 +647,27 @@ def read_vrt(vrt_path: str, *, window=None, f"DstRect sizes must be non-negative." ) + # Reject malformed SrcRect values up front, before the overlap + # math and before the lenient source-read try/except below. A + # negative ``xSize`` / ``ySize`` would otherwise reach + # ``read_to_array`` as a bad window and be silently swallowed by + # the missing-source fallback (which is meant for unreadable + # files, not malformed XML rectangles); a negative ``xOff`` / + # ``yOff`` likewise produces an out-of-range window that the + # fallback would turn into a zero-filled hole. See issue #1784. + if sr.x_size < 0 or sr.y_size < 0: + raise ValueError( + f"VRT SimpleSource SrcRect has negative size " + f"(xSize={sr.x_size}, ySize={sr.y_size}); " + f"SrcRect sizes must be non-negative." + ) + if sr.x_off < 0 or sr.y_off < 0: + raise ValueError( + f"VRT SimpleSource SrcRect has negative offset " + f"(xOff={sr.x_off}, yOff={sr.y_off}); " + f"SrcRect offsets must be non-negative." + ) + # Destination rect in virtual raster coordinates dst_r0 = dr.y_off dst_c0 = dr.x_off diff --git a/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py new file mode 100644 index 00000000..5edb8ca4 --- /dev/null +++ b/xrspatial/geotiff/tests/test_geotiff_vrt_srcrect_validation_1784.py @@ -0,0 +1,160 @@ +"""VRT ``SrcRect`` must reject negative sizes and offsets up front. + +The ``DstRect`` validation added for issue #1737 only covers one half of the +SimpleSource rectangle pair. A malformed ```` (or +negative offset) reaches ``read_to_array`` as a bad window, raises +``ValueError`` for the out-of-range window, and is then swallowed by the +lenient source-read ``try/except`` that is meant to handle *missing or +unreadable source files* -- not malformed XML rectangles. + +Net effect before this fix: malformed XML becomes a single warning plus a +zero-filled hole in the mosaic. In strict mode the same condition surfaces +the swallowed error inside the try. Either way, the caller cannot tell the +malformed-VRT case from a legitimate missing tile. + +Regression test for issue #1784: ``read_vrt`` should refuse the read with a +``ValueError`` that names the offending SrcRect field, in both lenient and +strict modes. +""" +from __future__ import annotations + +import os +import tempfile +import warnings + +import numpy as np +import pytest + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._vrt import read_vrt + + +def _write_source(td: str, name: str = 'src.tif') -> str: + """Write a 10x10 uint8 source GeoTIFF and return its path.""" + src_path = os.path.join(td, name) + to_geotiff(np.zeros((10, 10), dtype=np.uint8), src_path, + compression='none') + return src_path + + +def _write_vrt(td: str, *, + src_x_off: int = 0, src_y_off: int = 0, + src_x_size: int = 10, src_y_size: int = 10, + src_filename: str = 'src.tif', + raster_x: int = 100, raster_y: int = 100) -> str: + """Write a VRT with a single SimpleSource using the given SrcRect.""" + vrt_path = os.path.join(td, 'mosaic.vrt') + vrt_xml = ( + f'\n' + f' \n' + f' \n' + f' {src_filename}' + f'\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt_path, 'w') as f: + f.write(vrt_xml) + return vrt_path + + +def test_negative_srcrect_x_size_rejected(): + """Negative ``SrcRect xSize`` surfaces as ``ValueError`` rather than + being swallowed by the missing-source fallback.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_x_size=-50) + with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + read_vrt(vrt_path) + + +def test_negative_srcrect_y_size_rejected(): + """Negative ``SrcRect ySize`` surfaces as ``ValueError``.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_y_size=-50) + with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + read_vrt(vrt_path) + + +def test_negative_srcrect_x_off_rejected(): + """Negative ``SrcRect xOff`` surfaces as ``ValueError``.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_x_off=-10) + with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): + read_vrt(vrt_path) + + +def test_negative_srcrect_y_off_rejected(): + """Negative ``SrcRect yOff`` surfaces as ``ValueError``.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_y_off=-10) + with pytest.raises(ValueError, match=r"SrcRect.*negative offset"): + read_vrt(vrt_path) + + +def test_negative_srcrect_message_names_bad_values(): + """The error message must name the malformed field and its value so the + caller can find the offending ```` in the VRT.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_x_size=-7, src_y_size=-3) + with pytest.raises(ValueError) as excinfo: + read_vrt(vrt_path) + msg = str(excinfo.value) + assert "SrcRect" in msg + assert "-7" in msg + assert "-3" in msg + + +def test_missing_source_still_takes_lenient_warning_path(): + """A *valid* SrcRect with a missing source file must still hit the + lenient warning path -- the new SrcRect check must not swallow the + missing-file case that PR #1675 narrowed.""" + from xrspatial.geotiff import GeoTIFFFallbackWarning + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + # No source file written; SrcRect itself is well-formed. + vrt_path = _write_vrt(td, src_filename='does_not_exist.tif') + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + arr, _ = read_vrt(vrt_path) + # The lenient path must produce a fallback warning and a result + # array (zero-filled at the hole), not raise. + fallback = [w for w in caught + if issubclass(w.category, GeoTIFFFallbackWarning)] + assert fallback, ( + "expected a GeoTIFFFallbackWarning for the missing source" + ) + assert arr.shape == (100, 100) + + +def test_valid_srcrect_reads_normally(): + """A well-formed SrcRect with a real source must succeed -- no false + positives on valid VRTs.""" + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, raster_x=10, raster_y=10) + arr, _ = read_vrt(vrt_path) + assert arr.shape == (10, 10) + # Source is all zeros and DstRect covers the full VRT raster, so + # the entire output must be zero. + assert np.all(arr == 0) + + +def test_negative_srcrect_raises_under_strict_mode(monkeypatch): + """The check runs *before* the lenient try/except, so strict mode and + lenient mode must both raise. Pinning strict mode here prevents a + regression where the check accidentally moves back inside the try.""" + monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1') + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td: + _write_source(td) + vrt_path = _write_vrt(td, src_x_size=-50) + with pytest.raises(ValueError, match=r"SrcRect.*negative size"): + read_vrt(vrt_path)