From 47d5bcc9d37ae59f1f517c3bd0715d56cfcee13f Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 11:06:58 -0700 Subject: [PATCH 1/2] Validate read_vrt window against VRT extent (#1697) The local TIFF path (#1634) and HTTP COG path (#1669) reject out-of-bounds, zero-size, and inverted windows up front with a ValueError. The VRT path silently clamped the window to the parsed extent instead, which returned a smaller array than open_geotiff's caller-built coord arrays and surfaced as a CoordinateValidationError deep inside xarray. Replace the clamp with the same validator the other two paths use, swapping "source extent" to "VRT extent" since the dimensions come from the parsed VRT rather than a single source file. Fixes #1697 --- xrspatial/geotiff/_vrt.py | 18 +- .../tests/test_vrt_window_validation_1697.py | 227 ++++++++++++++++++ 2 files changed, 241 insertions(+), 4 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_window_validation_1697.py diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 534d09b2..9d3fadb2 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -410,12 +410,22 @@ def read_vrt(vrt_path: str, *, window=None, f"band index {band} out of range for VRT with " f"{len(vrt.bands)} band(s)") + # Validate ``window`` against the VRT's parsed extent before any + # source reads. Without this, the clamp below silently shrinks an + # out-of-bounds window and returns a smaller array, which then + # mismatches caller-built coord arrays in ``open_geotiff`` and + # surfaces as an opaque ``CoordinateValidationError``. Mirrors the + # local-path validator in ``read_to_array`` (#1634) and the HTTP + # path validator in ``_read_cog_http`` (#1669) so all backends + # agree on the contract. See issue #1697. if window is not None: r0, c0, r1, c1 = window - r0 = max(0, r0) - c0 = max(0, c0) - r1 = min(vrt.height, r1) - c1 = min(vrt.width, c1) + if (r0 < 0 or c0 < 0 + or r1 > vrt.height or c1 > vrt.width + or r0 >= r1 or c0 >= c1): + raise ValueError( + f"window={window} is outside the VRT extent " + f"({vrt.height}x{vrt.width}) or has non-positive size.") else: r0, c0, r1, c1 = 0, 0, vrt.height, vrt.width diff --git a/xrspatial/geotiff/tests/test_vrt_window_validation_1697.py b/xrspatial/geotiff/tests/test_vrt_window_validation_1697.py new file mode 100644 index 00000000..a730e380 --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_window_validation_1697.py @@ -0,0 +1,227 @@ +"""Regression tests for issue #1697. + +``read_vrt(path, window=...)`` silently clamped invalid window +coordinates instead of raising ``ValueError``. The local TIFF path +(#1634) and the HTTP COG path (#1669) both reject the same bad +windows up front. VRT was missed when #1634 landed. + +The hidden failure mode is the same one #1634 fixed: ``open_geotiff`` +builds the y/x coord arrays from the caller-supplied window indices, +so a clamped read returns a smaller array than the coord arrays and +xarray raises a ``CoordinateValidationError`` deep inside its +constructor instead of a clear xrspatial-level error. + +These tests pin the VRT path to the same contract as the local and +HTTP paths: out-of-bounds, zero-size, or inverted windows raise +``ValueError`` with a message of the same shape (one "extent" word +swap from ``source extent`` to ``VRT extent``). +""" +from __future__ import annotations + +import os +import uuid + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff +from xrspatial.geotiff._reader import read_to_array +from xrspatial.geotiff._vrt import read_vrt + + +def _unique_dir(tmp_path, label: str) -> str: + """Return a sub-path under ``tmp_path`` with a uuid suffix so + parallel test workers cannot collide on the same name.""" + d = tmp_path / f"vrt_1697_{label}_{uuid.uuid4().hex[:8]}" + d.mkdir() + return str(d) + + +def _write_tif(path: str, size: int = 4) -> None: + """Write a ``size``x``size`` float32 GeoTIFF the VRT can wrap.""" + arr = np.arange(size * size, dtype=np.float32).reshape(size, size) + y = np.linspace(float(size) - 0.5, 0.5, size) + x = np.linspace(0.5, float(size) - 0.5, size) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + ) + to_geotiff(da, path, compression='none') + + +def _write_vrt(vrt_path: str, source_filename: str, size: int = 4) -> None: + """Write a single-band VRT of dimension ``size``x``size`` pointing + at ``source_filename`` (relative to the VRT directory).""" + xml = ( + f'\n' + ' 0, 1, 0, 0, 0, -1\n' + ' \n' + ' \n' + f' {source_filename}' + '\n' + ' 1\n' + f' \n' + f' \n' + ' \n' + ' \n' + '\n' + ) + with open(vrt_path, 'w') as f: + f.write(xml) + + +@pytest.fixture +def vrt_4x4(tmp_path): + """Return a path to a 4x4 single-band VRT wrapping a 4x4 TIFF.""" + d = _unique_dir(tmp_path, "fixture") + tif = os.path.join(d, 'data.tif') + _write_tif(tif, size=4) + vrt = os.path.join(d, 'mosaic.vrt') + _write_vrt(vrt, 'data.tif', size=4) + return vrt + + +# --------------------------------------------------------------------------- +# Negative starts +# --------------------------------------------------------------------------- + + +def test_negative_r0_raises_value_error(vrt_4x4): + """``r0 < 0`` raises ValueError instead of being clamped to 0.""" + with pytest.raises(ValueError, match='outside the VRT extent'): + read_vrt(vrt_4x4, window=(-1, 0, 2, 2)) + + +def test_negative_c0_raises_value_error(vrt_4x4): + """``c0 < 0`` raises ValueError instead of being clamped to 0.""" + with pytest.raises(ValueError, match='outside the VRT extent'): + read_vrt(vrt_4x4, window=(0, -1, 2, 2)) + + +# --------------------------------------------------------------------------- +# Past-edge stops +# --------------------------------------------------------------------------- + + +def test_r1_past_bottom_edge_raises_value_error(vrt_4x4): + """``r1 > vrt.height`` raises instead of being clamped to vrt.height.""" + with pytest.raises(ValueError, match='outside the VRT extent'): + read_vrt(vrt_4x4, window=(0, 0, 5, 4)) + + +def test_c1_past_right_edge_raises_value_error(vrt_4x4): + """``c1 > vrt.width`` raises instead of being clamped to vrt.width.""" + with pytest.raises(ValueError, match='outside the VRT extent'): + read_vrt(vrt_4x4, window=(0, 0, 4, 5)) + + +def test_window_past_both_edges_raises_value_error(vrt_4x4): + """Windows past both right and bottom edges raise the same error.""" + with pytest.raises(ValueError, match='outside the VRT extent'): + read_vrt(vrt_4x4, window=(0, 0, 10, 10)) + + +# --------------------------------------------------------------------------- +# Zero-size and inverted windows +# --------------------------------------------------------------------------- + + +def test_zero_size_row_window_raises_value_error(vrt_4x4): + """``r0 == r1`` produces a zero-height window and must raise.""" + with pytest.raises(ValueError, match='non-positive size'): + read_vrt(vrt_4x4, window=(2, 0, 2, 4)) + + +def test_zero_size_col_window_raises_value_error(vrt_4x4): + """``c0 == c1`` produces a zero-width window and must raise.""" + with pytest.raises(ValueError, match='non-positive size'): + read_vrt(vrt_4x4, window=(0, 2, 4, 2)) + + +def test_fully_zero_size_window_raises_value_error(vrt_4x4): + """``r0 == r1 and c0 == c1`` raises (current code returned a (0, 0) array).""" + with pytest.raises(ValueError, match='non-positive size'): + read_vrt(vrt_4x4, window=(2, 2, 2, 2)) + + +def test_inverted_row_window_raises_value_error(vrt_4x4): + """``r0 > r1`` is degenerate and must raise.""" + with pytest.raises(ValueError, match='non-positive size'): + read_vrt(vrt_4x4, window=(3, 0, 1, 4)) + + +def test_inverted_col_window_raises_value_error(vrt_4x4): + """``c0 > c1`` is degenerate and must raise.""" + with pytest.raises(ValueError, match='non-positive size'): + read_vrt(vrt_4x4, window=(0, 3, 4, 1)) + + +# --------------------------------------------------------------------------- +# Valid in-bounds windows still work (don't over-reject) +# --------------------------------------------------------------------------- + + +def test_full_extent_window_still_works(vrt_4x4): + """A window covering the full VRT extent still reads the full array.""" + arr, _ = read_vrt(vrt_4x4, window=(0, 0, 4, 4)) + assert arr.shape == (4, 4) + + +def test_interior_window_still_works(vrt_4x4): + """An interior window returns the requested subset shape.""" + arr, _ = read_vrt(vrt_4x4, window=(1, 1, 3, 3)) + assert arr.shape == (2, 2) + + +def test_edge_aligned_window_still_works(vrt_4x4): + """A window that touches but does not exceed the edges is accepted.""" + arr, _ = read_vrt(vrt_4x4, window=(2, 2, 4, 4)) + assert arr.shape == (2, 2) + + +def test_none_window_still_returns_full_array(vrt_4x4): + """``window=None`` still returns the full VRT extent.""" + arr, _ = read_vrt(vrt_4x4) + assert arr.shape == (4, 4) + + +# --------------------------------------------------------------------------- +# Cross-path parity: VRT and local-TIFF reject the same bad window +# --------------------------------------------------------------------------- + + +def test_vrt_and_local_paths_share_window_validation(tmp_path): + """Same bad window rejected on both VRT and local-TIFF paths with the + same error class and message shape (one word swap is fine).""" + d = _unique_dir(tmp_path, "parity") + tif = os.path.join(d, 'data.tif') + _write_tif(tif, size=4) + vrt = os.path.join(d, 'mosaic.vrt') + _write_vrt(vrt, 'data.tif', size=4) + + bad_window = (-1, 0, 2, 2) + + with pytest.raises(ValueError) as vrt_exc: + read_vrt(vrt, window=bad_window) + with pytest.raises(ValueError) as local_exc: + read_to_array(tif, window=bad_window) + + vrt_msg = str(vrt_exc.value) + local_msg = str(local_exc.value) + + # Both messages must echo the offending window and the source dims. + assert 'window=' in vrt_msg + assert 'window=' in local_msg + assert '4x4' in vrt_msg + assert '4x4' in local_msg + # And both must signal "out of bounds" via the same wording template, + # with a single word swap on the extent label. + assert 'extent' in vrt_msg + assert 'extent' in local_msg + assert 'non-positive size' in vrt_msg + assert 'non-positive size' in local_msg + # The VRT path says "VRT extent"; the local path says "source extent". + assert 'VRT extent' in vrt_msg + assert 'source extent' in local_msg From 9aa76e95c40e467fa2428a90a8d7d64b7b3283df Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 11:29:49 -0700 Subject: [PATCH 2/2] Reword stale read_vrt window-validation comment (PR #1698 review) The block comment above the window check still referenced "the clamp below", but the clamp logic was removed in #1697. Reword to describe the prior behavior (per-source reads silently clamped out-of-bounds windows) and why we now validate up front, keeping the issue reference so future readers can trace history. --- xrspatial/geotiff/_vrt.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 9d3fadb2..a5b40efd 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -411,13 +411,15 @@ def read_vrt(vrt_path: str, *, window=None, f"{len(vrt.bands)} band(s)") # Validate ``window`` against the VRT's parsed extent before any - # source reads. Without this, the clamp below silently shrinks an - # out-of-bounds window and returns a smaller array, which then - # mismatches caller-built coord arrays in ``open_geotiff`` and - # surfaces as an opaque ``CoordinateValidationError``. Mirrors the - # local-path validator in ``read_to_array`` (#1634) and the HTTP - # path validator in ``_read_cog_http`` (#1669) so all backends - # agree on the contract. See issue #1697. + # source reads. Prior to #1697, per-source reads silently clamped + # out-of-bounds windows to the source extent and returned a smaller + # array, which then mismatched caller-built coord arrays in + # ``open_geotiff`` and surfaced as an opaque + # ``CoordinateValidationError`` far from the real cause. We now + # reject invalid windows up front. Mirrors the local-path validator + # in ``read_to_array`` (#1634) and the HTTP path validator in + # ``_read_cog_http`` (#1669) so all backends agree on the contract. + # See issue #1697. if window is not None: r0, c0, r1, c1 = window if (r0 < 0 or c0 < 0