From c6daae54b8776b7c6ac67c77fcb9f9531cb56069 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:11:41 -0700 Subject: [PATCH 1/2] Narrow except Exception in read_vrt to OSError/ValueError/struct.error (#1670) The VRT source-read fallback in ``_vrt.py`` caught every ``Exception`` subclass, so a ``RuntimeError`` or ``MemoryError`` thrown by future code paths would silently turn into a warn-and-skip "hole" in the mosaic. The intent was always "this one source file is unreadable, fall back to fill values"; widen-to-Exception leaked real bugs. Restrict the catch to the families ``read_to_array`` actually documents/raises: ``OSError`` (and ``FileNotFoundError`` / ``PermissionError``) for I/O, ``ValueError`` for the typed parse errors, and ``struct.error`` which still leaks from a couple of parse paths until that gets cleaned up. Strict mode (#1662) and the warning text are unchanged. --- xrspatial/geotiff/_vrt.py | 16 +- .../tests/test_vrt_narrow_except_1670.py | 278 ++++++++++++++++++ 2 files changed, 292 insertions(+), 2 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index 031246b5..dbb859f3 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -6,6 +6,7 @@ from __future__ import annotations import os +import struct from dataclasses import dataclass, field from xml.sax.saxutils import escape as _xml_escape, quoteattr as _xml_quoteattr @@ -349,14 +350,25 @@ def read_vrt(vrt_path: str, *, window=None, src_r1 = sr.y_off + int((clip_r1 - dst_r0) * scale_y) src_c1 = sr.x_off + int((clip_c1 - dst_c0) * scale_x) - # Read from source file using windowed read + # Read from source file using windowed read. + # + # Narrow the catch to the exception families ``read_to_array`` + # actually documents/raises for an unreadable or malformed + # source: ``OSError`` (and subclasses ``FileNotFoundError`` / + # ``PermissionError``) for I/O problems, ``ValueError`` for the + # typed parse errors from ``parse_header`` / ``parse_ifd`` and + # friends, and ``struct.error`` which still leaks from a few + # parse paths until that work lands. ``RuntimeError``, + # ``MemoryError``, and other non-I/O bugs should NOT be + # absorbed by the "skip the tile" fallback -- they signal real + # failures and need to surface to the caller. See issue #1670. try: src_arr, _ = read_to_array( src.filename, window=(src_r0, src_c0, src_r1, src_c1), band=src.band - 1, # convert 1-based to 0-based ) - except Exception as e: + except (OSError, ValueError, struct.error) as e: # Under XRSPATIAL_GEOTIFF_STRICT=1, surface the read failure # so partial mosaics are caught in CI. Default mode warns # once per missing source then continues, preserving the diff --git a/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py b/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py new file mode 100644 index 00000000..3eb198b9 --- /dev/null +++ b/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py @@ -0,0 +1,278 @@ +"""Regression tests for #1670: narrow ``except Exception`` in VRT reader. + +``read_vrt`` historically wrapped each source read in ``except Exception``, +which under default mode swallowed any subclass of ``Exception`` -- including +``RuntimeError``, ``MemoryError``, and other bugs unrelated to the "source +file is unreadable" fallback the catch was meant to cover. + +This module pins the narrowed contract: + +* I/O errors (``OSError`` and its subclasses) plus parse errors + (``ValueError``, ``struct.error``) warn-and-continue in default mode and + re-raise under ``XRSPATIAL_GEOTIFF_STRICT=1``. +* Other exception types (``RuntimeError`` here as a stand-in for real bugs) + propagate in both modes so callers and CI see them. +""" +from __future__ import annotations + +import struct +import warnings + +import pytest + +from xrspatial.geotiff import GeoTIFFFallbackWarning + + +@pytest.fixture +def clear_strict_env(monkeypatch): + """Ensure XRSPATIAL_GEOTIFF_STRICT is unset for default-mode tests.""" + monkeypatch.delenv('XRSPATIAL_GEOTIFF_STRICT', raising=False) + + +@pytest.fixture +def set_strict_env(monkeypatch): + """Set XRSPATIAL_GEOTIFF_STRICT=1 for strict-mode tests.""" + monkeypatch.setenv('XRSPATIAL_GEOTIFF_STRICT', '1') + + +def _write_simple_vrt(tmp_path, src_path, name='mosaic_1670.vrt'): + """Write a 4x4 VRT pointing at a single source path.""" + vrt_path = tmp_path / name + vrt_path.write_text( + '\n' + ' \n' + ' 0, 1, 0, 0, 0, -1\n' + ' \n' + ' -9999\n' + ' \n' + f' {src_path}' + '\n' + ' 1\n' + ' \n' + ' \n' + ' \n' + ' \n' + '\n' + ) + return vrt_path + + +def _patch_read_to_array(monkeypatch, exc): + """Make ``read_to_array`` inside ``_vrt`` raise ``exc`` on every call. + + The VRT reader does a local ``from ._reader import read_to_array`` inside + ``read_vrt``, so we patch the source attribute and the import will pick up + the stub. + """ + from xrspatial.geotiff import _reader + + def _boom(*args, **kwargs): + raise exc + + monkeypatch.setattr(_reader, 'read_to_array', _boom) + + +def test_runtime_error_propagates_default_mode( + clear_strict_env, monkeypatch, tmp_path, +): + """A non-I/O bug (RuntimeError) must NOT be absorbed by the VRT fallback. + + Default mode used to catch ``Exception``, which swallowed real bugs. The + narrowed catch only handles I/O / parse errors, so a RuntimeError raised + from ``read_to_array`` should bubble straight up. + """ + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670.tif' + src_path.write_bytes(b'placeholder') # parse will be replaced by stub + vrt_path = _write_simple_vrt(tmp_path, str(src_path)) + + _patch_read_to_array(monkeypatch, RuntimeError("synthetic 1670")) + + with pytest.raises(RuntimeError, match='synthetic 1670'): + read_vrt(str(vrt_path)) + + +def test_runtime_error_propagates_strict_mode( + set_strict_env, monkeypatch, tmp_path, +): + """Strict mode already propagates everything; double-check RuntimeError.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_strict.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_rt_strict.vrt') + + _patch_read_to_array(monkeypatch, RuntimeError("synthetic 1670 strict")) + + with pytest.raises(RuntimeError, match='synthetic 1670 strict'): + read_vrt(str(vrt_path)) + + +def test_file_not_found_warns_and_continues( + clear_strict_env, monkeypatch, tmp_path, +): + """FileNotFoundError is the canonical "source is unreadable" case.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_fnf.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_fnf.vrt') + + _patch_read_to_array( + monkeypatch, FileNotFoundError("synthetic missing 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + # Mosaic still loads (with a hole) and the skipped source is reported. + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + msgs = ' '.join(str(x.message) for x in fallback_warnings) + assert 'VRT source' in msgs + assert 'FileNotFoundError' in msgs + + +def test_file_not_found_strict_reraises( + set_strict_env, monkeypatch, tmp_path, +): + """Strict mode re-raises the FileNotFoundError.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_fnf_strict.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_fnf_strict.vrt') + + _patch_read_to_array( + monkeypatch, FileNotFoundError("synthetic missing 1670 strict")) + + with pytest.raises(FileNotFoundError, match='synthetic missing 1670 strict'): + read_vrt(str(vrt_path)) + + +def test_value_error_warns_and_continues( + clear_strict_env, monkeypatch, tmp_path, +): + """A typed ValueError from parse_header / parse_ifd is a documented case.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_val.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_val.vrt') + + _patch_read_to_array( + monkeypatch, ValueError("bad header 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + msgs = ' '.join(str(x.message) for x in fallback_warnings) + assert 'VRT source' in msgs + assert 'ValueError' in msgs + + +def test_value_error_strict_reraises( + set_strict_env, monkeypatch, tmp_path, +): + """Strict mode re-raises the ValueError.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_val_strict.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_val_strict.vrt') + + _patch_read_to_array( + monkeypatch, ValueError("bad header 1670 strict")) + + with pytest.raises(ValueError, match='bad header 1670 strict'): + read_vrt(str(vrt_path)) + + +def test_struct_error_warns_and_continues( + clear_strict_env, monkeypatch, tmp_path, +): + """struct.error still leaks from some parse paths; catch it too. + + Future PRs may convert these to ValueError, but until then the VRT + reader should be robust to the raw ``struct.error`` exception so a + single malformed source does not abort the whole mosaic. + """ + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_se.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_se.vrt') + + _patch_read_to_array( + monkeypatch, struct.error("short buffer 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + msgs = ' '.join(str(x.message) for x in fallback_warnings) + assert 'VRT source' in msgs + assert 'error' in msgs # struct.error type name is 'error' + + +def test_permission_error_warns_and_continues( + clear_strict_env, monkeypatch, tmp_path, +): + """PermissionError is an OSError subclass, so it goes through the fallback.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_perm.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_perm.vrt') + + _patch_read_to_array( + monkeypatch, PermissionError("denied 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + + +def test_memory_error_propagates_default_mode( + clear_strict_env, monkeypatch, tmp_path, +): + """MemoryError is a real failure, not an "unreadable source"; propagate.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_mem.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_mem.vrt') + + _patch_read_to_array(monkeypatch, MemoryError("OOM 1670")) + + with pytest.raises(MemoryError, match='OOM 1670'): + read_vrt(str(vrt_path)) From 99c9a6f1a8f8b07e91efcaa1b7ad24034f699a61 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 08:47:17 -0700 Subject: [PATCH 2/2] Add codec decode errors (zlib.error, ZstdError) to read_vrt allowlist Addresses Copilot feedback on PR #1675: the narrowed (OSError, ValueError, struct.error) catch in read_vrt did not cover codec-specific exceptions raised from corrupt compressed tiles, which meant a single bad deflate or ZSTD payload aborted the whole mosaic instead of warning-and-skipping like the historical behaviour. Expand the allowlist via a small helper that returns the tuple of codec-library decode errors worth swallowing (zlib.error always; zstandard.ZstdError when the optional dep is installed). RuntimeError is deliberately left out since lz4 frame, LERC and glymur all raise it for both corruption and real bugs. --- xrspatial/geotiff/_vrt.py | 59 ++++++++++-- .../tests/test_vrt_narrow_except_1670.py | 95 ++++++++++++++++++- 2 files changed, 145 insertions(+), 9 deletions(-) diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index dbb859f3..75827817 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -7,6 +7,7 @@ import os import struct +import zlib from dataclasses import dataclass, field from xml.sax.saxutils import escape as _xml_escape, quoteattr as _xml_quoteattr @@ -15,6 +16,46 @@ from ._safe_xml import safe_fromstring +def _codec_decode_exceptions() -> tuple[type[BaseException], ...]: + """Return the tuple of codec-specific decode exceptions worth swallowing. + + ``read_to_array`` dispatches to per-codec wrappers in + :mod:`._compression`. Most of those wrappers raise ``ValueError`` on + malformed input (LZW, PackBits, LERC pre-decode bomb check, JPEG 2000 + fail-closed shape check), but a few codecs leak their library's + native exception class through the wrapper: + + * ``zlib.error`` from ``zlib.decompress`` for deflate / adobe-deflate + payloads. + * ``zstandard.ZstdError`` from ``zstandard.stream_reader.read`` when + a ZSTD frame is corrupt. + + These are recoverable per-source failures -- they mean "this tile's + compressed payload is bad", not "the program is broken" -- so they + belong in the same warn-and-skip catch as ``OSError`` / ``ValueError`` + / ``struct.error``. ``RuntimeError`` (raised by lz4 frame decoder, + LERC error-code translation, and glymur on malformed JP2) is + deliberately NOT included: it can come from real bugs as easily as + from corruption, so it stays in the propagate-and-fail bucket. + + ``zstandard`` is an optional dependency; if it's not installed the + decoder path is unreachable and there's no exception class to catch. + """ + excs: list[type[BaseException]] = [zlib.error] + try: # pragma: no cover - depends on optional install + from zstandard import ZstdError + excs.append(ZstdError) + except ImportError: + pass + return tuple(excs) + + +# Computed once at import: tuple of codec exception classes to catch in +# the per-source read fallback below. Defined at module scope so the +# import-time work doesn't repeat on every VRT source. +_CODEC_DECODE_EXCEPTIONS = _codec_decode_exceptions() + + def _xml_text(value) -> str: """Escape *value* for safe inclusion as XML element text. @@ -357,18 +398,24 @@ def read_vrt(vrt_path: str, *, window=None, # source: ``OSError`` (and subclasses ``FileNotFoundError`` / # ``PermissionError``) for I/O problems, ``ValueError`` for the # typed parse errors from ``parse_header`` / ``parse_ifd`` and - # friends, and ``struct.error`` which still leaks from a few - # parse paths until that work lands. ``RuntimeError``, - # ``MemoryError``, and other non-I/O bugs should NOT be - # absorbed by the "skip the tile" fallback -- they signal real - # failures and need to surface to the caller. See issue #1670. + # friends, ``struct.error`` which still leaks from a few parse + # paths until that work lands, and the codec-library decode + # exceptions enumerated in :data:`_CODEC_DECODE_EXCEPTIONS` + # (``zlib.error`` for corrupt deflate tiles, plus + # ``zstandard.ZstdError`` when zstandard is installed). + # ``RuntimeError``, ``MemoryError``, and other non-I/O bugs + # should NOT be absorbed by the "skip the tile" fallback -- + # they signal real failures and need to surface to the + # caller. See issues #1670 and PR #1675. try: src_arr, _ = read_to_array( src.filename, window=(src_r0, src_c0, src_r1, src_c1), band=src.band - 1, # convert 1-based to 0-based ) - except (OSError, ValueError, struct.error) as e: + except ( + OSError, ValueError, struct.error, + ) + _CODEC_DECODE_EXCEPTIONS as e: # Under XRSPATIAL_GEOTIFF_STRICT=1, surface the read failure # so partial mosaics are caught in CI. Default mode warns # once per missing source then continues, preserving the diff --git a/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py b/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py index 3eb198b9..96fe2026 100644 --- a/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py +++ b/xrspatial/geotiff/tests/test_vrt_narrow_except_1670.py @@ -7,9 +7,11 @@ This module pins the narrowed contract: -* I/O errors (``OSError`` and its subclasses) plus parse errors - (``ValueError``, ``struct.error``) warn-and-continue in default mode and - re-raise under ``XRSPATIAL_GEOTIFF_STRICT=1``. +* I/O errors (``OSError`` and its subclasses), parse errors + (``ValueError``, ``struct.error``), and codec-library decode errors + (``zlib.error`` for deflate; ``zstandard.ZstdError`` when zstandard is + installed) warn-and-continue in default mode and re-raise under + ``XRSPATIAL_GEOTIFF_STRICT=1``. * Other exception types (``RuntimeError`` here as a stand-in for real bugs) propagate in both modes so callers and CI see them. """ @@ -17,6 +19,7 @@ import struct import warnings +import zlib import pytest @@ -276,3 +279,89 @@ def test_memory_error_propagates_default_mode( with pytest.raises(MemoryError, match='OOM 1670'): read_vrt(str(vrt_path)) + + +def test_zlib_error_warns_and_continues( + clear_strict_env, monkeypatch, tmp_path, +): + """A ``zlib.error`` from a corrupt deflate tile should warn-and-skip. + + The narrowed catch in PR #1675 covered only ``(OSError, ValueError, + struct.error)``; ``zlib.error`` is a direct ``Exception`` subclass and + used to abort the whole mosaic when a deflate tile was corrupt. The + follow-up adds codec-library decode errors to the allowlist so the + historical warn-and-skip behaviour is restored for corrupt payloads. + """ + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_zlib.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_zlib.vrt') + + _patch_read_to_array(monkeypatch, zlib.error("synthetic deflate 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + msgs = ' '.join(str(x.message) for x in fallback_warnings) + assert 'VRT source' in msgs + assert 'error' in msgs # zlib.error type name is 'error' + + +def test_zlib_error_strict_reraises( + set_strict_env, monkeypatch, tmp_path, +): + """Under ``XRSPATIAL_GEOTIFF_STRICT=1`` a corrupt deflate tile re-raises.""" + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_zlib_strict.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_zlib_strict.vrt') + + _patch_read_to_array( + monkeypatch, zlib.error("synthetic deflate 1670 strict")) + + with pytest.raises(zlib.error, match='synthetic deflate 1670 strict'): + read_vrt(str(vrt_path)) + + +def test_zstd_error_warns_and_continues_if_available( + clear_strict_env, monkeypatch, tmp_path, +): + """``zstandard.ZstdError`` from a corrupt ZSTD tile should warn-and-skip. + + Skipped when ``zstandard`` is not installed -- the wrapper path that + raises this exception is unreachable in that case and there's no class + to instantiate. See ``_CODEC_DECODE_EXCEPTIONS`` in ``_vrt.py``. + """ + pytest.importorskip('zstandard') + from zstandard import ZstdError + from xrspatial.geotiff import read_vrt + + src_path = tmp_path / 'src_1670_zstd.tif' + src_path.write_bytes(b'placeholder') + vrt_path = _write_simple_vrt( + tmp_path, str(src_path), name='mosaic_1670_zstd.vrt') + + _patch_read_to_array(monkeypatch, ZstdError("synthetic zstd 1670")) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter('always') + da = read_vrt(str(vrt_path)) + + assert da.shape == (4, 4) + fallback_warnings = [ + x for x in w if issubclass(x.category, GeoTIFFFallbackWarning) + ] + assert len(fallback_warnings) >= 1 + msgs = ' '.join(str(x.message) for x in fallback_warnings) + assert 'VRT source' in msgs + assert 'ZstdError' in msgs