diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 581d9ccdf..c01ba22e5 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -55,6 +55,16 @@ coords_to_transform as _coords_to_transform, require_transform_for_georeferenced as _require_transform_for_georeferenced, ) +from ._errors import ( + ConflictingCRSError, + ConflictingNodataError, + GeoTIFFAmbiguousMetadataError, + InvalidCRSCodeError, + MixedBandMetadataError, + NonUniformCoordsError, + RotatedTransformError, + UnparseableCRSError, +) from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT from ._reader import UnsafeURLError # ``read_to_array`` is internal: it is used by ``open_geotiff`` and the @@ -108,7 +118,15 @@ # is intentionally omitted: it is deprecated in favour of ``da.xrs.plot()`` # and emits a ``DeprecationWarning`` when called. __all__ = [ + 'ConflictingCRSError', + 'ConflictingNodataError', + 'GeoTIFFAmbiguousMetadataError', 'GeoTIFFFallbackWarning', + 'InvalidCRSCodeError', + 'MixedBandMetadataError', + 'NonUniformCoordsError', + 'RotatedTransformError', + 'UnparseableCRSError', 'UnsafeURLError', 'open_geotiff', 'read_geotiff_gpu', diff --git a/xrspatial/geotiff/_errors.py b/xrspatial/geotiff/_errors.py new file mode 100644 index 000000000..aa972b546 --- /dev/null +++ b/xrspatial/geotiff/_errors.py @@ -0,0 +1,115 @@ +"""Typed errors for ambiguous GeoTIFF metadata (issue #1987). + +The reader and writer used to "guess and continue" when geospatial +metadata was ambiguous: invalid CRS codes, unparseable CRS strings, +rotated transforms, non-uniform coords, mixed band metadata, conflicting +``crs`` vs ``crs_wkt`` attrs, conflicting nodata aliases. Each case +becomes a hard error by default with a per-case typed subclass so +callers can ``except`` the family or a specific case. + +This module provides the error class hierarchy only. The validator +hooks in ``_validation.py`` decide when each one fires; the per-case +PRs (issue #1987 PRs 1-7) wire them up at the read/write entry points. + +Hierarchy:: + + Exception + └── GeoTIFFAmbiguousMetadataError + ├── InvalidCRSCodeError (PR 1 / #1971) + ├── UnparseableCRSError (PR 2) + ├── RotatedTransformError (PR 3) + ├── NonUniformCoordsError (PR 4) + ├── MixedBandMetadataError (PR 5) + ├── ConflictingCRSError (PR 6, blocked on #1984) + └── ConflictingNodataError (PR 7, blocked on #1988) +""" +from __future__ import annotations + + +class GeoTIFFAmbiguousMetadataError(ValueError): + """Base class for ambiguous GeoTIFF metadata failures (#1987). + + Subclasses ``ValueError`` so existing ``except ValueError`` callers + keep catching these. Catch this class directly to handle the whole + family, or one of the per-case subclasses to handle a single + ambiguity type. + """ + + +class InvalidCRSCodeError(GeoTIFFAmbiguousMetadataError): + """Invalid EPSG / authority code on read or write (#1971, PR 1). + + Raised when a CRS code does not resolve to a known authority entry + (e.g. ``to_geotiff(crs=True)`` formerly wrote ``EPSG=1`` silently). + """ + + +class UnparseableCRSError(GeoTIFFAmbiguousMetadataError): + """CRS string cannot be parsed as WKT or recognised authority code (PR 2). + + Partial WKT or malformed input that the legacy path would have + emitted unchanged, producing mismatched ``crs`` vs ``crs_wkt`` + attrs downstream. + """ + + +class RotatedTransformError(GeoTIFFAmbiguousMetadataError): + """Affine transform has non-zero rotation/shear terms (PR 3). + + Downstream xrspatial functions assume axis-aligned rasters and + would otherwise produce wrong results on a rotated grid. The + read entry points raise this by default; pass ``allow_rotated=True`` + to retain the existing attr-flag behaviour and read the pixel + grid without the geospatial assumption. + """ + + +class NonUniformCoordsError(GeoTIFFAmbiguousMetadataError): + """DataArray coords disagree with the implied transform on write (PR 4). + + ``to_geotiff`` accepts coords that imply a non-uniform pixel grid + (variable cell size, gaps); the writer would otherwise pick the + first two coord values as the transform and silently truncate the + rest. The existing sentinel exemption for int-dtype coords stays + (#1969). + """ + + +class MixedBandMetadataError(GeoTIFFAmbiguousMetadataError): + """VRT bands declare conflicting per-band metadata (PR 5). + + Most often disagreeing nodata sentinels across bands. The legacy + read path flattened to one value silently. Pass + ``band_nodata='first'`` to keep the legacy behaviour explicitly. + """ + + +class ConflictingCRSError(GeoTIFFAmbiguousMetadataError): + """``attrs['crs']`` and ``attrs['crs_wkt']`` disagree on write (PR 6). + + Both keys set to CRS strings that do not canonicalise to the same + WKT (after EPSG → WKT lookup). The writer would otherwise pick one + and emit it, silently dropping the other. + """ + + +class ConflictingNodataError(GeoTIFFAmbiguousMetadataError): + """Nodata sentinel aliases disagree on write (PR 7). + + ``attrs['nodata']`` and ``attrs['nodatavals']`` set to different + values. ``_resolve_nodata_attr`` formerly picked one and ignored + the other. ``_FillValue`` is a CF alias and remains deprioritised + per the existing convention. + """ + + +__all__ = [ + "GeoTIFFAmbiguousMetadataError", + "InvalidCRSCodeError", + "UnparseableCRSError", + "RotatedTransformError", + "NonUniformCoordsError", + "MixedBandMetadataError", + "ConflictingCRSError", + "ConflictingNodataError", +] diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 2e96b3c58..f299bbbe1 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -8,9 +8,21 @@ every backend. Extracted in step 4 of issue #1813. + +Ambiguous-metadata hooks (issue #1987) +-------------------------------------- +``validate_read_metadata`` and ``validate_write_metadata`` are +plug-points for the per-case checks listed in #1987 (unparseable CRS, +rotated transforms, non-uniform coords, mixed band metadata, conflicting +crs/crs_wkt, conflicting nodata aliases). PR 0 lands the hook +signatures and a registry; each follow-up PR registers its check. +The hooks are no-ops until at least one check is registered, so +behaviour does not change until a per-case PR opts in. """ from __future__ import annotations +from typing import Any, Callable, Iterable, Mapping + import numpy as np from ._coords import _BAND_DIM_NAMES @@ -302,3 +314,122 @@ def _validate_nodata_arg(nodata) -> None: f"the array dtype; a non-numeric value would otherwise " f"crash inside NumPy with a ufunc TypeError." ) from e + + +# --------------------------------------------------------------------------- +# Ambiguous-metadata hooks (issue #1987 PR 0) +# +# Each per-case PR (#1987 PRs 2-7) registers a check via +# ``register_read_metadata_check`` / ``register_write_metadata_check``. +# The hooks below iterate the registered checks in registration order. +# A check raises one of the ``_errors.GeoTIFFAmbiguousMetadataError`` +# subclasses to refuse the input; returning normally lets the call +# continue. +# +# The registry is process-global and additive. Tests that need to +# unregister a check should use ``unregister_*`` rather than mutating +# the list in place so the surrounding helpers stay typed. +# --------------------------------------------------------------------------- + +_ReadCheck = Callable[[Mapping[str, Any]], None] +_WriteCheck = Callable[[Mapping[str, Any]], None] + +_READ_METADATA_CHECKS: list[_ReadCheck] = [] +_WRITE_METADATA_CHECKS: list[_WriteCheck] = [] + + +def register_read_metadata_check(check: _ReadCheck) -> _ReadCheck: + """Register a read-side ambiguous-metadata check (issue #1987). + + Returns ``check`` so the call can be used as a decorator. Idempotent: + re-registering the same callable is a no-op. + """ + if check not in _READ_METADATA_CHECKS: + _READ_METADATA_CHECKS.append(check) + return check + + +def register_write_metadata_check(check: _WriteCheck) -> _WriteCheck: + """Register a write-side ambiguous-metadata check (issue #1987).""" + if check not in _WRITE_METADATA_CHECKS: + _WRITE_METADATA_CHECKS.append(check) + return check + + +def unregister_read_metadata_check(check: _ReadCheck) -> None: + """Remove a previously-registered read-side check. + + Tolerant of ``check`` not being registered so tests can call this + in teardown without guarding on ``in``. + """ + try: + _READ_METADATA_CHECKS.remove(check) + except ValueError: + pass + + +def unregister_write_metadata_check(check: _WriteCheck) -> None: + """Remove a previously-registered write-side check.""" + try: + _WRITE_METADATA_CHECKS.remove(check) + except ValueError: + pass + + +def _registered_read_metadata_checks() -> Iterable[_ReadCheck]: + """Snapshot of registered read-side checks for testing/introspection.""" + return tuple(_READ_METADATA_CHECKS) + + +def _registered_write_metadata_checks() -> Iterable[_WriteCheck]: + """Snapshot of registered write-side checks for testing/introspection.""" + return tuple(_WRITE_METADATA_CHECKS) + + +def validate_read_metadata(context: Mapping[str, Any] | None = None) -> None: + """Run all registered read-side ambiguous-metadata checks (issue #1987). + + Parameters + ---------- + context : mapping, optional + Keys consumed by the registered checks. The PR-0 hook does not + prescribe a schema; each per-case PR documents the keys it + reads (e.g. ``'crs_wkt'``, ``'transform'``, ``'band_nodata'``). + A missing key is treated as "nothing to check" by the + downstream check, not as an error here. + + Raises + ------ + GeoTIFFAmbiguousMetadataError + Or one of its subclasses, from a registered check. + + Notes + ----- + No-op when no checks are registered, so PR 0 does not change + behaviour at any entry point. + """ + if not _READ_METADATA_CHECKS: + return + ctx: Mapping[str, Any] = {} if context is None else context + # Iterate over a snapshot. A check that registers or unregisters another + # check during dispatch (whether on purpose or via an import side effect) + # would otherwise reshape the list mid-loop and skip or repeat entries. + # The cost is one tuple per dispatch, paid only when at least one check + # is registered. + for check in tuple(_READ_METADATA_CHECKS): + check(ctx) + + +def validate_write_metadata(context: Mapping[str, Any] | None = None) -> None: + """Run all registered write-side ambiguous-metadata checks (issue #1987). + + Mirror of ``validate_read_metadata`` for ``to_geotiff`` / + ``write_geotiff_gpu`` / ``write_vrt``. See that docstring for the + context-schema convention and the no-op-when-empty guarantee. + """ + if not _WRITE_METADATA_CHECKS: + return + ctx: Mapping[str, Any] = {} if context is None else context + # Snapshot for the same reason as the read hook above. + for check in tuple(_WRITE_METADATA_CHECKS): + check(ctx) diff --git a/xrspatial/geotiff/tests/test_ambiguous_metadata_hooks_1987.py b/xrspatial/geotiff/tests/test_ambiguous_metadata_hooks_1987.py new file mode 100644 index 000000000..8e0495b7b --- /dev/null +++ b/xrspatial/geotiff/tests/test_ambiguous_metadata_hooks_1987.py @@ -0,0 +1,400 @@ +"""Tests for the ambiguous-metadata validator framework (issue #1987 PR 0). + +PR 0 lands the error class hierarchy in ``_errors.py`` and the +register / dispatch framework in ``_validation.py``. No raises yet; +each per-case PR (#1987 PRs 2-7) registers its own check. + +These tests cover: + +- the error class hierarchy is what the per-case PRs expect to subclass +- the hooks are no-ops when no checks are registered (so PR 0 cannot + regress any existing entry point) +- registration is idempotent and ordered +- unregistration is tolerant of unknown callables +- a registered check that raises propagates through the hook +- a context mapping is forwarded verbatim to each check +""" +from __future__ import annotations + +import pytest + +from xrspatial.geotiff._errors import ( + ConflictingCRSError, + ConflictingNodataError, + GeoTIFFAmbiguousMetadataError, + InvalidCRSCodeError, + MixedBandMetadataError, + NonUniformCoordsError, + RotatedTransformError, + UnparseableCRSError, +) +from xrspatial.geotiff import _validation as _validation_mod +from xrspatial.geotiff._validation import ( + _registered_read_metadata_checks, + _registered_write_metadata_checks, + register_read_metadata_check, + register_write_metadata_check, + unregister_read_metadata_check, + unregister_write_metadata_check, + validate_read_metadata, + validate_write_metadata, +) + + +@pytest.fixture(autouse=True) +def _reset_metadata_check_registries(): + """Snapshot and restore the process-global check registries (#1987). + + The registries are module-global lists. A test that registers a + check and crashes before its ``try/finally unregister`` would + leave a stale callable in place and pollute later tests. This + autouse fixture snapshots both registries before the test and + restores them after, regardless of whether the test passed, + failed, or raised, so the file is robust to per-test mistakes. + """ + read_snapshot = list(_validation_mod._READ_METADATA_CHECKS) + write_snapshot = list(_validation_mod._WRITE_METADATA_CHECKS) + try: + yield + finally: + _validation_mod._READ_METADATA_CHECKS[:] = read_snapshot + _validation_mod._WRITE_METADATA_CHECKS[:] = write_snapshot + + +# ---------------------------------------------------------------------- +# Error class hierarchy +# ---------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "subclass", + [ + InvalidCRSCodeError, + UnparseableCRSError, + RotatedTransformError, + NonUniformCoordsError, + MixedBandMetadataError, + ConflictingCRSError, + ConflictingNodataError, + ], +) +def test_subclass_inherits_from_base(subclass): + """Each per-case error is catchable via the family base class.""" + assert issubclass(subclass, GeoTIFFAmbiguousMetadataError) + + +def test_base_is_value_error_subclass(): + """Existing ``except ValueError`` callers keep catching the family.""" + assert issubclass(GeoTIFFAmbiguousMetadataError, ValueError) + + +def test_error_classes_reexported_from_public_namespace(): + """User-facing ambiguity errors must be importable from ``xrspatial.geotiff``. + + ``UnsafeURLError`` and ``GeoTIFFFallbackWarning`` follow the same + convention. Without the re-export, callers would have to dig into + the private ``_errors`` module to write ``except`` clauses, which + couples them to an implementation-detail import path. + """ + import xrspatial.geotiff as geotiff_pkg + + for name in ( + "GeoTIFFAmbiguousMetadataError", + "InvalidCRSCodeError", + "UnparseableCRSError", + "RotatedTransformError", + "NonUniformCoordsError", + "MixedBandMetadataError", + "ConflictingCRSError", + "ConflictingNodataError", + ): + assert hasattr(geotiff_pkg, name), ( + f"{name} not exposed on xrspatial.geotiff") + assert name in geotiff_pkg.__all__, ( + f"{name} not listed in xrspatial.geotiff.__all__") + # The re-exported class is the same object as the one in ``_errors``. + assert (geotiff_pkg.GeoTIFFAmbiguousMetadataError + is GeoTIFFAmbiguousMetadataError) + + +def test_subclass_catch_does_not_catch_siblings(): + """``except UnparseableCRSError`` must not catch ``RotatedTransformError``.""" + with pytest.raises(RotatedTransformError): + try: + raise RotatedTransformError("rotated") + except UnparseableCRSError: + pytest.fail("sibling subclass should not catch") + + +# ---------------------------------------------------------------------- +# Hook no-op behaviour +# ---------------------------------------------------------------------- + + +def test_read_hook_is_noop_when_no_checks_registered(): + """PR 0 must not change behaviour at any read entry point.""" + # Even with no context, an empty registry must return cleanly. + validate_read_metadata() + validate_read_metadata({}) + validate_read_metadata({"unused": object()}) + + +def test_write_hook_is_noop_when_no_checks_registered(): + """PR 0 must not change behaviour at any write entry point.""" + validate_write_metadata() + validate_write_metadata({}) + validate_write_metadata({"unused": object()}) + + +# ---------------------------------------------------------------------- +# Registration / dispatch +# ---------------------------------------------------------------------- + + +def test_register_and_dispatch_read_check(): + seen: list[dict] = [] + + def check(ctx): + seen.append(dict(ctx)) + + register_read_metadata_check(check) + try: + validate_read_metadata({"crs_wkt": "EPSG:4326"}) + assert seen == [{"crs_wkt": "EPSG:4326"}] + finally: + unregister_read_metadata_check(check) + + +def test_register_and_dispatch_write_check(): + seen: list[dict] = [] + + def check(ctx): + seen.append(dict(ctx)) + + register_write_metadata_check(check) + try: + validate_write_metadata({"transform": (1.0, 0, 0, 0, -1.0, 0)}) + assert seen == [{"transform": (1.0, 0, 0, 0, -1.0, 0)}] + finally: + unregister_write_metadata_check(check) + + +def test_register_is_idempotent_read(): + def check(ctx): + return None + + register_read_metadata_check(check) + register_read_metadata_check(check) + try: + # Same callable registered twice still appears once. + assert _registered_read_metadata_checks().count(check) == 1 + finally: + unregister_read_metadata_check(check) + + +def test_register_is_idempotent_write(): + def check(ctx): + return None + + register_write_metadata_check(check) + register_write_metadata_check(check) + try: + assert _registered_write_metadata_checks().count(check) == 1 + finally: + unregister_write_metadata_check(check) + + +def test_dispatch_preserves_registration_order(): + order: list[str] = [] + + def first(ctx): + order.append("first") + + def second(ctx): + order.append("second") + + register_read_metadata_check(first) + register_read_metadata_check(second) + try: + validate_read_metadata({}) + assert order == ["first", "second"] + finally: + unregister_read_metadata_check(first) + unregister_read_metadata_check(second) + + +def test_write_dispatch_preserves_registration_order(): + """Symmetry: write-side has its own registry, must order the same.""" + order: list[str] = [] + + def first(ctx): + order.append("first") + + def second(ctx): + order.append("second") + + register_write_metadata_check(first) + register_write_metadata_check(second) + try: + validate_write_metadata({}) + assert order == ["first", "second"] + finally: + unregister_write_metadata_check(first) + unregister_write_metadata_check(second) + + +def test_write_first_raising_check_short_circuits(): + """Symmetry: write-side short-circuits on first raise like read-side.""" + later_called = {"flag": False} + + def deny(ctx): + raise ConflictingCRSError("crs mismatch") + + def later(ctx): + later_called["flag"] = True + + register_write_metadata_check(deny) + register_write_metadata_check(later) + try: + with pytest.raises(ConflictingCRSError): + validate_write_metadata({}) + assert later_called["flag"] is False + finally: + unregister_write_metadata_check(deny) + unregister_write_metadata_check(later) + + +def test_read_and_write_registries_are_independent(): + """A read-side check must not fire from the write hook (and vice versa).""" + read_calls = {"count": 0} + write_calls = {"count": 0} + + def read_check(ctx): + read_calls["count"] += 1 + + def write_check(ctx): + write_calls["count"] += 1 + + register_read_metadata_check(read_check) + register_write_metadata_check(write_check) + try: + validate_read_metadata({}) + assert read_calls["count"] == 1 + assert write_calls["count"] == 0 + + validate_write_metadata({}) + assert read_calls["count"] == 1 + assert write_calls["count"] == 1 + finally: + unregister_read_metadata_check(read_check) + unregister_write_metadata_check(write_check) + + +def test_unregister_unknown_check_is_safe(): + """Test teardown must tolerate double-unregister.""" + + def never_registered(ctx): + return None + + # Must not raise. + unregister_read_metadata_check(never_registered) + unregister_write_metadata_check(never_registered) + + +def test_check_can_raise_typed_error(): + def deny(ctx): + raise UnparseableCRSError("bad WKT") + + register_read_metadata_check(deny) + try: + with pytest.raises(UnparseableCRSError, match="bad WKT"): + validate_read_metadata({"crs_wkt": "MALFORMED"}) + finally: + unregister_read_metadata_check(deny) + + +def test_first_raising_check_short_circuits(): + """Once a check raises, later checks must not see the call.""" + later_called = {"flag": False} + + def deny(ctx): + raise RotatedTransformError("rotated") + + def later(ctx): + later_called["flag"] = True + + register_read_metadata_check(deny) + register_read_metadata_check(later) + try: + with pytest.raises(RotatedTransformError): + validate_read_metadata({}) + assert later_called["flag"] is False + finally: + unregister_read_metadata_check(deny) + unregister_read_metadata_check(later) + + +def test_dispatch_is_safe_against_registry_mutation_read(): + """A check that mutates the read registry must not skip later checks. + + The dispatcher iterates over a snapshot of the registry, so a check + that registers or unregisters another callable (whether on purpose + or via an import side effect) cannot reshape the live list mid-loop + and skip a sibling check or visit one twice. + """ + seen: list[str] = [] + + def mutating(ctx): + seen.append("mutating") + # Self-unregister mid-dispatch; the live list would otherwise + # shift left and the iterator would skip ``second``. + unregister_read_metadata_check(mutating) + + def second(ctx): + seen.append("second") + + register_read_metadata_check(mutating) + register_read_metadata_check(second) + try: + validate_read_metadata({}) + assert seen == ["mutating", "second"] + finally: + unregister_read_metadata_check(mutating) + unregister_read_metadata_check(second) + + +def test_dispatch_is_safe_against_registry_mutation_write(): + """Write hook mirrors the read-side snapshot guarantee.""" + seen: list[str] = [] + + def mutating(ctx): + seen.append("mutating") + unregister_write_metadata_check(mutating) + + def second(ctx): + seen.append("second") + + register_write_metadata_check(mutating) + register_write_metadata_check(second) + try: + validate_write_metadata({}) + assert seen == ["mutating", "second"] + finally: + unregister_write_metadata_check(mutating) + unregister_write_metadata_check(second) + + +def test_none_context_is_treated_as_empty_mapping(): + """Calling ``validate_*_metadata()`` with no args must not crash a check.""" + seen: list[object] = [] + + def check(ctx): + # The contract: ctx is a mapping, never None. + seen.append(dict(ctx)) + + register_read_metadata_check(check) + try: + validate_read_metadata() + assert seen == [{}] + finally: + unregister_read_metadata_check(check) diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 0782b6dbf..c36552afc 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2726,6 +2726,17 @@ def test_all_lists_supported_functions(self): # public API. If any of these gets removed or renamed, that is a # breaking change and should go through a deprecation cycle. expected = { + # Ambiguous-metadata error hierarchy (#1987). Re-exported in + # PR 0 so callers can ``except`` the family or a specific + # case without importing from the private ``_errors`` module. + 'ConflictingCRSError', + 'ConflictingNodataError', + 'GeoTIFFAmbiguousMetadataError', + 'InvalidCRSCodeError', + 'MixedBandMetadataError', + 'NonUniformCoordsError', + 'RotatedTransformError', + 'UnparseableCRSError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', 'open_geotiff',