Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,16 @@
coords_to_transform as _coords_to_transform,
require_transform_for_georeferenced as _require_transform_for_georeferenced,
)
from ._errors import (
ConflictingCRSError,
ConflictingNodataError,
GeoTIFFAmbiguousMetadataError,
InvalidCRSCodeError,
MixedBandMetadataError,
NonUniformCoordsError,
RotatedTransformError,
UnparseableCRSError,
)
from ._geotags import GeoTransform, RASTER_PIXEL_IS_AREA, RASTER_PIXEL_IS_POINT
from ._reader import UnsafeURLError
# ``read_to_array`` is internal: it is used by ``open_geotiff`` and the
Expand Down Expand Up @@ -108,7 +118,15 @@
# is intentionally omitted: it is deprecated in favour of ``da.xrs.plot()``
# and emits a ``DeprecationWarning`` when called.
__all__ = [
'ConflictingCRSError',
'ConflictingNodataError',
'GeoTIFFAmbiguousMetadataError',
'GeoTIFFFallbackWarning',
'InvalidCRSCodeError',
'MixedBandMetadataError',
'NonUniformCoordsError',
'RotatedTransformError',
'UnparseableCRSError',
'UnsafeURLError',
'open_geotiff',
'read_geotiff_gpu',
Expand Down
115 changes: 115 additions & 0 deletions xrspatial/geotiff/_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
"""Typed errors for ambiguous GeoTIFF metadata (issue #1987).

The reader and writer used to "guess and continue" when geospatial
metadata was ambiguous: invalid CRS codes, unparseable CRS strings,
rotated transforms, non-uniform coords, mixed band metadata, conflicting
``crs`` vs ``crs_wkt`` attrs, conflicting nodata aliases. Each case
becomes a hard error by default with a per-case typed subclass so
callers can ``except`` the family or a specific case.

This module provides the error class hierarchy only. The validator
hooks in ``_validation.py`` decide when each one fires; the per-case
PRs (issue #1987 PRs 1-7) wire them up at the read/write entry points.

Hierarchy::

ValueError
└── GeoTIFFAmbiguousMetadataError
├── InvalidCRSCodeError (PR 1 / #1971)
├── UnparseableCRSError (PR 2)
├── RotatedTransformError (PR 3)
├── NonUniformCoordsError (PR 4)
├── MixedBandMetadataError (PR 5)
├── ConflictingCRSError (PR 6, blocked on #1984)
└── ConflictingNodataError (PR 7, blocked on #1988)
"""
from __future__ import annotations


class GeoTIFFAmbiguousMetadataError(ValueError):
    """Root of the ambiguous-GeoTIFF-metadata error family (#1987).

    Deriving from ``ValueError`` means code that already does
    ``except ValueError`` continues to catch these failures. Catch this
    class to handle every ambiguity case at once, or catch one of the
    per-case subclasses to handle a single kind of ambiguity.
    """


class InvalidCRSCodeError(GeoTIFFAmbiguousMetadataError):
    """An EPSG / authority code is invalid on read or write (#1971, PR 1).

    Signals a CRS code that does not resolve to a known authority
    entry. Example of the legacy behaviour this replaces:
    ``to_geotiff(crs=True)`` used to write ``EPSG=1`` silently.
    """


class UnparseableCRSError(GeoTIFFAmbiguousMetadataError):
    """A CRS string is neither valid WKT nor a recognised authority code (PR 2).

    Covers partial WKT and other malformed input. The legacy path would
    have emitted such a string unchanged, which left downstream
    consumers with ``crs`` and ``crs_wkt`` attrs that do not match.
    """


class RotatedTransformError(GeoTIFFAmbiguousMetadataError):
    """The affine transform carries non-zero rotation/shear terms (PR 3).

    Downstream xrspatial functions assume axis-aligned rasters, so a
    rotated grid would otherwise yield wrong results. The read entry
    points raise this by default. Passing ``allow_rotated=True`` keeps
    the existing attr-flag behaviour and reads the pixel grid without
    the geospatial assumption.
    """


class NonUniformCoordsError(GeoTIFFAmbiguousMetadataError):
    """On write, DataArray coords disagree with the implied transform (PR 4).

    ``to_geotiff`` accepts coords that describe a non-uniform pixel grid
    (variable cell size, gaps). Without this error the writer would
    derive the transform from the first two coord values and silently
    truncate the rest. The existing sentinel exemption for int-dtype
    coords is retained (#1969).
    """


class MixedBandMetadataError(GeoTIFFAmbiguousMetadataError):
    """Bands of a VRT declare conflicting per-band metadata (PR 5).

    The most common case is nodata sentinels that differ between bands,
    which the legacy read path silently flattened to a single value.
    Pass ``band_nodata='first'`` to opt in to that legacy behaviour
    explicitly.
    """


class ConflictingCRSError(GeoTIFFAmbiguousMetadataError):
    """On write, ``attrs['crs']`` and ``attrs['crs_wkt']`` disagree (PR 6).

    Raised when both keys hold CRS strings that do not canonicalise to
    the same WKT (after EPSG → WKT lookup). Without this error the
    writer would emit one of them and silently drop the other.
    """


class ConflictingNodataError(GeoTIFFAmbiguousMetadataError):
    """On write, the nodata sentinel aliases disagree (PR 7).

    Raised when ``attrs['nodata']`` and ``attrs['nodatavals']`` are set
    to different values; ``_resolve_nodata_attr`` used to pick one and
    ignore the other. ``_FillValue`` is a CF alias and stays
    deprioritised per the existing convention.
    """


# Public names of this module: the family base class first, then the
# per-case subclasses in the order they are defined above.
__all__ = [
"GeoTIFFAmbiguousMetadataError",
"InvalidCRSCodeError",
"UnparseableCRSError",
"RotatedTransformError",
"NonUniformCoordsError",
"MixedBandMetadataError",
"ConflictingCRSError",
"ConflictingNodataError",
]
131 changes: 131 additions & 0 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,21 @@
every backend.

Extracted in step 4 of issue #1813.

Ambiguous-metadata hooks (issue #1987)
--------------------------------------
``validate_read_metadata`` and ``validate_write_metadata`` are
plug-points for the per-case checks listed in #1987 (unparseable CRS,
rotated transforms, non-uniform coords, mixed band metadata, conflicting
crs/crs_wkt, conflicting nodata aliases). PR 0 lands the hook
signatures and a registry; each follow-up PR registers its check.
The hooks are no-ops until at least one check is registered, so
behaviour does not change until a per-case PR opts in.
"""
from __future__ import annotations

from typing import Any, Callable, Iterable, Mapping

import numpy as np

from ._coords import _BAND_DIM_NAMES
Expand Down Expand Up @@ -302,3 +314,122 @@ def _validate_nodata_arg(nodata) -> None:
f"the array dtype; a non-numeric value would otherwise "
f"crash inside NumPy with a ufunc TypeError."
) from e


# ---------------------------------------------------------------------------
# Ambiguous-metadata hooks (issue #1987 PR 0)
#
# Each per-case PR (#1987 PRs 2-7) registers a check via
# ``register_read_metadata_check`` / ``register_write_metadata_check``.
# The hooks below iterate the registered checks in registration order.
# A check raises one of the ``_errors.GeoTIFFAmbiguousMetadataError``
# subclasses to refuse the input; returning normally lets the call
# continue.
#
# The registry is process-global and additive. Tests that need to
# unregister a check should use ``unregister_*`` rather than mutating
# the list in place so the surrounding helpers stay typed.
# ---------------------------------------------------------------------------

# Signature shared by every check: it receives the context mapping handed
# to the ``validate_*`` hook and returns ``None``; it refuses the input by
# raising instead of returning a status.
_ReadCheck = Callable[[Mapping[str, Any]], None]
_WriteCheck = Callable[[Mapping[str, Any]], None]

# Module-level registries. ``register_*`` appends to them, ``unregister_*``
# removes from them, and ``validate_*`` runs their contents in list order.
_READ_METADATA_CHECKS: list[_ReadCheck] = []
_WRITE_METADATA_CHECKS: list[_WriteCheck] = []


def register_read_metadata_check(check: _ReadCheck) -> _ReadCheck:
    """Register a read-side ambiguous-metadata check (issue #1987).

    Parameters
    ----------
    check : callable
        Invoked by ``validate_read_metadata`` with the context mapping.
        It refuses the input by raising; returning normally lets the
        read continue.

    Returns
    -------
    callable
        ``check`` itself, so this function can be used as a decorator.

    Raises
    ------
    TypeError
        If ``check`` is not callable.

    Notes
    -----
    Idempotent: re-registering the same callable is a no-op.
    """
    # Reject non-callables at the registration site. The registry is
    # process-global, so a bad entry would otherwise surface later, inside
    # every ``validate_read_metadata`` call, as an opaque
    # "object is not callable" error far from the mistake.
    if not callable(check):
        raise TypeError(
            f"read metadata check must be callable, "
            f"got {type(check).__name__}"
        )
    if check not in _READ_METADATA_CHECKS:
        _READ_METADATA_CHECKS.append(check)
    return check


def register_write_metadata_check(check: _WriteCheck) -> _WriteCheck:
    """Register a write-side ambiguous-metadata check (issue #1987).

    Parameters
    ----------
    check : callable
        Invoked by ``validate_write_metadata`` with the context mapping.
        It refuses the input by raising; returning normally lets the
        write continue.

    Returns
    -------
    callable
        ``check`` itself, so this function can be used as a decorator.

    Raises
    ------
    TypeError
        If ``check`` is not callable.

    Notes
    -----
    Idempotent: re-registering the same callable is a no-op.
    """
    # Reject non-callables at the registration site. The registry is
    # process-global, so a bad entry would otherwise surface later, inside
    # every ``validate_write_metadata`` call, as an opaque
    # "object is not callable" error far from the mistake.
    if not callable(check):
        raise TypeError(
            f"write metadata check must be callable, "
            f"got {type(check).__name__}"
        )
    if check not in _WRITE_METADATA_CHECKS:
        _WRITE_METADATA_CHECKS.append(check)
    return check


def unregister_read_metadata_check(check: _ReadCheck) -> None:
    """Drop ``check`` from the read-side registry if it is present.

    An unregistered ``check`` is silently ignored, so test teardown can
    call this unconditionally without first testing membership.
    """
    try:
        _READ_METADATA_CHECKS.remove(check)
    except ValueError:
        # ``list.remove`` signals "not present" this way; nothing to undo.
        return


def unregister_write_metadata_check(check: _WriteCheck) -> None:
    """Drop ``check`` from the write-side registry if it is present.

    An unregistered ``check`` is silently ignored.
    """
    try:
        _WRITE_METADATA_CHECKS.remove(check)
    except ValueError:
        # ``list.remove`` signals "not present" this way; nothing to undo.
        return


def _registered_read_metadata_checks() -> Iterable[_ReadCheck]:
    """Return an immutable copy of the read-side registry, in order.

    Intended for tests and introspection; the returned tuple is detached
    from the live registry.
    """
    return (*_READ_METADATA_CHECKS,)


def _registered_write_metadata_checks() -> Iterable[_WriteCheck]:
    """Return an immutable copy of the write-side registry, in order.

    Intended for tests and introspection; the returned tuple is detached
    from the live registry.
    """
    return (*_WRITE_METADATA_CHECKS,)


def validate_read_metadata(context: Mapping[str, Any] | None = None) -> None:
    """Dispatch every registered read-side metadata check (issue #1987).

    Parameters
    ----------
    context : mapping, optional
        Values the registered checks look at. This hook imposes no
        schema: each per-case PR documents the keys its check reads
        (e.g. ``'crs_wkt'``, ``'transform'``, ``'band_nodata'``). A key
        that is absent means "nothing to check" for the check that
        wanted it; it is not an error at this level. ``None`` is
        replaced by an empty mapping before dispatch.

    Raises
    ------
    GeoTIFFAmbiguousMetadataError
        Or one of its subclasses, raised by a registered check.

    Notes
    -----
    With an empty registry this function does nothing, so PR 0 leaves
    behaviour at every entry point unchanged.
    """
    if _READ_METADATA_CHECKS:
        payload: Mapping[str, Any] = context if context is not None else {}
        # Walk a frozen copy of the registry. If a check registers or
        # unregisters another check while dispatch is in progress
        # (deliberately, or through an import side effect), looping over
        # the live list could skip or repeat entries. Building the tuple
        # is only paid for when something is registered.
        for run_check in tuple(_READ_METADATA_CHECKS):
            run_check(payload)


def validate_write_metadata(context: Mapping[str, Any] | None = None) -> None:
    """Dispatch every registered write-side metadata check (issue #1987).

    Write-side counterpart of ``validate_read_metadata``, intended for
    ``to_geotiff`` / ``write_geotiff_gpu`` / ``write_vrt``. The context
    mapping follows the same convention as on the read side, and the
    function does nothing while no write-side check is registered.
    """
    if _WRITE_METADATA_CHECKS:
        payload: Mapping[str, Any] = context if context is not None else {}
        # Walk a frozen copy so a check that registers or unregisters
        # another check during dispatch cannot make the loop skip or
        # repeat entries.
        for run_check in tuple(_WRITE_METADATA_CHECKS):
            run_check(payload)
Loading
Loading