Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@
# and internal callers that genuinely need it can import directly from
# ``xrspatial.geotiff._reader``. See issue #1708.
from ._attrs import (
GEOREF_STATUS_CRS_ONLY,
GEOREF_STATUS_FULL,
GEOREF_STATUS_NONE,
GEOREF_STATUS_ROTATED_DROPPED,
GEOREF_STATUS_TRANSFORM_ONLY,
GEOREF_STATUS_VALUES,
_LEVEL_RANGES,
_VALID_COMPRESSIONS,
_extent_to_window,
Expand Down Expand Up @@ -124,6 +130,12 @@
'ConflictingNodataError',
'GeoTIFFAmbiguousMetadataError',
'GeoTIFFFallbackWarning',
'GEOREF_STATUS_CRS_ONLY',
'GEOREF_STATUS_FULL',
'GEOREF_STATUS_NONE',
'GEOREF_STATUS_ROTATED_DROPPED',
'GEOREF_STATUS_TRANSFORM_ONLY',
'GEOREF_STATUS_VALUES',
'InvalidCRSCodeError',
'MixedBandMetadataError',
'NonUniformCoordsError',
Expand Down
137 changes: 134 additions & 3 deletions xrspatial/geotiff/_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
reconstruct them from canonical state.

The contract version is recorded in ``attrs['_xrspatial_geotiff_contract']``
(currently ``2``). Consumers can branch on this integer if the tier
(currently ``3``). Consumers can branch on this integer if the tier
split changes in a future release.

Canonical (xrspatial owns these; round-trip stable):
Expand Down Expand Up @@ -53,6 +53,13 @@
"no declared sentinel" signal. See ``_set_nodata_attrs``.
- ``raster_type``: ``'area'`` (implicit / RasterPixelIsArea) or ``'point'``
(explicit / RasterPixelIsPoint).
- ``georef_status``: one of ``'full'``, ``'transform_only'``, ``'crs_only'``,
``'none'``, ``'rotated_dropped'``. Single attr that encodes the five
distinct states the reader can land in when CRS / transform tags are
combined. See :func:`_compute_georef_status` for the decision table and
issue #2136 for the rationale. The attr is additive: ``crs`` / ``crs_wkt``
/ ``transform`` / ``_xrspatial_no_georef`` remain present with unchanged
semantics so existing consumers keep working.
- ``extra_tags``: list of ``(tag_id, type_id, count, value)`` tuples for
TIFF tags outside the structured set. Omitted when no out-of-band
tags are present.
Expand Down Expand Up @@ -175,7 +182,37 @@
# matplotlib-colormap attrs that v1 still emitted under a
# ``DeprecationWarning``. Downstream code that read those keys via
# ``attrs[key]`` now sees ``KeyError`` rather than the deprecated value.
_ATTRS_CONTRACT_VERSION = 2
#
# Version 3 (issue #2136) adds ``attrs['georef_status']`` to the canonical
# tier. Existing keys (``crs``, ``crs_wkt``, ``transform``, the
# ``_xrspatial_no_georef`` marker) keep their pre-v3 shape so downstream
# code that branches on them still works; the new attr is additive and
# disambiguates ``crs_only`` from ``none`` and ``rotated_dropped`` from
# the truly-no-transform case.
_ATTRS_CONTRACT_VERSION = 3


# ``attrs['georef_status']`` vocabulary (issue #2136). Each constant
# names one of the five states a read can end up in once the CRS and
# transform tags have been combined. Branching on this single attr
# spares downstream code from re-deriving the state out of ``crs``,
# ``crs_wkt``, ``transform``, and ``_xrspatial_no_georef``.
GEOREF_STATUS_FULL = 'full'
GEOREF_STATUS_TRANSFORM_ONLY = 'transform_only'
GEOREF_STATUS_CRS_ONLY = 'crs_only'
GEOREF_STATUS_NONE = 'none'
GEOREF_STATUS_ROTATED_DROPPED = 'rotated_dropped'

# Immutable set of every legal ``georef_status`` string. Public so
# callers can validate a value with ``status in GEOREF_STATUS_VALUES``
# instead of repeating the five literals.
GEOREF_STATUS_VALUES = frozenset((
    GEOREF_STATUS_FULL,
    GEOREF_STATUS_TRANSFORM_ONLY,
    GEOREF_STATUS_CRS_ONLY,
    GEOREF_STATUS_NONE,
    GEOREF_STATUS_ROTATED_DROPPED,
))


# String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids.
Expand Down Expand Up @@ -333,6 +370,89 @@ def _validate_read_geo_info(
})


def _compute_georef_status(geo_info) -> str:
    """Classify ``geo_info`` into one of the five ``georef_status`` values.

    See the module docstring and issue #2136 for the full rationale. The
    decision table:

    ============================ ================= ===================
    transform tags               CRS present       georef_status
    ============================ ================= ===================
    axis-aligned                 yes               ``full``
    axis-aligned                 no                ``transform_only``
    absent                       yes               ``crs_only``
    absent                       no                ``none``
    rotated, dropped             either            ``rotated_dropped``
    ============================ ================= ===================

    "CRS present" is signalled by either ``geo_info.crs_epsg`` or
    ``geo_info.crs_wkt`` being non-None. The rotated-dropped branch
    fires when the upstream reader saw a rotated
    ``ModelTransformationTag`` and was opened with ``allow_rotated=True``;
    that path returns ``has_georef=False`` with the rotated 6-tuple on
    ``geo_info.transform.rotated_affine``. The check is on
    ``rotated_affine`` rather than the surrounding state so a future
    reader change cannot accidentally re-route a real "no transform"
    file into the rotated bucket.

    This function only extracts the three underlying booleans from
    ``geo_info``; the decision table itself lives in
    :func:`_compute_georef_status_from_parts`, which the VRT inline
    branches call directly. Routing through that helper keeps a single
    copy of the rule so the ``GeoInfo`` path and the VRT path cannot
    drift apart.

    Parameters
    ----------
    geo_info
        Object exposing (optionally) ``transform``, ``has_georef``,
        ``crs_epsg`` and ``crs_wkt``. Every attribute is read with
        ``getattr`` and a fallback, so a missing attribute is treated as
        "not present" rather than raising.

    Returns
    -------
    str
        One of the ``GEOREF_STATUS_*`` constants.
    """
    # ``getattr(None, ...)`` with a default simply returns the default,
    # so no separate ``transform is None`` guard is required here.
    transform = getattr(geo_info, 'transform', None)
    rotated_dropped = getattr(transform, 'rotated_affine', None) is not None

    # NOTE(review): ``_populate_attrs_from_geo_info`` reads ``has_georef``
    # with a default of ``True`` while this helper defaults to ``False``.
    # The two only diverge for objects that lack the attribute entirely;
    # confirm which default is intended before changing either side.
    has_transform = bool(getattr(geo_info, 'has_georef', False))

    has_crs = (
        getattr(geo_info, 'crs_epsg', None) is not None
        or getattr(geo_info, 'crs_wkt', None) is not None
    )

    return _compute_georef_status_from_parts(
        has_transform=has_transform,
        has_crs=has_crs,
        rotated_dropped=rotated_dropped,
    )


def _compute_georef_status_from_parts(
    *,
    has_transform: bool,
    has_crs: bool,
    rotated_dropped: bool = False,
) -> str:
    """Map three raw booleans onto a ``georef_status`` value.

    The VRT inline branches parse the VRT XML into ``geo_transform`` /
    ``crs_wkt`` fields on their own dataclass and never build a
    ``GeoInfo``. Rather than forcing those sites to fabricate one just to
    call :func:`_compute_georef_status`, this helper accepts the
    underlying flags directly.

    Parameters
    ----------
    has_transform
        True when an axis-aligned transform is available.
    has_crs
        True when a CRS is available.
    rotated_dropped
        True when a rotated transform was seen and dropped. Takes
        precedence over the other two flags.

    Returns
    -------
    str
        One of the ``GEOREF_STATUS_*`` constants.
    """
    # Rotation wins outright: the other flags are irrelevant once the
    # transform has been dropped.
    if rotated_dropped:
        return GEOREF_STATUS_ROTATED_DROPPED

    # Branch on the transform first, then let the CRS flag pick between
    # the two outcomes on each side.
    if has_transform:
        return GEOREF_STATUS_FULL if has_crs else GEOREF_STATUS_TRANSFORM_ONLY
    return GEOREF_STATUS_CRS_ONLY if has_crs else GEOREF_STATUS_NONE


def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None:
"""Populate ``attrs`` with all GeoTIFF metadata from ``geo_info``.

Expand Down Expand Up @@ -366,6 +486,15 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
# rather than the bare literal.
attrs['_xrspatial_geotiff_contract'] = _ATTRS_CONTRACT_VERSION

# Stamp ``georef_status`` (issue #2136) before any of the optional
# CRS / transform branches below. The decision uses the unmodified
# ``geo_info``, not the post-branch ``attrs`` dict, so a future
# change to which attrs get emitted cannot accidentally shift the
# status value. The VRT inline paths compute this directly via
# ``_compute_georef_status_from_parts``; keep both sites in lockstep
# via the same constants.
attrs['georef_status'] = _compute_georef_status(geo_info)

src_t = geo_info.transform
has_georef = getattr(geo_info, 'has_georef', True)
# ``allow_rotated=True`` opt-in path (#2115): the parser returns a
Expand All @@ -376,7 +505,9 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
# ``open_geotiff(allow_rotated=True)``, CRS attrs are dropped on
# this path too -- otherwise downstream code that gates on
# ``"crs" in da.attrs`` treats the array as spatially meaningful
# while the actual mapping is gone (#2122 / #2126).
# while the actual mapping is gone (#2122 / #2126). The
# ``georef_status='rotated_dropped'`` stamp above still records the
# state for consumers that branch on it.
rotated_optin = (
src_t is not None
and getattr(src_t, 'rotated_affine', None) is not None
Expand Down
29 changes: 28 additions & 1 deletion xrspatial/geotiff/_backends/vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,11 @@
import numpy as np
import xarray as xr

from .._attrs import _ATTRS_CONTRACT_VERSION, _set_nodata_attrs
from .._attrs import (
_ATTRS_CONTRACT_VERSION,
_compute_georef_status_from_parts,
_set_nodata_attrs,
)
from .._coords import (
coords_from_pixel_geometry as _coords_from_pixel_geometry,
transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry,
Expand Down Expand Up @@ -293,6 +297,21 @@ def read_vrt(source: str, *,
# ``_populate_attrs_from_geo_info``; stamp the contract version here
# so both code paths emit the same marker.
attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION}
# ``georef_status`` (issue #2136): five-valued classifier shared
# with the non-VRT read paths. ``has_crs`` uses ``is not None`` (not
# a truthiness check) to stay aligned with ``_compute_georef_status``;
# the VRT XML parser returns None for missing/empty ``<SRS>`` rather
# than ``""``, but pinning the rule defends the alignment if the
# parser ever changes. ``rotated_dropped=_vrt_is_rotated`` matches
# the rotated arm of the read path (issue #2122): a VRT geo_transform
# with non-zero rotation/skew lands the array in the same
# rotated_dropped bucket as a rotated ``ModelTransformationTag`` on
# the non-VRT path.
attrs['georef_status'] = _compute_georef_status_from_parts(
has_transform=gt is not None and not _vrt_is_rotated,
has_crs=vrt.crs_wkt is not None and not _vrt_is_rotated,
rotated_dropped=_vrt_is_rotated,
)
if gt is None or _vrt_is_rotated:
# Mirror the eager non-VRT no-georef path: stamp the no-georef
# marker whenever the read produced placeholder int64 coords.
Expand Down Expand Up @@ -730,6 +749,14 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype,
_vrt_is_rotated = (
gt is not None and (gt[2] != 0.0 or gt[4] != 0.0)
)
# ``georef_status`` (issue #2136). See the eager VRT branch above
# for the rationale; the rotated VRT path lands the array in the
# ``rotated_dropped`` bucket so consumers can branch on it.
attrs['georef_status'] = _compute_georef_status_from_parts(
has_transform=gt is not None and not _vrt_is_rotated,
has_crs=vrt.crs_wkt is not None and not _vrt_is_rotated,
rotated_dropped=_vrt_is_rotated,
)
if gt is not None:
origin_x, res_x, _, origin_y, _, res_y = gt
coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w)
Expand Down
13 changes: 13 additions & 0 deletions xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@
'x_resolution',
'y_resolution',
'resolution_unit',
# Added in contract v3 (issue #2136). The fixture has both a transform
# and a CRS, so the round-tripped value is the ``'full'`` literal; the
# value-equality check lives in ``test_georef_status_roundtrip``
# below to keep the membership and value assertions independent.
'georef_status',
_CONTRACT_KEY,
)

Expand Down Expand Up @@ -292,6 +297,14 @@ def test_contract_version_roundtrip(canonical_roundtrip):
assert rd.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


def test_georef_status_roundtrip(canonical_roundtrip):
    """Contract v3 (issue #2136) makes ``georef_status`` canonical.

    The fixture sets ``crs`` plus an axis-aligned transform-from-coords,
    so the value read back must be the ``'full'`` literal. The literal is
    spelled out on purpose: this test pins the on-disk contract value.
    """
    roundtripped, _ = canonical_roundtrip
    status = roundtripped.attrs['georef_status']
    assert status == 'full'


# ---------------------------------------------------------------------------
# Per-backend coverage for canonical-key *presence*.
#
Expand Down
26 changes: 16 additions & 10 deletions xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,18 +300,24 @@ def test_removed_attrs_absent_after_roundtrip(tmp_path):
)


def test_contract_version_is_two(tmp_path):
"""``attrs['_xrspatial_geotiff_contract']`` is ``2`` on every read.

The contract version is the user-visible signal that the removal
landed. Downstream code branching on the integer needs the bump
to fire here on every read path.
def test_contract_version_is_current(tmp_path):
"""``attrs['_xrspatial_geotiff_contract']`` matches the constant on
every read.

The contract version is the user-visible signal that a tier change
landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey
attrs); issue #2136 bumped it to 3 (addition of
``attrs['georef_status']``). Pinning against ``_ATTRS_CONTRACT_VERSION``
means the next bump only has to touch the constant and the
bump-specific tests, not every "is the stamp set" assertion.
"""
from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION

da = _make_da(crs=4326)
rd = _roundtrip(tmp_path, da, name='contract_v2_signal.tif')
rd = _roundtrip(tmp_path, da, name='contract_version_signal.tif')

assert rd.attrs.get('_xrspatial_geotiff_contract') == 2, (
assert rd.attrs.get('_xrspatial_geotiff_contract') == _ATTRS_CONTRACT_VERSION, (
f"contract version stamp on a fresh read is "
f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; issue "
f"#2016 bumped it to 2."
f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; expected "
f"{_ATTRS_CONTRACT_VERSION}."
)
23 changes: 14 additions & 9 deletions xrspatial/geotiff/tests/test_attrs_contract_version_1984.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,14 @@ def _write_minimal_vrt(vrt_path, source_name, *, height, width):
)


def test_attrs_contract_version_constant_is_two():
"""Pin the integer value so a careless bump shows up here first."""
assert _ATTRS_CONTRACT_VERSION == 2
def test_attrs_contract_version_constant_is_current():
    """Pin the contract integer so an unintended bump fails here first.

    Contract v3 (issue #2136) added ``attrs['georef_status']`` to the
    canonical tier. Moving past 3 should come with a docs update and a
    sibling test for whatever key the new version introduces.
    """
    expected_version = 3
    assert _ATTRS_CONTRACT_VERSION == expected_version


def test_eager_numpy_stamps_contract_version(tmp_path):
Expand All @@ -89,7 +94,7 @@ def test_eager_numpy_stamps_contract_version(tmp_path):

da = open_geotiff(path)

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


def test_dask_numpy_stamps_contract_version(tmp_path):
Expand All @@ -98,7 +103,7 @@ def test_dask_numpy_stamps_contract_version(tmp_path):

da = open_geotiff(path, chunks=32)

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


@_gpu_only
Expand All @@ -108,7 +113,7 @@ def test_gpu_stamps_contract_version(tmp_path):

da = open_geotiff(path, gpu=True)

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


@_gpu_only
Expand All @@ -118,7 +123,7 @@ def test_dask_gpu_stamps_contract_version(tmp_path):

da = open_geotiff(path, gpu=True, chunks=32)

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


def test_vrt_eager_stamps_contract_version(tmp_path):
Expand All @@ -129,7 +134,7 @@ def test_vrt_eager_stamps_contract_version(tmp_path):

da = read_vrt(str(vrt))

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION


def test_vrt_chunked_stamps_contract_version(tmp_path):
Expand All @@ -140,4 +145,4 @@ def test_vrt_chunked_stamps_contract_version(tmp_path):

da = read_vrt(str(vrt), chunks=32)

assert da.attrs[_CONTRACT_KEY] == 2
assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION
Loading
Loading