From 5eab2b3b7ec7033ee358047632fc7b0008eaae4c Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 11:53:13 -0700 Subject: [PATCH 1/2] geotiff: add canonical attrs['georef_status'] (#2136) Read paths emit one canonical attr that encodes the five distinct states the reader can land in when CRS and transform tags are combined: full, transform_only, crs_only, none, rotated_dropped. Without this attr, downstream code has to reconcile crs / crs_wkt / transform / _xrspatial_no_georef by hand, and two distinct on-disk situations (rotated-with-allow_rotated and truly-no-transform) end up indistinguishable via the public attrs. * Stamped via _populate_attrs_from_geo_info, covering eager numpy, dask, and the GPU read paths. * Stamped inline in both VRT branches (eager and chunked) through a shared _compute_georef_status_from_parts helper so all six call sites share one decision rule. * Bumps _ATTRS_CONTRACT_VERSION 2 -> 3. * Additive: crs, crs_wkt, transform, and the _xrspatial_no_georef marker keep their pre-v3 semantics so existing consumers keep working. --- xrspatial/geotiff/_attrs.py | 125 ++++- xrspatial/geotiff/_backends/vrt.py | 25 +- .../test_attrs_contract_canonical_1984.py | 13 + .../test_attrs_contract_passthrough_1984.py | 26 +- .../tests/test_attrs_contract_version_1984.py | 23 +- .../geotiff/tests/test_georef_status_2136.py | 442 ++++++++++++++++++ 6 files changed, 633 insertions(+), 21 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_georef_status_2136.py diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 9f60c31d2..d8f5d4159 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -48,6 +48,13 @@ "no declared sentinel" signal. See ``_set_nodata_attrs``. - ``raster_type``: ``'area'`` (implicit / RasterPixelIsArea) or ``'point'`` (explicit / RasterPixelIsPoint). +- ``georef_status``: one of ``'full'``, ``'transform_only'``, ``'crs_only'``, + ``'none'``, ``'rotated_dropped'``. Single attr that encodes the five + distinct states the reader can land in when CRS / transform tags are + combined. See :func:`_compute_georef_status` for the decision table and + issue #2136 for the rationale. The attr is additive: ``crs`` / ``crs_wkt`` + / ``transform`` / ``_xrspatial_no_georef`` remain present with unchanged + semantics so existing consumers keep working. - ``extra_tags``: list of ``(tag_id, type_id, count, value)`` tuples for TIFF tags outside the structured set. Omitted when no out-of-band tags are present. @@ -170,7 +177,34 @@ # matplotlib-colormap attrs that v1 still emitted under a # ``DeprecationWarning``. Downstream code that read those keys via # ``attrs[key]`` now sees ``KeyError`` rather than the deprecated value. -_ATTRS_CONTRACT_VERSION = 2 +# +# Version 3 (issue #2136) adds ``attrs['georef_status']`` to the canonical +# tier. Existing keys (``crs``, ``crs_wkt``, ``transform``, the +# ``_xrspatial_no_georef`` marker) keep their pre-v3 shape so downstream +# code that branches on them still works; the new attr is additive and +# disambiguates ``crs_only`` from ``none`` and ``rotated_dropped`` from +# the truly-no-transform case. +_ATTRS_CONTRACT_VERSION = 3 + + +# Canonical ``attrs['georef_status']`` values (issue #2136). One attr +# encodes the five distinct states the reader can land in when CRS and +# transform tags are combined; downstream code can branch on this rather +# than reconstructing the state from the union of ``crs``, ``crs_wkt``, +# ``transform``, and ``_xrspatial_no_georef``. +GEOREF_STATUS_FULL = 'full' +GEOREF_STATUS_TRANSFORM_ONLY = 'transform_only' +GEOREF_STATUS_CRS_ONLY = 'crs_only' +GEOREF_STATUS_NONE = 'none' +GEOREF_STATUS_ROTATED_DROPPED = 'rotated_dropped' + +_GEOREF_STATUS_VALUES = frozenset({ + GEOREF_STATUS_FULL, + GEOREF_STATUS_TRANSFORM_ONLY, + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, +}) # String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids. @@ -328,6 +362,86 @@ def _validate_read_geo_info( }) +def _compute_georef_status(geo_info) -> str: + """Classify ``geo_info`` into one of the five ``georef_status`` values. + + See the module docstring and issue #2136 for the full rationale. The + decision table: + + ============================ ================= =============== + transform tags CRS present georef_status + ============================ ================= =============== + axis-aligned yes ``full`` + axis-aligned no ``transform_only`` + absent yes ``crs_only`` + absent no ``none`` + rotated, dropped either ``rotated_dropped`` + ============================ ================= =============== + + "CRS present" is signalled by either ``geo_info.crs_epsg`` or + ``geo_info.crs_wkt`` being non-None. The rotated-dropped branch + fires when the upstream reader saw a rotated + ``ModelTransformationTag`` and was opened with ``allow_rotated=True``; + that path returns ``has_georef=False`` with the rotated 6-tuple on + ``geo_info.transform.rotated_affine``. The check is on + ``rotated_affine`` rather than the surrounding state so a future + reader change cannot accidentally re-route a real "no transform" + file into the rotated bucket. + + The four backends (eager, dask, GPU) call this through + :func:`_populate_attrs_from_geo_info`; the VRT inline paths import + this helper directly because they build their attrs dicts inline + (see ``_backends/vrt.py``). Keep all five callers in lockstep by + routing every decision through this one function. + """ + transform = getattr(geo_info, 'transform', None) + rotated_affine = ( + getattr(transform, 'rotated_affine', None) + if transform is not None else None + ) + if rotated_affine is not None: + return GEOREF_STATUS_ROTATED_DROPPED + has_georef = bool(getattr(geo_info, 'has_georef', False)) + has_crs = ( + getattr(geo_info, 'crs_epsg', None) is not None + or getattr(geo_info, 'crs_wkt', None) is not None + ) + if has_georef and has_crs: + return GEOREF_STATUS_FULL + if has_georef: + return GEOREF_STATUS_TRANSFORM_ONLY + if has_crs: + return GEOREF_STATUS_CRS_ONLY + return GEOREF_STATUS_NONE + + +def _compute_georef_status_from_parts( + *, + has_transform: bool, + has_crs: bool, + rotated_dropped: bool = False, +) -> str: + """Compute ``georef_status`` from raw booleans rather than a ``GeoInfo``. + + The VRT inline branches do not build a ``GeoInfo`` instance: they + parse the VRT XML straight into ``geo_transform`` / ``crs_wkt`` + fields on a different dataclass. Calling :func:`_compute_georef_status` + from those sites would require synthesising a fake ``GeoInfo`` for + each branch. This helper takes the underlying booleans directly so + the VRT paths and the ``_populate_attrs_from_geo_info`` path share + the same decision rule without the intermediate object. + """ + if rotated_dropped: + return GEOREF_STATUS_ROTATED_DROPPED + if has_transform and has_crs: + return GEOREF_STATUS_FULL + if has_transform: + return GEOREF_STATUS_TRANSFORM_ONLY + if has_crs: + return GEOREF_STATUS_CRS_ONLY + return GEOREF_STATUS_NONE + + def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None: """Populate ``attrs`` with all GeoTIFF metadata from ``geo_info``. @@ -361,6 +475,15 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None # rather than the bare literal. attrs['_xrspatial_geotiff_contract'] = _ATTRS_CONTRACT_VERSION + # Stamp ``georef_status`` (issue #2136) before any of the optional + # CRS / transform branches below. The decision uses the unmodified + # ``geo_info``, not the post-branch ``attrs`` dict, so a future + # change to which attrs get emitted cannot accidentally shift the + # status value. The VRT inline paths compute this directly via + # ``_compute_georef_status_from_parts``; keep both sites in lockstep + # via the same constants. + attrs['georef_status'] = _compute_georef_status(geo_info) + if geo_info.crs_epsg is not None: attrs['crs'] = geo_info.crs_epsg if geo_info.crs_wkt is not None: diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 0156ba8f0..8934fe55c 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -13,7 +13,11 @@ import numpy as np import xarray as xr -from .._attrs import _ATTRS_CONTRACT_VERSION, _set_nodata_attrs +from .._attrs import ( + _ATTRS_CONTRACT_VERSION, + _compute_georef_status_from_parts, + _set_nodata_attrs, +) from .._coords import ( coords_from_pixel_geometry as _coords_from_pixel_geometry, transform_tuple_from_pixel_geometry as _transform_tuple_from_pixel_geometry, @@ -273,6 +277,18 @@ def read_vrt(source: str, *, # ``_populate_attrs_from_geo_info``; stamp the contract version here # so both code paths emit the same marker. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} + # ``georef_status`` (issue #2136): five-valued classifier shared + # with the non-VRT read paths. The VRT reader does not currently + # expose a rotated-dropped path (rotated VRT geo_transforms are + # rejected upstream by ``_validate_read_geo_info``), so the rotated + # arg stays False here. The eager VRT branch derives + # ``has_transform`` from the parsed ``geo_transform`` element and + # ``has_crs`` from ``vrt.crs_wkt`` to keep the decision aligned + # with ``_compute_georef_status``. + attrs['georef_status'] = _compute_georef_status_from_parts( + has_transform=gt is not None, + has_crs=bool(vrt.crs_wkt), + ) if gt is None: # Mirror the eager non-VRT no-georef path: stamp the no-georef # marker whenever the source carries no transform. The current @@ -695,6 +711,13 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # ``_populate_attrs_from_geo_info``, so the contract version is # stamped inline using the shared constant to stay in lockstep. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} + # ``georef_status`` (issue #2136). See the eager VRT branch above + # for the rationale; the rotated-dropped state is not reachable + # through the VRT reader today. + attrs['georef_status'] = _compute_georef_status_from_parts( + has_transform=gt is not None, + has_crs=bool(vrt.crs_wkt), + ) if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py index 306fbd82b..58d0d36a6 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py @@ -82,6 +82,11 @@ 'x_resolution', 'y_resolution', 'resolution_unit', + # Added in contract v3 (issue #2136). The fixture is georef + CRS + # so the round-tripped value is the ``'full'`` literal; the + # value-equality check lives on ``test_georef_status_roundtrip`` + # below to keep the membership and value assertions independent. + 'georef_status', _CONTRACT_KEY, ) @@ -292,6 +297,14 @@ def test_contract_version_roundtrip(canonical_roundtrip): assert rd.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION +def test_georef_status_roundtrip(canonical_roundtrip): + """``georef_status`` (issue #2136) is canonical from contract v3. + The fixture sets ``crs`` + axis-aligned transform-from-coords, so + the round-tripped value must be the ``'full'`` literal.""" + rd, _ = canonical_roundtrip + assert rd.attrs['georef_status'] == 'full' + + # --------------------------------------------------------------------------- # Per-backend coverage for canonical-key *presence*. # diff --git a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py index fa3c066a6..7375033dc 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py @@ -300,18 +300,24 @@ def test_removed_attrs_absent_after_roundtrip(tmp_path): ) -def test_contract_version_is_two(tmp_path): - """``attrs['_xrspatial_geotiff_contract']`` is ``2`` on every read. - - The contract version is the user-visible signal that the removal - landed. Downstream code branching on the integer needs the bump - to fire here on every read path. +def test_contract_version_is_current(tmp_path): + """``attrs['_xrspatial_geotiff_contract']`` matches the constant on + every read. + + The contract version is the user-visible signal that a tier change + landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey + attrs); issue #2136 bumped it to 3 (addition of + ``attrs['georef_status']``). Pinning against ``_ATTRS_CONTRACT_VERSION`` + means the next bump only has to touch the constant and the + bump-specific tests, not every "is the stamp set" assertion. """ + from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION + da = _make_da(crs=4326) - rd = _roundtrip(tmp_path, da, name='contract_v2_signal.tif') + rd = _roundtrip(tmp_path, da, name='contract_version_signal.tif') - assert rd.attrs.get('_xrspatial_geotiff_contract') == 2, ( + assert rd.attrs.get('_xrspatial_geotiff_contract') == _ATTRS_CONTRACT_VERSION, ( f"contract version stamp on a fresh read is " - f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; issue " - f"#2016 bumped it to 2." + f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; expected " + f"{_ATTRS_CONTRACT_VERSION}." ) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py index 65d6c7f75..90bd53102 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py @@ -78,9 +78,14 @@ def _write_minimal_vrt(vrt_path, source_name, *, height, width): ) -def test_attrs_contract_version_constant_is_two(): - """Pin the integer value so a careless bump shows up here first.""" - assert _ATTRS_CONTRACT_VERSION == 2 +def test_attrs_contract_version_constant_is_current(): + """Pin the integer value so a careless bump shows up here first. + + Contract v3 (issue #2136) added ``attrs['georef_status']`` to the + canonical tier. Bumping past 3 should be paired with a docs update + and a sibling test for the new key. + """ + assert _ATTRS_CONTRACT_VERSION == 3 def test_eager_numpy_stamps_contract_version(tmp_path): @@ -89,7 +94,7 @@ def test_eager_numpy_stamps_contract_version(tmp_path): da = open_geotiff(path) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_dask_numpy_stamps_contract_version(tmp_path): @@ -98,7 +103,7 @@ def test_dask_numpy_stamps_contract_version(tmp_path): da = open_geotiff(path, chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION @_gpu_only @@ -108,7 +113,7 @@ def test_gpu_stamps_contract_version(tmp_path): da = open_geotiff(path, gpu=True) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION @_gpu_only @@ -118,7 +123,7 @@ def test_dask_gpu_stamps_contract_version(tmp_path): da = open_geotiff(path, gpu=True, chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_vrt_eager_stamps_contract_version(tmp_path): @@ -129,7 +134,7 @@ def test_vrt_eager_stamps_contract_version(tmp_path): da = read_vrt(str(vrt)) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_vrt_chunked_stamps_contract_version(tmp_path): @@ -140,4 +145,4 @@ def test_vrt_chunked_stamps_contract_version(tmp_path): da = read_vrt(str(vrt), chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION diff --git a/xrspatial/geotiff/tests/test_georef_status_2136.py b/xrspatial/geotiff/tests/test_georef_status_2136.py new file mode 100644 index 000000000..f05a1995f --- /dev/null +++ b/xrspatial/geotiff/tests/test_georef_status_2136.py @@ -0,0 +1,442 @@ +"""Canonical ``attrs['georef_status']`` covers the five reader states (#2136). + +Pre-#2136 the read paths emitted three independent signals -- +``attrs['crs']`` / ``attrs['crs_wkt']``, ``attrs['transform']``, and the +``_xrspatial_no_georef`` marker -- and downstream code had to reconcile +them by hand. Two distinct on-disk situations (rotated-with-``allow_rotated`` +vs truly-no-transform) looked identical via the public attrs. + +Contract v3 adds one canonical attr that encodes the five distinct +states the reader can land in: + +* ``full`` -- CRS resolved + axis-aligned transform. +* ``transform_only`` -- transform present, no CRS. +* ``crs_only`` -- CRS present, no transform tags. +* ``none`` -- neither CRS nor transform. +* ``rotated_dropped`` -- rotated transform tags dropped under + ``allow_rotated=True``. + +The attr is additive: ``crs`` / ``crs_wkt`` / ``transform`` / +``_xrspatial_no_georef`` keep their existing semantics. The tests here +pin the value per reader state and on round-trip. +""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, read_vrt, to_geotiff +from xrspatial.geotiff._attrs import ( + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY, + _ATTRS_CONTRACT_VERSION, + _compute_georef_status, + _compute_georef_status_from_parts, +) +from xrspatial.geotiff._coords import _NO_GEOREF_KEY +from xrspatial.geotiff._geotags import GeoInfo, GeoTransform + +tifffile = pytest.importorskip("tifffile") + +# Reuse the rotated-TIFF writer from the #2115 test rather than copying +# the byte layout. The function is private to that test module but +# the test runner sees the package directory so the import succeeds. +from xrspatial.geotiff.tests.test_allow_rotated_geotiff_2115 import ( # noqa: E402 + _ROTATED_M, + _write_rotated_tiff, +) + + +_STATUS_KEY = 'georef_status' + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Unit tests on the helpers +# --------------------------------------------------------------------------- + + +def test_contract_version_is_three(): + """The new attr lands in v3; pin the bump alongside the new key.""" + assert _ATTRS_CONTRACT_VERSION == 3 + + +def test_compute_status_full(): + info = GeoInfo( + transform=GeoTransform(origin_x=0.0, origin_y=0.0, + pixel_width=1.0, pixel_height=-1.0), + has_georef=True, crs_epsg=4326, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_FULL + + +def test_compute_status_transform_only(): + info = GeoInfo( + transform=GeoTransform(origin_x=0.0, origin_y=0.0, + pixel_width=1.0, pixel_height=-1.0), + has_georef=True, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_TRANSFORM_ONLY + + +def test_compute_status_crs_only(): + info = GeoInfo(has_georef=False, crs_epsg=4326) + assert _compute_georef_status(info) == GEOREF_STATUS_CRS_ONLY + + +def test_compute_status_crs_only_wkt_no_epsg(): + """``crs_wkt`` without an EPSG code is still ``crs_only``: the + decision rule treats either CRS signal as present.""" + info = GeoInfo(has_georef=False, crs_wkt='GEOGCRS["custom"]') + assert _compute_georef_status(info) == GEOREF_STATUS_CRS_ONLY + + +def test_compute_status_none(): + info = GeoInfo(has_georef=False) + assert _compute_georef_status(info) == GEOREF_STATUS_NONE + + +def test_compute_status_rotated_dropped(): + """``rotated_affine`` set on the transform is the dropped-rotation + signal; the value wins over CRS presence and ``has_georef``.""" + info = GeoInfo( + transform=GeoTransform(rotated_affine=(1.0, 0.5, 0.0, 0.5, -1.0, 0.0)), + has_georef=False, crs_epsg=4326, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_ROTATED_DROPPED + + +@pytest.mark.parametrize( + "has_transform,has_crs,rotated,expected", + [ + (True, True, False, GEOREF_STATUS_FULL), + (True, False, False, GEOREF_STATUS_TRANSFORM_ONLY), + (False, True, False, GEOREF_STATUS_CRS_ONLY), + (False, False, False, GEOREF_STATUS_NONE), + (False, False, True, GEOREF_STATUS_ROTATED_DROPPED), + (True, True, True, GEOREF_STATUS_ROTATED_DROPPED), + ], +) +def test_compute_status_from_parts(has_transform, has_crs, rotated, expected): + """The VRT-side helper mirrors the GeoInfo decision exactly.""" + assert _compute_georef_status_from_parts( + has_transform=has_transform, has_crs=has_crs, + rotated_dropped=rotated, + ) == expected + + +# --------------------------------------------------------------------------- +# Read-path coverage: TIFF +# --------------------------------------------------------------------------- + + +def _make_full_tiff(path): + """Float coords + CRS -> ``full``. Goes through to_geotiff so the + fixture exercises the same writer path real callers hit.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + attrs={'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_transform_only_tiff(path): + """Float coords, no CRS -> ``transform_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + ) + to_geotiff(da, path) + + +def _make_crs_only_tiff(path): + """No-georef marker + CRS -> ``crs_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(4, dtype=np.int64), + }, + dims=('y', 'x'), + attrs={_NO_GEOREF_KEY: True, 'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_none_tiff(path): + """No-georef marker, no CRS -> ``none``. Equivalent to a plain image.""" + arr = np.zeros((4, 4), dtype=np.float32) + # Write a bare TIFF with no GeoTIFF tags at all -- this is the + # tightest 'none' fixture; it has no GeoKeyDirectory, no transform + # tags, and no marker on disk. The reader stamps the marker on read. + tifffile.imwrite( + path, arr, photometric='minisblack', planarconfig='contig', + metadata=None, + ) + + +def test_full_status(tmp_path): + path = str(tmp_path / "georef_status_2136_full.tif") + _make_full_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + assert 'transform' in rd.attrs + assert rd.attrs.get('crs') == 4326 + + +def test_transform_only_status(tmp_path): + path = str(tmp_path / "georef_status_2136_xform_only.tif") + _make_transform_only_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_TRANSFORM_ONLY + assert 'transform' in rd.attrs + assert 'crs' not in rd.attrs + # ``crs_wkt`` may be absent or present depending on whether the + # writer emitted GeoKey citation text; either way the status is the + # source of truth for "no CRS". + + +def test_crs_only_status(tmp_path): + path = str(tmp_path / "georef_status_2136_crs_only.tif") + _make_crs_only_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_CRS_ONLY + assert rd.attrs.get('crs') == 4326 + assert 'transform' not in rd.attrs + # Reader still stamps the legacy marker for back-compat. The new + # attr is additive; pinning both keys here so a future change that + # drops one is caught at the same site. + assert rd.attrs.get(_NO_GEOREF_KEY) is True + + +def test_none_status(tmp_path): + path = str(tmp_path / "georef_status_2136_none.tif") + _make_none_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_NONE + assert 'transform' not in rd.attrs + assert 'crs' not in rd.attrs + assert rd.attrs.get(_NO_GEOREF_KEY) is True + + +def test_rotated_dropped_status(tmp_path): + """Rotated ``ModelTransformationTag`` + ``allow_rotated=True`` is + the only state where the attr disambiguates a case that all four + other public signals were silent about.""" + path = str(tmp_path / "georef_status_2136_rotated.tif") + arr = np.arange(20, dtype='{crs_wkt}\n' if crs_wkt else '' + ) + gt_elem = ( + f' {geo_transform}\n' + if geo_transform else '' + ) + vrt_path.write_text( + f'\n' + f'{crs_elem}{gt_elem}' + ' \n' + ' \n' + f' {source_name}' + '\n' + ' 1\n' + f' \n' + f' \n' + ' \n' + ' \n' + '\n' + ) + + +def test_vrt_full_status(tmp_path): + src = tmp_path / "georef_status_2136_vrt_full_src.tif" + _make_full_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_full.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + geo_transform='100.0, 1.0, 0.0, 200.5, 0.0, -1.0', + ) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + + +def test_vrt_crs_only_status(tmp_path): + """VRT with SRS but no GeoTransform element -> ``crs_only``.""" + src = tmp_path / "georef_status_2136_vrt_crsonly_src.tif" + _make_none_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_crsonly.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + ) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_CRS_ONLY + + +def test_vrt_none_status(tmp_path): + """VRT with neither SRS nor GeoTransform -> ``none``.""" + src = tmp_path / "georef_status_2136_vrt_none_src.tif" + _make_none_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_none.vrt" + _write_vrt(vrt, os.path.basename(src), height=4, width=4) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_NONE + + +def test_vrt_full_status_chunked(tmp_path): + src = tmp_path / "georef_status_2136_vrt_full_chunked_src.tif" + _make_full_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_full_chunked.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + geo_transform='100.0, 1.0, 0.0, 200.5, 0.0, -1.0', + ) + rd = read_vrt(str(vrt), chunks=2) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + + +# --------------------------------------------------------------------------- +# Round-trip stability: the attr survives write -> read for every state +# that has a writable encoding (full, transform_only, crs_only, none). +# rotated_dropped is read-only by nature: ``to_geotiff`` does not emit +# rotated ``ModelTransformationTag`` entries, so a write would +# axis-align the matrix. That asymmetry is documented in the issue +# under "Out of scope" / "rotated_dropped is for consumers". +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "make,expected", + [ + (_make_full_tiff, GEOREF_STATUS_FULL), + (_make_transform_only_tiff, GEOREF_STATUS_TRANSFORM_ONLY), + (_make_crs_only_tiff, GEOREF_STATUS_CRS_ONLY), + (_make_none_tiff, GEOREF_STATUS_NONE), + ], +) +def test_roundtrip_preserves_status(tmp_path, make, expected): + """Write a fixture of each writable state, read it, then write + + read again. The status attr is stable across the cycle because the + underlying CRS / transform decisions the writer makes are + deterministic from the input attrs. + + The fixture set deliberately overlaps with the per-state tests + above; the value of this parametrised test is the *cycle*, not the + one-shot read. + """ + p1 = str(tmp_path / f"georef_status_2136_rt_{expected}_1.tif") + make(p1) + rd1 = open_geotiff(p1) + assert rd1.attrs[_STATUS_KEY] == expected + + p2 = str(tmp_path / f"georef_status_2136_rt_{expected}_2.tif") + to_geotiff(rd1, p2) + rd2 = open_geotiff(p2) + assert rd2.attrs[_STATUS_KEY] == expected From 89283ce0cd475f8a94649dd18a360fd4497b5868 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 19 May 2026 12:00:22 -0700 Subject: [PATCH 2/2] Address review on georef_status (#2136) - Expose GEOREF_STATUS_* constants and GEOREF_STATUS_VALUES through xrspatial.geotiff. Downstream code can now branch on the constants via the public package surface; previously the only path was an import from the private _attrs module. - Align the VRT branches with _compute_georef_status by checking ``vrt.crs_wkt is not None`` rather than ``bool(vrt.crs_wkt)``. The VRT XML parser returns None (not "") for missing/empty SRS today; pinning the rule defends the alignment if the parser ever changes. - Drop the unused _ROTATED_M import in the test module. - Fix the _compute_georef_status docstring to enumerate the read call sites instead of an out-of-date count. --- xrspatial/geotiff/__init__.py | 12 ++++++++++++ xrspatial/geotiff/_attrs.py | 18 ++++++++++++------ xrspatial/geotiff/_backends/vrt.py | 13 +++++++------ xrspatial/geotiff/tests/test_features.py | 9 +++++++++ .../geotiff/tests/test_georef_status_2136.py | 17 ++++++++++++++++- 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 342811f00..8c992a40b 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -74,6 +74,12 @@ # and internal callers that genuinely need it can import directly from # ``xrspatial.geotiff._reader``. See issue #1708. from ._attrs import ( + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY, + GEOREF_STATUS_VALUES, _LEVEL_RANGES, _VALID_COMPRESSIONS, _extent_to_window, @@ -124,6 +130,12 @@ 'ConflictingNodataError', 'GeoTIFFAmbiguousMetadataError', 'GeoTIFFFallbackWarning', + 'GEOREF_STATUS_CRS_ONLY', + 'GEOREF_STATUS_FULL', + 'GEOREF_STATUS_NONE', + 'GEOREF_STATUS_ROTATED_DROPPED', + 'GEOREF_STATUS_TRANSFORM_ONLY', + 'GEOREF_STATUS_VALUES', 'InvalidCRSCodeError', 'MixedBandMetadataError', 'NonUniformCoordsError', diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index d8f5d4159..6f5174076 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -198,7 +198,10 @@ GEOREF_STATUS_NONE = 'none' GEOREF_STATUS_ROTATED_DROPPED = 'rotated_dropped' -_GEOREF_STATUS_VALUES = frozenset({ +# Public frozenset of every valid ``georef_status`` value. Exposed so +# downstream code can validate user-set values without hard-coding the +# five-string list (e.g. ``status in GEOREF_STATUS_VALUES``). +GEOREF_STATUS_VALUES = frozenset({ GEOREF_STATUS_FULL, GEOREF_STATUS_TRANSFORM_ONLY, GEOREF_STATUS_CRS_ONLY, @@ -388,11 +391,14 @@ def _compute_georef_status(geo_info) -> str: reader change cannot accidentally re-route a real "no transform" file into the rotated bucket. - The four backends (eager, dask, GPU) call this through - :func:`_populate_attrs_from_geo_info`; the VRT inline paths import - this helper directly because they build their attrs dicts inline - (see ``_backends/vrt.py``). Keep all five callers in lockstep by - routing every decision through this one function. + The eager numpy, dask, and three GPU read sites (chunked / eager / + tile in ``_backends/gpu.py``) all call this through + :func:`_populate_attrs_from_geo_info`. The two VRT inline branches + (eager + chunked in ``_backends/vrt.py``) call + :func:`_compute_georef_status_from_parts` directly because they + build their attrs dict from a different dataclass and would have to + synthesise a fake ``GeoInfo`` to reuse this helper. Keep all the + call sites in lockstep through one of the two helpers. """ transform = getattr(geo_info, 'transform', None) rotated_affine = ( diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 8934fe55c..09db21c04 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -281,13 +281,14 @@ def read_vrt(source: str, *, # with the non-VRT read paths. The VRT reader does not currently # expose a rotated-dropped path (rotated VRT geo_transforms are # rejected upstream by ``_validate_read_geo_info``), so the rotated - # arg stays False here. The eager VRT branch derives - # ``has_transform`` from the parsed ``geo_transform`` element and - # ``has_crs`` from ``vrt.crs_wkt`` to keep the decision aligned - # with ``_compute_georef_status``. + # arg stays False here. ``has_crs`` uses ``is not None`` (not a + # truthiness check) to stay aligned with ``_compute_georef_status``; + # the VRT XML parser returns None for missing/empty ```` rather + # than ``""``, but pinning the rule defends the alignment if the + # parser ever changes. attrs['georef_status'] = _compute_georef_status_from_parts( has_transform=gt is not None, - has_crs=bool(vrt.crs_wkt), + has_crs=vrt.crs_wkt is not None, ) if gt is None: # Mirror the eager non-VRT no-georef path: stamp the no-georef @@ -716,7 +717,7 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # through the VRT reader today. attrs['georef_status'] = _compute_georef_status_from_parts( has_transform=gt is not None, - has_crs=bool(vrt.crs_wkt), + has_crs=vrt.crs_wkt is not None, ) if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 420b44332..0d62563ba 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2783,6 +2783,15 @@ def test_all_lists_supported_functions(self): 'UnparseableCRSError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', + # Canonical georef_status constants (issue #2136). Exposed + # so downstream code can branch on the five reader states + # via constants rather than string literals. + 'GEOREF_STATUS_CRS_ONLY', + 'GEOREF_STATUS_FULL', + 'GEOREF_STATUS_NONE', + 'GEOREF_STATUS_ROTATED_DROPPED', + 'GEOREF_STATUS_TRANSFORM_ONLY', + 'GEOREF_STATUS_VALUES', 'open_geotiff', 'read_geotiff_gpu', 'read_geotiff_dask', diff --git a/xrspatial/geotiff/tests/test_georef_status_2136.py b/xrspatial/geotiff/tests/test_georef_status_2136.py index f05a1995f..311cba08d 100644 --- a/xrspatial/geotiff/tests/test_georef_status_2136.py +++ b/xrspatial/geotiff/tests/test_georef_status_2136.py @@ -49,7 +49,6 @@ # the byte layout. The function is private to that test module but # the test runner sees the package directory so the import succeeds. from xrspatial.geotiff.tests.test_allow_rotated_geotiff_2115 import ( # noqa: E402 - _ROTATED_M, _write_rotated_tiff, ) @@ -81,6 +80,22 @@ def test_contract_version_is_three(): assert _ATTRS_CONTRACT_VERSION == 3 +def test_public_constants_reexported(): + """The five status constants and ``GEOREF_STATUS_VALUES`` are part + of the public surface (issue #2136 / review follow-up). Downstream + consumers should be able to import them from ``xrspatial.geotiff`` + rather than reaching into the private ``_attrs`` module.""" + import xrspatial.geotiff as pkg + assert pkg.GEOREF_STATUS_FULL == GEOREF_STATUS_FULL + assert pkg.GEOREF_STATUS_TRANSFORM_ONLY == GEOREF_STATUS_TRANSFORM_ONLY + assert pkg.GEOREF_STATUS_CRS_ONLY == GEOREF_STATUS_CRS_ONLY + assert pkg.GEOREF_STATUS_NONE == GEOREF_STATUS_NONE + assert pkg.GEOREF_STATUS_ROTATED_DROPPED == GEOREF_STATUS_ROTATED_DROPPED + assert pkg.GEOREF_STATUS_VALUES == frozenset({ + 'full', 'transform_only', 'crs_only', 'none', 'rotated_dropped', + }) + + def test_compute_status_full(): info = GeoInfo( transform=GeoTransform(origin_x=0.0, origin_y=0.0,