diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index b979d92e..63e4561f 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -74,6 +74,12 @@ # and internal callers that genuinely need it can import directly from # ``xrspatial.geotiff._reader``. See issue #1708. from ._attrs import ( + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY, + GEOREF_STATUS_VALUES, _LEVEL_RANGES, _VALID_COMPRESSIONS, _extent_to_window, @@ -128,6 +134,12 @@ 'ConflictingNodataError', 'GeoTIFFAmbiguousMetadataError', 'GeoTIFFFallbackWarning', + 'GEOREF_STATUS_CRS_ONLY', + 'GEOREF_STATUS_FULL', + 'GEOREF_STATUS_NONE', + 'GEOREF_STATUS_ROTATED_DROPPED', + 'GEOREF_STATUS_TRANSFORM_ONLY', + 'GEOREF_STATUS_VALUES', 'InvalidCRSCodeError', 'MixedBandMetadataError', 'NonUniformCoordsError', diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index 30f22e44..17cc4dc7 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -25,7 +25,7 @@ reconstruct them from canonical state. The contract version is recorded in ``attrs['_xrspatial_geotiff_contract']`` -(currently ``2``). Consumers can branch on this integer if the tier +(currently ``3``). Consumers can branch on this integer if the tier split changes in a future release. Canonical (xrspatial owns these; round-trip stable): @@ -65,6 +65,13 @@ float-because-promoted. - ``raster_type``: ``'area'`` (implicit / RasterPixelIsArea) or ``'point'`` (explicit / RasterPixelIsPoint). +- ``georef_status``: one of ``'full'``, ``'transform_only'``, ``'crs_only'``, + ``'none'``, ``'rotated_dropped'``. Single attr that encodes the five + distinct states the reader can land in when CRS / transform tags are + combined. See :func:`_compute_georef_status` for the decision table and + issue #2136 for the rationale. The attr is additive: ``crs`` / ``crs_wkt`` + / ``transform`` / ``_xrspatial_no_georef`` remain present with unchanged + semantics so existing consumers keep working. - ``extra_tags``: list of ``(tag_id, type_id, count, value)`` tuples for TIFF tags outside the structured set. Omitted when no out-of-band tags are present. @@ -244,7 +251,37 @@ # matplotlib-colormap attrs that v1 still emitted under a # ``DeprecationWarning``. Downstream code that read those keys via # ``attrs[key]`` now sees ``KeyError`` rather than the deprecated value. -_ATTRS_CONTRACT_VERSION = 2 +# +# Version 3 (issue #2136) adds ``attrs['georef_status']`` to the canonical +# tier. Existing keys (``crs``, ``crs_wkt``, ``transform``, the +# ``_xrspatial_no_georef`` marker) keep their pre-v3 shape so downstream +# code that branches on them still works; the new attr is additive and +# disambiguates ``crs_only`` from ``none`` and ``rotated_dropped`` from +# the truly-no-transform case. +_ATTRS_CONTRACT_VERSION = 3 + + +# Canonical ``attrs['georef_status']`` values (issue #2136). One attr +# encodes the five distinct states the reader can land in when CRS and +# transform tags are combined; downstream code can branch on this rather +# than reconstructing the state from the union of ``crs``, ``crs_wkt``, +# ``transform``, and ``_xrspatial_no_georef``. +GEOREF_STATUS_FULL = 'full' +GEOREF_STATUS_TRANSFORM_ONLY = 'transform_only' +GEOREF_STATUS_CRS_ONLY = 'crs_only' +GEOREF_STATUS_NONE = 'none' +GEOREF_STATUS_ROTATED_DROPPED = 'rotated_dropped' + +# Public frozenset of every valid ``georef_status`` value. Exposed so +# downstream code can validate user-set values without hard-coding the +# five-string list (e.g. ``status in GEOREF_STATUS_VALUES``). +GEOREF_STATUS_VALUES = frozenset({ + GEOREF_STATUS_FULL, + GEOREF_STATUS_TRANSFORM_ONLY, + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, +}) # String identifiers (used in xrspatial attrs) -> TIFF ResolutionUnit tag ids. @@ -304,6 +341,12 @@ class GeoTIFFMetadata: # VRT-only vrt_holes: list | None = None + # Canonical reader-state classifier (issue #2136). Carried on the + # record so the eager / dask / GPU / VRT read paths all stamp it via + # the same :func:`metadata_to_attrs` marshalling step instead of + # branching on attrs after the dict has been built. + georef_status: str | None = None + # Contract version stamped on read contract_version: int = _ATTRS_CONTRACT_VERSION @@ -386,6 +429,13 @@ def geo_info_to_metadata(geo_info, *, window=None) -> GeoTIFFMetadata: x_resolution=geo_info.x_resolution, y_resolution=geo_info.y_resolution, resolution_unit=resolution_unit, + # ``georef_status`` (#2136) is computed off the unmodified + # ``geo_info`` rather than the post-branch metadata fields so a + # future change to which fields the record carries cannot + # accidentally shift the status value. The VRT inline path uses + # ``_compute_georef_status_from_parts`` to fill this field + # without synthesising a ``GeoInfo``. + georef_status=_compute_georef_status(geo_info), contract_version=_ATTRS_CONTRACT_VERSION, ) @@ -400,6 +450,13 @@ def metadata_to_attrs(md: GeoTIFFMetadata) -> dict: """ attrs: dict = {'_xrspatial_geotiff_contract': md.contract_version} + # ``georef_status`` (#2136) is stamped before the optional CRS / + # transform branches so the value reflects the reader's state + # decision (computed off ``geo_info``) rather than which fields + # happened to land in the emitted dict. + if md.georef_status is not None: + attrs['georef_status'] = md.georef_status + if md.crs_epsg is not None: attrs['crs'] = md.crs_epsg if md.crs_wkt is not None: @@ -535,6 +592,7 @@ def attrs_to_metadata(attrs) -> GeoTIFFMetadata: y_resolution=attrs.get('y_resolution'), resolution_unit=attrs.get('resolution_unit'), vrt_holes=attrs.get('vrt_holes'), + georef_status=attrs.get('georef_status'), contract_version=contract_version, ) @@ -725,6 +783,89 @@ def _validate_read_geo_info( }) +def _compute_georef_status(geo_info) -> str: + """Classify ``geo_info`` into one of the five ``georef_status`` values. + + See the module docstring and issue #2136 for the full rationale. The + decision table: + + ============================ ================= =============== + transform tags CRS present georef_status + ============================ ================= =============== + axis-aligned yes ``full`` + axis-aligned no ``transform_only`` + absent yes ``crs_only`` + absent no ``none`` + rotated, dropped either ``rotated_dropped`` + ============================ ================= =============== + + "CRS present" is signalled by either ``geo_info.crs_epsg`` or + ``geo_info.crs_wkt`` being non-None. The rotated-dropped branch + fires when the upstream reader saw a rotated + ``ModelTransformationTag`` and was opened with ``allow_rotated=True``; + that path returns ``has_georef=False`` with the rotated 6-tuple on + ``geo_info.transform.rotated_affine``. The check is on + ``rotated_affine`` rather than the surrounding state so a future + reader change cannot accidentally re-route a real "no transform" + file into the rotated bucket. + + The eager numpy, dask, and three GPU read sites (chunked / eager / + tile in ``_backends/gpu.py``) all call this through + :func:`_populate_attrs_from_geo_info`. The two VRT inline branches + (eager + chunked in ``_backends/vrt.py``) call + :func:`_compute_georef_status_from_parts` directly because they + build their attrs dict from a different dataclass and would have to + synthesise a fake ``GeoInfo`` to reuse this helper. Keep all the + call sites in lockstep through one of the two helpers. + """ + transform = getattr(geo_info, 'transform', None) + rotated_affine = ( + getattr(transform, 'rotated_affine', None) + if transform is not None else None + ) + if rotated_affine is not None: + return GEOREF_STATUS_ROTATED_DROPPED + has_georef = bool(getattr(geo_info, 'has_georef', False)) + has_crs = ( + getattr(geo_info, 'crs_epsg', None) is not None + or getattr(geo_info, 'crs_wkt', None) is not None + ) + if has_georef and has_crs: + return GEOREF_STATUS_FULL + if has_georef: + return GEOREF_STATUS_TRANSFORM_ONLY + if has_crs: + return GEOREF_STATUS_CRS_ONLY + return GEOREF_STATUS_NONE + + +def _compute_georef_status_from_parts( + *, + has_transform: bool, + has_crs: bool, + rotated_dropped: bool = False, +) -> str: + """Compute ``georef_status`` from raw booleans rather than a ``GeoInfo``. + + The VRT inline branches do not build a ``GeoInfo`` instance: they + parse the VRT XML straight into ``geo_transform`` / ``crs_wkt`` + fields on a different dataclass. Calling :func:`_compute_georef_status` + from those sites would require synthesising a fake ``GeoInfo`` for + each branch. This helper takes the underlying booleans directly so + the VRT paths and the ``_populate_attrs_from_geo_info`` path share + the same decision rule without the intermediate object. + """ + if rotated_dropped: + return GEOREF_STATUS_ROTATED_DROPPED + if has_transform and has_crs: + return GEOREF_STATUS_FULL + if has_transform: + return GEOREF_STATUS_TRANSFORM_ONLY + if has_crs: + return GEOREF_STATUS_CRS_ONLY + return GEOREF_STATUS_NONE + + def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None: """Populate ``attrs`` with all GeoTIFF metadata from ``geo_info``. @@ -757,7 +898,9 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None # produce the same attrs surface; centralising on the record lets # the VRT path emit the same field set without copying this block. # The ``allow_rotated=True`` opt-in CRS-drop (#2126) is handled - # inside ``geo_info_to_metadata``. See issue #2139 / ``metadata_to_attrs``. + # inside ``geo_info_to_metadata``. ``georef_status`` (#2136) rides + # on the record so the VRT path can stamp it via the same + # marshalling step. See issue #2139 / ``metadata_to_attrs``. md = geo_info_to_metadata(geo_info, window=window) attrs.update(metadata_to_attrs(md)) diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 70948532..58828cf7 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -15,6 +15,7 @@ from .._attrs import ( GeoTIFFMetadata, + _compute_georef_status_from_parts, _set_nodata_attrs, metadata_to_attrs, ) @@ -319,6 +320,17 @@ def read_vrt(source: str, *, # ``vrt.crs_wkt`` carries an empty string when the VRT XML has a # ```` element but no recognised CRS body; treat empty as # absent so ``metadata_to_attrs`` does not emit ``attrs['crs_wkt']=''``. + # + # ``georef_status`` (issue #2136) is computed from raw booleans and + # carried on the record so the VRT path stamps the same five-valued + # classifier the non-VRT read paths emit. ``has_crs`` mirrors + # ``_compute_georef_status`` by gating on ``is not None`` rather than + # truthiness, so a future parser change that returns ``""`` instead + # of ``None`` for a missing ```` would still route to ``none`` + # via the empty-string check above. ``rotated_dropped=_vrt_is_rotated`` + # matches the rotated arm on the non-VRT path: a VRT ``geo_transform`` + # with non-zero rotation/skew lands the array in the same + # ``rotated_dropped`` bucket as a rotated ``ModelTransformationTag``. _vrt_keep_crs = bool(vrt.crs_wkt) and not _vrt_is_rotated _vrt_epsg = _wkt_to_epsg(vrt.crs_wkt) if _vrt_keep_crs else None _vrt_md = GeoTIFFMetadata( @@ -330,6 +342,11 @@ def read_vrt(source: str, *, # callers can detect a partial mosaic by attribute lookup. See # issue #1734. vrt_holes=list(vrt.holes) if vrt.holes else None, + georef_status=_compute_georef_status_from_parts( + has_transform=gt is not None and not _vrt_is_rotated, + has_crs=vrt.crs_wkt is not None and not _vrt_is_rotated, + rotated_dropped=_vrt_is_rotated, + ), ) attrs = metadata_to_attrs(_vrt_md) # When a specific band is selected, source its nodata from that @@ -819,12 +836,20 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # Rotated VRTs drop CRS attrs alongside the transform (#2122). _vrt_keep_crs = bool(vrt.crs_wkt) and not _vrt_is_rotated _vrt_epsg = _wkt_to_epsg(vrt.crs_wkt) if _vrt_keep_crs else None + # ``georef_status`` (issue #2136). See the eager VRT branch above + # for the rationale; the rotated VRT path lands the array in the + # ``rotated_dropped`` bucket so consumers can branch on it. _vrt_md = GeoTIFFMetadata( transform=_vrt_transform, crs_epsg=_vrt_epsg, crs_wkt=vrt.crs_wkt if _vrt_keep_crs else None, raster_type='point' if vrt.raster_type == 'point' else 'area', has_georef=gt is not None and not _vrt_is_rotated, + georef_status=_compute_georef_status_from_parts( + has_transform=gt is not None and not _vrt_is_rotated, + has_crs=vrt.crs_wkt is not None and not _vrt_is_rotated, + rotated_dropped=_vrt_is_rotated, + ), ) attrs = metadata_to_attrs(_vrt_md) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py index 306fbd82..58d0d36a 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_canonical_1984.py @@ -82,6 +82,11 @@ 'x_resolution', 'y_resolution', 'resolution_unit', + # Added in contract v3 (issue #2136). The fixture is georef + CRS + # so the round-tripped value is the ``'full'`` literal; the + # value-equality check lives on ``test_georef_status_roundtrip`` + # below to keep the membership and value assertions independent. + 'georef_status', _CONTRACT_KEY, ) @@ -292,6 +297,14 @@ def test_contract_version_roundtrip(canonical_roundtrip): assert rd.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION +def test_georef_status_roundtrip(canonical_roundtrip): + """``georef_status`` (issue #2136) is canonical from contract v3. + The fixture sets ``crs`` + axis-aligned transform-from-coords, so + the round-tripped value must be the ``'full'`` literal.""" + rd, _ = canonical_roundtrip + assert rd.attrs['georef_status'] == 'full' + + # --------------------------------------------------------------------------- # Per-backend coverage for canonical-key *presence*. # diff --git a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py index fa3c066a..7375033d 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_passthrough_1984.py @@ -300,18 +300,24 @@ def test_removed_attrs_absent_after_roundtrip(tmp_path): ) -def test_contract_version_is_two(tmp_path): - """``attrs['_xrspatial_geotiff_contract']`` is ``2`` on every read. - - The contract version is the user-visible signal that the removal - landed. Downstream code branching on the integer needs the bump - to fire here on every read path. +def test_contract_version_is_current(tmp_path): + """``attrs['_xrspatial_geotiff_contract']`` matches the constant on + every read. + + The contract version is the user-visible signal that a tier change + landed. Issue #2016 bumped it to 2 (removal of deprecated GeoKey + attrs); issue #2136 bumped it to 3 (addition of + ``attrs['georef_status']``). Pinning against ``_ATTRS_CONTRACT_VERSION`` + means the next bump only has to touch the constant and the + bump-specific tests, not every "is the stamp set" assertion. """ + from xrspatial.geotiff._attrs import _ATTRS_CONTRACT_VERSION + da = _make_da(crs=4326) - rd = _roundtrip(tmp_path, da, name='contract_v2_signal.tif') + rd = _roundtrip(tmp_path, da, name='contract_version_signal.tif') - assert rd.attrs.get('_xrspatial_geotiff_contract') == 2, ( + assert rd.attrs.get('_xrspatial_geotiff_contract') == _ATTRS_CONTRACT_VERSION, ( f"contract version stamp on a fresh read is " - f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; issue " - f"#2016 bumped it to 2." + f"{rd.attrs.get('_xrspatial_geotiff_contract')!r}; expected " + f"{_ATTRS_CONTRACT_VERSION}." ) diff --git a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py index 65d6c7f7..90bd5310 100644 --- a/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py +++ b/xrspatial/geotiff/tests/test_attrs_contract_version_1984.py @@ -78,9 +78,14 @@ def _write_minimal_vrt(vrt_path, source_name, *, height, width): ) -def test_attrs_contract_version_constant_is_two(): - """Pin the integer value so a careless bump shows up here first.""" - assert _ATTRS_CONTRACT_VERSION == 2 +def test_attrs_contract_version_constant_is_current(): + """Pin the integer value so a careless bump shows up here first. + + Contract v3 (issue #2136) added ``attrs['georef_status']`` to the + canonical tier. Bumping past 3 should be paired with a docs update + and a sibling test for the new key. + """ + assert _ATTRS_CONTRACT_VERSION == 3 def test_eager_numpy_stamps_contract_version(tmp_path): @@ -89,7 +94,7 @@ def test_eager_numpy_stamps_contract_version(tmp_path): da = open_geotiff(path) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_dask_numpy_stamps_contract_version(tmp_path): @@ -98,7 +103,7 @@ def test_dask_numpy_stamps_contract_version(tmp_path): da = open_geotiff(path, chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION @_gpu_only @@ -108,7 +113,7 @@ def test_gpu_stamps_contract_version(tmp_path): da = open_geotiff(path, gpu=True) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION @_gpu_only @@ -118,7 +123,7 @@ def test_dask_gpu_stamps_contract_version(tmp_path): da = open_geotiff(path, gpu=True, chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_vrt_eager_stamps_contract_version(tmp_path): @@ -129,7 +134,7 @@ def test_vrt_eager_stamps_contract_version(tmp_path): da = read_vrt(str(vrt)) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION def test_vrt_chunked_stamps_contract_version(tmp_path): @@ -140,4 +145,4 @@ def test_vrt_chunked_stamps_contract_version(tmp_path): da = read_vrt(str(vrt), chunks=32) - assert da.attrs[_CONTRACT_KEY] == 2 + assert da.attrs[_CONTRACT_KEY] == _ATTRS_CONTRACT_VERSION diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 148ea5ba..ef7906e4 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2783,6 +2783,15 @@ def test_all_lists_supported_functions(self): 'UnparseableCRSError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', + # Canonical georef_status constants (issue #2136). Exposed + # so downstream code can branch on the five reader states + # via constants rather than string literals. + 'GEOREF_STATUS_CRS_ONLY', + 'GEOREF_STATUS_FULL', + 'GEOREF_STATUS_NONE', + 'GEOREF_STATUS_ROTATED_DROPPED', + 'GEOREF_STATUS_TRANSFORM_ONLY', + 'GEOREF_STATUS_VALUES', # Issue #2137: tiered feature inventory exposed alongside # the writer's ``allow_experimental_codecs`` opt-in. 'SUPPORTED_FEATURES', diff --git a/xrspatial/geotiff/tests/test_georef_status_2136.py b/xrspatial/geotiff/tests/test_georef_status_2136.py new file mode 100644 index 00000000..311cba08 --- /dev/null +++ b/xrspatial/geotiff/tests/test_georef_status_2136.py @@ -0,0 +1,457 @@ +"""Canonical ``attrs['georef_status']`` covers the five reader states (#2136). + +Pre-#2136 the read paths emitted three independent signals -- +``attrs['crs']`` / ``attrs['crs_wkt']``, ``attrs['transform']``, and the +``_xrspatial_no_georef`` marker -- and downstream code had to reconcile +them by hand. Two distinct on-disk situations (rotated-with-``allow_rotated`` +vs truly-no-transform) looked identical via the public attrs. + +Contract v3 adds one canonical attr that encodes the five distinct +states the reader can land in: + +* ``full`` -- CRS resolved + axis-aligned transform. +* ``transform_only`` -- transform present, no CRS. +* ``crs_only`` -- CRS present, no transform tags. +* ``none`` -- neither CRS nor transform. +* ``rotated_dropped`` -- rotated transform tags dropped under + ``allow_rotated=True``. + +The attr is additive: ``crs`` / ``crs_wkt`` / ``transform`` / +``_xrspatial_no_georef`` keep their existing semantics. The tests here +pin the value per reader state and on round-trip. +""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, read_vrt, to_geotiff +from xrspatial.geotiff._attrs import ( + GEOREF_STATUS_CRS_ONLY, + GEOREF_STATUS_FULL, + GEOREF_STATUS_NONE, + GEOREF_STATUS_ROTATED_DROPPED, + GEOREF_STATUS_TRANSFORM_ONLY, + _ATTRS_CONTRACT_VERSION, + _compute_georef_status, + _compute_georef_status_from_parts, +) +from xrspatial.geotiff._coords import _NO_GEOREF_KEY +from xrspatial.geotiff._geotags import GeoInfo, GeoTransform + +tifffile = pytest.importorskip("tifffile") + +# Reuse the rotated-TIFF writer from the #2115 test rather than copying +# the byte layout. The function is private to that test module but +# the test runner sees the package directory so the import succeeds. +from xrspatial.geotiff.tests.test_allow_rotated_geotiff_2115 import ( # noqa: E402 + _write_rotated_tiff, +) + + +_STATUS_KEY = 'georef_status' + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Unit tests on the helpers +# --------------------------------------------------------------------------- + + +def test_contract_version_is_three(): + """The new attr lands in v3; pin the bump alongside the new key.""" + assert _ATTRS_CONTRACT_VERSION == 3 + + +def test_public_constants_reexported(): + """The five status constants and ``GEOREF_STATUS_VALUES`` are part + of the public surface (issue #2136 / review follow-up). Downstream + consumers should be able to import them from ``xrspatial.geotiff`` + rather than reaching into the private ``_attrs`` module.""" + import xrspatial.geotiff as pkg + assert pkg.GEOREF_STATUS_FULL == GEOREF_STATUS_FULL + assert pkg.GEOREF_STATUS_TRANSFORM_ONLY == GEOREF_STATUS_TRANSFORM_ONLY + assert pkg.GEOREF_STATUS_CRS_ONLY == GEOREF_STATUS_CRS_ONLY + assert pkg.GEOREF_STATUS_NONE == GEOREF_STATUS_NONE + assert pkg.GEOREF_STATUS_ROTATED_DROPPED == GEOREF_STATUS_ROTATED_DROPPED + assert pkg.GEOREF_STATUS_VALUES == frozenset({ + 'full', 'transform_only', 'crs_only', 'none', 'rotated_dropped', + }) + + +def test_compute_status_full(): + info = GeoInfo( + transform=GeoTransform(origin_x=0.0, origin_y=0.0, + pixel_width=1.0, pixel_height=-1.0), + has_georef=True, crs_epsg=4326, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_FULL + + +def test_compute_status_transform_only(): + info = GeoInfo( + transform=GeoTransform(origin_x=0.0, origin_y=0.0, + pixel_width=1.0, pixel_height=-1.0), + has_georef=True, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_TRANSFORM_ONLY + + +def test_compute_status_crs_only(): + info = GeoInfo(has_georef=False, crs_epsg=4326) + assert _compute_georef_status(info) == GEOREF_STATUS_CRS_ONLY + + +def test_compute_status_crs_only_wkt_no_epsg(): + """``crs_wkt`` without an EPSG code is still ``crs_only``: the + decision rule treats either CRS signal as present.""" + info = GeoInfo(has_georef=False, crs_wkt='GEOGCRS["custom"]') + assert _compute_georef_status(info) == GEOREF_STATUS_CRS_ONLY + + +def test_compute_status_none(): + info = GeoInfo(has_georef=False) + assert _compute_georef_status(info) == GEOREF_STATUS_NONE + + +def test_compute_status_rotated_dropped(): + """``rotated_affine`` set on the transform is the dropped-rotation + signal; the value wins over CRS presence and ``has_georef``.""" + info = GeoInfo( + transform=GeoTransform(rotated_affine=(1.0, 0.5, 0.0, 0.5, -1.0, 0.0)), + has_georef=False, crs_epsg=4326, + ) + assert _compute_georef_status(info) == GEOREF_STATUS_ROTATED_DROPPED + + +@pytest.mark.parametrize( + "has_transform,has_crs,rotated,expected", + [ + (True, True, False, GEOREF_STATUS_FULL), + (True, False, False, GEOREF_STATUS_TRANSFORM_ONLY), + (False, True, False, GEOREF_STATUS_CRS_ONLY), + (False, False, False, GEOREF_STATUS_NONE), + (False, False, True, GEOREF_STATUS_ROTATED_DROPPED), + (True, True, True, GEOREF_STATUS_ROTATED_DROPPED), + ], +) +def test_compute_status_from_parts(has_transform, has_crs, rotated, expected): + """The VRT-side helper mirrors the GeoInfo decision exactly.""" + assert _compute_georef_status_from_parts( + has_transform=has_transform, has_crs=has_crs, + rotated_dropped=rotated, + ) == expected + + +# --------------------------------------------------------------------------- +# Read-path coverage: TIFF +# --------------------------------------------------------------------------- + + +def _make_full_tiff(path): + """Float coords + CRS -> ``full``. Goes through to_geotiff so the + fixture exercises the same writer path real callers hit.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + attrs={'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_transform_only_tiff(path): + """Float coords, no CRS -> ``transform_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.array([200.0, 199.0, 198.0, 197.0]), + 'x': np.array([100.0, 101.0, 102.0, 103.0]), + }, + dims=('y', 'x'), + ) + to_geotiff(da, path) + + +def _make_crs_only_tiff(path): + """No-georef marker + CRS -> ``crs_only``.""" + da = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(4, dtype=np.int64), + }, + dims=('y', 'x'), + attrs={_NO_GEOREF_KEY: True, 'crs': 4326}, + ) + to_geotiff(da, path) + + +def _make_none_tiff(path): + """No-georef marker, no CRS -> ``none``. Equivalent to a plain image.""" + arr = np.zeros((4, 4), dtype=np.float32) + # Write a bare TIFF with no GeoTIFF tags at all -- this is the + # tightest 'none' fixture; it has no GeoKeyDirectory, no transform + # tags, and no marker on disk. The reader stamps the marker on read. + tifffile.imwrite( + path, arr, photometric='minisblack', planarconfig='contig', + metadata=None, + ) + + +def test_full_status(tmp_path): + path = str(tmp_path / "georef_status_2136_full.tif") + _make_full_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + assert 'transform' in rd.attrs + assert rd.attrs.get('crs') == 4326 + + +def test_transform_only_status(tmp_path): + path = str(tmp_path / "georef_status_2136_xform_only.tif") + _make_transform_only_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_TRANSFORM_ONLY + assert 'transform' in rd.attrs + assert 'crs' not in rd.attrs + # ``crs_wkt`` may be absent or present depending on whether the + # writer emitted GeoKey citation text; either way the status is the + # source of truth for "no CRS". + + +def test_crs_only_status(tmp_path): + path = str(tmp_path / "georef_status_2136_crs_only.tif") + _make_crs_only_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_CRS_ONLY + assert rd.attrs.get('crs') == 4326 + assert 'transform' not in rd.attrs + # Reader still stamps the legacy marker for back-compat. The new + # attr is additive; pinning both keys here so a future change that + # drops one is caught at the same site. + assert rd.attrs.get(_NO_GEOREF_KEY) is True + + +def test_none_status(tmp_path): + path = str(tmp_path / "georef_status_2136_none.tif") + _make_none_tiff(path) + rd = open_geotiff(path) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_NONE + assert 'transform' not in rd.attrs + assert 'crs' not in rd.attrs + assert rd.attrs.get(_NO_GEOREF_KEY) is True + + +def test_rotated_dropped_status(tmp_path): + """Rotated ``ModelTransformationTag`` + ``allow_rotated=True`` is + the only state where the attr disambiguates a case that all four + other public signals were silent about.""" + path = str(tmp_path / "georef_status_2136_rotated.tif") + arr = np.arange(20, dtype='{crs_wkt}\n' if crs_wkt else '' + ) + gt_elem = ( + f' {geo_transform}\n' + if geo_transform else '' + ) + vrt_path.write_text( + f'\n' + f'{crs_elem}{gt_elem}' + ' \n' + ' \n' + f' {source_name}' + '\n' + ' 1\n' + f' \n' + f' \n' + ' \n' + ' \n' + '\n' + ) + + +def test_vrt_full_status(tmp_path): + src = tmp_path / "georef_status_2136_vrt_full_src.tif" + _make_full_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_full.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + geo_transform='100.0, 1.0, 0.0, 200.5, 0.0, -1.0', + ) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + + +def test_vrt_crs_only_status(tmp_path): + """VRT with SRS but no GeoTransform element -> ``crs_only``.""" + src = tmp_path / "georef_status_2136_vrt_crsonly_src.tif" + _make_none_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_crsonly.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + ) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_CRS_ONLY + + +def test_vrt_none_status(tmp_path): + """VRT with neither SRS nor GeoTransform -> ``none``.""" + src = tmp_path / "georef_status_2136_vrt_none_src.tif" + _make_none_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_none.vrt" + _write_vrt(vrt, os.path.basename(src), height=4, width=4) + rd = read_vrt(str(vrt)) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_NONE + + +def test_vrt_full_status_chunked(tmp_path): + src = tmp_path / "georef_status_2136_vrt_full_chunked_src.tif" + _make_full_tiff(str(src)) + vrt = tmp_path / "georef_status_2136_vrt_full_chunked.vrt" + _write_vrt( + vrt, os.path.basename(src), height=4, width=4, + crs_wkt='EPSG:4326', + geo_transform='100.0, 1.0, 0.0, 200.5, 0.0, -1.0', + ) + rd = read_vrt(str(vrt), chunks=2) + assert rd.attrs[_STATUS_KEY] == GEOREF_STATUS_FULL + + +# --------------------------------------------------------------------------- +# Round-trip stability: the attr survives write -> read for every state +# that has a writable encoding (full, transform_only, crs_only, none). +# rotated_dropped is read-only by nature: ``to_geotiff`` does not emit +# rotated ``ModelTransformationTag`` entries, so a write would +# axis-align the matrix. That asymmetry is documented in the issue +# under "Out of scope" / "rotated_dropped is for consumers". +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "make,expected", + [ + (_make_full_tiff, GEOREF_STATUS_FULL), + (_make_transform_only_tiff, GEOREF_STATUS_TRANSFORM_ONLY), + (_make_crs_only_tiff, GEOREF_STATUS_CRS_ONLY), + (_make_none_tiff, GEOREF_STATUS_NONE), + ], +) +def test_roundtrip_preserves_status(tmp_path, make, expected): + """Write a fixture of each writable state, read it, then write + + read again. The status attr is stable across the cycle because the + underlying CRS / transform decisions the writer makes are + deterministic from the input attrs. + + The fixture set deliberately overlaps with the per-state tests + above; the value of this parametrised test is the *cycle*, not the + one-shot read. + """ + p1 = str(tmp_path / f"georef_status_2136_rt_{expected}_1.tif") + make(p1) + rd1 = open_geotiff(p1) + assert rd1.attrs[_STATUS_KEY] == expected + + p2 = str(tmp_path / f"georef_status_2136_rt_{expected}_2.tif") + to_geotiff(rd1, p2) + rd2 = open_geotiff(p2) + assert rd2.attrs[_STATUS_KEY] == expected