diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 6471bf462..3ca8470ac 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -474,6 +474,120 @@ def _parse_geokeys(ifd: IFD, data: bytes | memoryview, return geokeys +# Default relative tolerance for the multi-tiepoint consistency check. +# Applied as ``rel_tol * max(|sx|, |sy|, 1.0)``, so the absolute threshold +# tracks pixel size: on a 10 m pixel file the threshold is 10 µm, on a +# 1° pixel file it is ~11 cm. Surveying / high-precision geodetic workflows +# that want to catch GCP files with smaller residuals can pass a tighter +# ``rel_tol`` to :func:`_validate_tiepoint_consistency` directly. +_TIEPOINT_CONSISTENCY_REL_TOL = 1e-6 + + +def _validate_tiepoint_consistency(tiepoint: tuple, + origin_x: float, + origin_y: float, + sx: float, + sy: float, + *, + rel_tol: float = _TIEPOINT_CONSISTENCY_REL_TOL, + scale_source: str = "ModelPixelScale") -> None: + """Verify every ``ModelTiepointTag`` tuple agrees with the inferred affine. + + A ``ModelTiepointTag`` may carry one or many ``(I, J, K, X, Y, Z)`` + tuples. The single-tuple case (paired with ``ModelPixelScale``) is + by far the most common and is unambiguous. Files with multiple + tuples either repeat the same affine at every corner -- the tuples + agree within tolerance and the reader can keep its single-tiepoint + code path -- or carry a ground-control-point (GCP) set whose + tuples do not agree, because the mapping from pixel to world is + non-affine. + + Before this check landed, the GCP case silently fabricated an + axis-aligned affine from the first tuple and downstream spatial + ops trusted wrong coordinates. The reader now validates that every + extra tuple is predicted by the inferred affine within a tolerance + scaled to the pixel size; mismatches raise ``NotImplementedError`` + with a clear pointer at the GCP case so users know why their file + is being rejected (issue #2117). + + Parameters + ---------- + tiepoint : tuple + Raw ``ModelTiepointTag`` value (length ``6 * N``). + origin_x, origin_y : float + World coords of pixel ``(0, 0)`` inferred from the first tuple. + sx, sy : float + Pixel size (``ModelPixelScaleTag`` magnitudes, both positive). + rel_tol : float, optional + Relative tolerance factor. The absolute threshold is + ``rel_tol * max(|sx|, |sy|, 1.0)``. Defaults to + :data:`_TIEPOINT_CONSISTENCY_REL_TOL`. + scale_source : str, optional + Where ``sx`` / ``sy`` came from. ``"ModelPixelScale"`` (default) + names the scale tag in the GCP-case error. ``"unit fallback"`` + is used when ``ModelPixelScale`` was absent and the caller fell + back to ``sx = sy = 1.0``; in that case a multi-tiepoint file is + almost certainly malformed rather than a real GCP warp, and the + error message says so. + """ + n = len(tiepoint) // 6 + if n <= 1: + return + + # Tolerance scales with pixel size so files in different units + # (degrees vs metres) are treated consistently. The factor lives in + # _TIEPOINT_CONSISTENCY_REL_TOL and is a relative residual on world + # coordinates -- distinct from the 1e-12 absolute floor that the + # rotation check in _extract_transform applies to raw + # ModelTransformation matrix off-diagonals. + tol = rel_tol * max(abs(sx), abs(sy), 1.0) + + for k in range(1, n): + base = 6 * k + tp_i = tiepoint[base + 0] + tp_j = tiepoint[base + 1] + tp_x = tiepoint[base + 3] + tp_y = tiepoint[base + 4] + + # Sign convention: ``_extract_transform`` recovers the origin via + # ``origin_y = tp_y + tp_j * sy`` because ``sy`` is a positive + # magnitude and the raster's y decreases as row index increases. + # Inverting that gives the predicted world y at row J below. + predicted_x = origin_x + tp_i * sx + predicted_y = origin_y - tp_j * sy + + dx = tp_x - predicted_x + dy = tp_y - predicted_y + if abs(dx) > tol or abs(dy) > tol: + primary = ( + "ModelTiepointTag carries multiple non-affine tiepoints " + f"(tuple {k} predicts world coords " + f"({predicted_x!r}, {predicted_y!r}) but the file " + f"declares ({tp_x!r}, {tp_y!r}); residual " + f"({dx!r}, {dy!r}) exceeds tolerance {tol!r})." + ) + if scale_source == "unit fallback": + cause = ( + "The file has multiple tiepoints but no " + "ModelPixelScale tag, so the reader cannot recover a " + "consistent affine. The file is most likely " + "malformed; if it is a real ground-control-point " + "warp, add a ModelPixelScale tag or rectify it first." + ) + else: + cause = ( + "The file uses a ground-control-point warp that the " + "reader cannot represent as an axis-aligned affine." + ) + hint = ( + "Rectify the file to a regular grid first (``gdalwarp``, " + "``rasterio.warp.reproject``, or any GIS tool that " + "resamples GCP files to an affine raster) and reopen " + "the rectified output. See issue #2117." + ) + raise NotImplementedError(f"{primary}\n{cause}\n{hint}") + + def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: """Extract affine transform from ModelTransformation, or ModelTiepoint + ModelPixelScale tags. @@ -536,7 +650,14 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: if tiepoint is not None: if not isinstance(tiepoint, tuple): tiepoint = (tiepoint,) - # tiepoint: (I, J, K, X, Y, Z) + # tiepoint: (I, J, K, X, Y, Z); a ModelTiepointTag may carry + # more than one (I, J, K, X, Y, Z) tuple. Files use that to + # either repeat the same affine at every corner (the tuples + # agree, common case) or to encode a GCP warp where the + # tuples describe a non-affine mapping. Silently picking the + # first tuple turns the GCP case into wrong coordinates + # downstream (issue #2117). Validate that every tuple is + # consistent with the inferred affine; fail closed otherwise. tp_i = tiepoint[0] if len(tiepoint) > 0 else 0.0 tp_j = tiepoint[1] if len(tiepoint) > 1 else 0.0 tp_x = tiepoint[3] if len(tiepoint) > 3 else 0.0 @@ -545,6 +666,10 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: origin_x = tp_x - tp_i * sx origin_y = tp_y + tp_j * sy # sy is positive, but y goes down + _validate_tiepoint_consistency( + tiepoint, origin_x, origin_y, sx, sy, + ) + return GeoTransform( origin_x=origin_x, origin_y=origin_y, @@ -578,6 +703,17 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: origin_x = tp_x - tp_i * 1.0 origin_y = tp_y + tp_j * 1.0 + # Same multi-tiepoint consistency check the scale branch above + # runs; the absence of ModelPixelScale just means the scale is + # the unit fallback. ``scale_source`` tells the helper to blame + # the missing scale tag rather than the GCP-warp case in the + # error message, since a multi-tiepoint file without + # ModelPixelScale is almost certainly malformed (issue #2117). + _validate_tiepoint_consistency( + tiepoint, origin_x, origin_y, 1.0, 1.0, + scale_source="unit fallback", + ) + return GeoTransform( origin_x=origin_x, origin_y=origin_y, diff --git a/xrspatial/geotiff/tests/test_multi_tiepoint_validation_2117.py b/xrspatial/geotiff/tests/test_multi_tiepoint_validation_2117.py new file mode 100644 index 000000000..b7f45e824 --- /dev/null +++ b/xrspatial/geotiff/tests/test_multi_tiepoint_validation_2117.py @@ -0,0 +1,183 @@ +"""Multi-tiepoint consistency check in ``_extract_transform`` (issue #2117). + +A ``ModelTiepointTag`` may carry one or many ``(I, J, K, X, Y, Z)`` +tuples. Before the fix, the reader sliced only ``tiepoint[0:6]`` and +silently dropped any additional records. That works when the extra +tuples encode the same affine at every corner (common case) but +silently produces wrong coordinates when the tuples encode a non-affine +GCP warp. + +These tests pin the new behaviour: + +* Single-tiepoint files continue to read the same way (no regression). +* Multi-tiepoint files whose tuples agree within tolerance still read. +* Multi-tiepoint files with inconsistent tuples raise + ``NotImplementedError`` with a message that names the GCP case so the + user has a path forward (``gdalwarp`` to rectify first). +* The tolerance scales with pixel size, so files in different units + (degrees vs metres) are treated consistently. +""" +from __future__ import annotations + +import pytest + +from xrspatial.geotiff._geotags import ( + TAG_MODEL_PIXEL_SCALE, + TAG_MODEL_TIEPOINT, + _extract_transform, + _validate_tiepoint_consistency, +) +from xrspatial.geotiff._header import IFD, IFDEntry + + +def _make_ifd(tiepoint: tuple, scale: tuple | None = (10.0, 10.0, 0.0)) -> IFD: + ifd = IFD() + ifd.entries[TAG_MODEL_TIEPOINT] = IFDEntry( + tag=TAG_MODEL_TIEPOINT, type_id=12, + count=len(tiepoint), value=tiepoint, + ) + if scale is not None: + ifd.entries[TAG_MODEL_PIXEL_SCALE] = IFDEntry( + tag=TAG_MODEL_PIXEL_SCALE, type_id=12, + count=len(scale), value=scale, + ) + return ifd + + +# A simple axis-aligned affine: origin (100, 200), pixel size 10 in both axes. +# Pixel (i, j) maps to world (100 + 10*i, 200 - 10*j). +_SX = 10.0 +_SY = 10.0 +_ORIGIN_X = 100.0 +_ORIGIN_Y = 200.0 + + +def _world_at(i: float, j: float) -> tuple[float, float]: + return (_ORIGIN_X + i * _SX, _ORIGIN_Y - j * _SY) + + +def test_single_tiepoint_unchanged(): + ifd = _make_ifd((0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0)) + gt, has_georef = _extract_transform(ifd) + assert has_georef is True + assert gt.origin_x == _ORIGIN_X + assert gt.origin_y == _ORIGIN_Y + assert gt.pixel_width == _SX + assert gt.pixel_height == -_SY + + +def test_multiple_consistent_tiepoints_pass(): + # Four corners of a 100x100 raster, all consistent with the same affine. + corners = [] + for i, j in [(0, 0), (100, 0), (0, 100), (100, 100)]: + wx, wy = _world_at(i, j) + corners.extend([float(i), float(j), 0.0, wx, wy, 0.0]) + ifd = _make_ifd(tuple(corners)) + gt, has_georef = _extract_transform(ifd) + assert has_georef is True + assert gt.origin_x == pytest.approx(_ORIGIN_X) + assert gt.origin_y == pytest.approx(_ORIGIN_Y) + assert gt.pixel_width == pytest.approx(_SX) + assert gt.pixel_height == pytest.approx(-_SY) + + +def test_inconsistent_tiepoints_raise(): + # Second tuple disagrees by a full pixel: that is a GCP warp. + tiepoint = ( + 0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, + 100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + 5.0, _ORIGIN_Y, 0.0, + ) + ifd = _make_ifd(tiepoint) + with pytest.raises(NotImplementedError, match="ground-control-point"): + _extract_transform(ifd) + + +def test_tolerance_scales_with_pixel_size(): + # A 1e-7 residual on a pixel_size=10 file is below tolerance. + tiny_resid = 1e-7 + tiepoint = ( + 0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, + 100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + tiny_resid, _ORIGIN_Y, 0.0, + ) + ifd = _make_ifd(tiepoint) + # Should not raise. + _extract_transform(ifd) + + +def test_validate_helper_no_op_for_single_tuple(): + # 6 elements -> n == 1; nothing to validate. + _validate_tiepoint_consistency( + (0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0), + _ORIGIN_X, _ORIGIN_Y, _SX, _SY, + ) + + +def test_validate_helper_rejects_disagreement(): + tiepoint = ( + 0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, + 50.0, 0.0, 0.0, _ORIGIN_X + 50 * _SX + 100.0, _ORIGIN_Y, 0.0, + ) + with pytest.raises(NotImplementedError, match="tuple 1"): + _validate_tiepoint_consistency( + tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY, + ) + + +def test_validate_helper_y_axis_sign(): + # Verify the y-axis sign convention: predicted_y = origin_y - j * sy. + # A consistent tuple at (i=0, j=100) is (origin_x, origin_y - 100 * sy). + tp_world_y = _ORIGIN_Y - 100.0 * _SY + tiepoint = ( + 0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, + 0.0, 100.0, 0.0, _ORIGIN_X, tp_world_y, 0.0, + ) + _validate_tiepoint_consistency( + tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY, + ) + + +def test_tiepoint_without_scale_also_validates(): + # When ModelPixelScale is absent, the reader falls back to unit pixel + # size; the consistency check must still fire, and the error message + # must blame the missing ModelPixelScale tag (not the GCP-warp case), + # since a real multi-tiepoint file without ModelPixelScale is almost + # certainly malformed rather than a deliberate GCP set. + tiepoint = ( + 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, + 10.0, 0.0, 0.0, 50.0, 0.0, 0.0, # predicts x=10.0, declares 50.0 + ) + ifd = _make_ifd(tiepoint, scale=None) + with pytest.raises(NotImplementedError, match="no ModelPixelScale"): + _extract_transform(ifd) + + +def test_validate_helper_honours_custom_rel_tol(): + # A residual that passes the default 1e-6 * pixel_size tolerance + # (= 1e-5 here) can still be caught by a tighter caller-supplied + # rel_tol. Surveying / high-precision geodetic callers that want to + # flag near-affine GCP files can pass a smaller rel_tol. + residual = 5e-6 # below default tol (1e-5) but above tight tol (1e-7) + tiepoint = ( + 0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, + 100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + residual, _ORIGIN_Y, 0.0, + ) + # Default tolerance accepts it. + _validate_tiepoint_consistency( + tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY, + ) + # Tighter tolerance rejects it. + with pytest.raises(NotImplementedError, match="tuple 1"): + _validate_tiepoint_consistency( + tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY, rel_tol=1e-8, + ) + + +def test_short_tiepoint_is_treated_as_single_tuple(): + # A truncated tiepoint with fewer than 12 elements has n == 1 (truncated + # second tuple is dropped by integer division). The reader should not + # crash; it falls back to the existing single-tuple semantics. + tiepoint = (0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, 1.0) + ifd = _make_ifd(tiepoint) + gt, has_georef = _extract_transform(ifd) + assert has_georef is True + assert gt.origin_x == _ORIGIN_X