Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 137 additions & 1 deletion xrspatial/geotiff/_geotags.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,120 @@ def _parse_geokeys(ifd: IFD, data: bytes | memoryview,
return geokeys


# Default relative tolerance for the multi-tiepoint consistency check.
# Applied as ``rel_tol * max(|sx|, |sy|, 1.0)``, so the absolute threshold
# tracks pixel size: on a 10 m pixel file the threshold is 10 µm, on a
# 1° pixel file it is ~11 cm. Surveying / high-precision geodetic workflows
# that want to catch GCP files with smaller residuals can pass a tighter
# ``rel_tol`` to :func:`_validate_tiepoint_consistency` directly.
_TIEPOINT_CONSISTENCY_REL_TOL = 1e-6


def _validate_tiepoint_consistency(tiepoint: tuple,
origin_x: float,
origin_y: float,
sx: float,
sy: float,
*,
rel_tol: float = _TIEPOINT_CONSISTENCY_REL_TOL,
scale_source: str = "ModelPixelScale") -> None:
"""Verify every ``ModelTiepointTag`` tuple agrees with the inferred affine.

A ``ModelTiepointTag`` may carry one or many ``(I, J, K, X, Y, Z)``
tuples. The single-tuple case (paired with ``ModelPixelScale``) is
by far the most common and is unambiguous. Files with multiple
tuples either repeat the same affine at every corner -- the tuples
agree within tolerance and the reader can keep its single-tiepoint
code path -- or carry a ground-control-point (GCP) set whose
tuples do not agree, because the mapping from pixel to world is
non-affine.

Before this check landed, the GCP case silently fabricated an
axis-aligned affine from the first tuple and downstream spatial
ops trusted wrong coordinates. The reader now validates that every
extra tuple is predicted by the inferred affine within a tolerance
scaled to the pixel size; mismatches raise ``NotImplementedError``
with a clear pointer at the GCP case so users know why their file
is being rejected (issue #2117).

Parameters
----------
tiepoint : tuple
Raw ``ModelTiepointTag`` value (length ``6 * N``).
origin_x, origin_y : float
World coords of pixel ``(0, 0)`` inferred from the first tuple.
sx, sy : float
Pixel size (``ModelPixelScaleTag`` magnitudes, both positive).
rel_tol : float, optional
Relative tolerance factor. The absolute threshold is
``rel_tol * max(|sx|, |sy|, 1.0)``. Defaults to
:data:`_TIEPOINT_CONSISTENCY_REL_TOL`.
scale_source : str, optional
Where ``sx`` / ``sy`` came from. ``"ModelPixelScale"`` (default)
names the scale tag in the GCP-case error. ``"unit fallback"``
is used when ``ModelPixelScale`` was absent and the caller fell
back to ``sx = sy = 1.0``; in that case a multi-tiepoint file is
almost certainly malformed rather than a real GCP warp, and the
error message says so.
"""
n = len(tiepoint) // 6
if n <= 1:
return

# Tolerance scales with pixel size so files in different units
# (degrees vs metres) are treated consistently. The factor lives in
# _TIEPOINT_CONSISTENCY_REL_TOL and is a relative residual on world
# coordinates -- distinct from the 1e-12 absolute floor that the
# rotation check in _extract_transform applies to raw
# ModelTransformation matrix off-diagonals.
tol = rel_tol * max(abs(sx), abs(sy), 1.0)

for k in range(1, n):
base = 6 * k
tp_i = tiepoint[base + 0]
tp_j = tiepoint[base + 1]
tp_x = tiepoint[base + 3]
tp_y = tiepoint[base + 4]

# Sign convention: ``_extract_transform`` recovers the origin via
# ``origin_y = tp_y + tp_j * sy`` because ``sy`` is a positive
# magnitude and the raster's y decreases as row index increases.
# Inverting that gives the predicted world y at row J below.
predicted_x = origin_x + tp_i * sx
predicted_y = origin_y - tp_j * sy

dx = tp_x - predicted_x
dy = tp_y - predicted_y
if abs(dx) > tol or abs(dy) > tol:
primary = (
"ModelTiepointTag carries multiple non-affine tiepoints "
f"(tuple {k} predicts world coords "
f"({predicted_x!r}, {predicted_y!r}) but the file "
f"declares ({tp_x!r}, {tp_y!r}); residual "
f"({dx!r}, {dy!r}) exceeds tolerance {tol!r})."
)
if scale_source == "unit fallback":
cause = (
"The file has multiple tiepoints but no "
"ModelPixelScale tag, so the reader cannot recover a "
"consistent affine. The file is most likely "
"malformed; if it is a real ground-control-point "
"warp, add a ModelPixelScale tag or rectify it first."
)
else:
cause = (
"The file uses a ground-control-point warp that the "
"reader cannot represent as an axis-aligned affine."
)
hint = (
"Rectify the file to a regular grid first (``gdalwarp``, "
"``rasterio.warp.reproject``, or any GIS tool that "
"resamples GCP files to an affine raster) and reopen "
"the rectified output. See issue #2117."
)
raise NotImplementedError(f"{primary}\n{cause}\n{hint}")


def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]:
"""Extract affine transform from ModelTransformation, or
ModelTiepoint + ModelPixelScale tags.
Expand Down Expand Up @@ -536,7 +650,14 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]:
if tiepoint is not None:
if not isinstance(tiepoint, tuple):
tiepoint = (tiepoint,)
# tiepoint: (I, J, K, X, Y, Z)
# tiepoint: (I, J, K, X, Y, Z); a ModelTiepointTag may carry
# more than one (I, J, K, X, Y, Z) tuple. Files use that to
# either repeat the same affine at every corner (the tuples
# agree, common case) or to encode a GCP warp where the
# tuples describe a non-affine mapping. Silently picking the
# first tuple turns the GCP case into wrong coordinates
# downstream (issue #2117). Validate that every tuple is
# consistent with the inferred affine; fail closed otherwise.
tp_i = tiepoint[0] if len(tiepoint) > 0 else 0.0
tp_j = tiepoint[1] if len(tiepoint) > 1 else 0.0
tp_x = tiepoint[3] if len(tiepoint) > 3 else 0.0
Expand All @@ -545,6 +666,10 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]:
origin_x = tp_x - tp_i * sx
origin_y = tp_y + tp_j * sy # sy is positive, but y goes down

_validate_tiepoint_consistency(
tiepoint, origin_x, origin_y, sx, sy,
)

return GeoTransform(
origin_x=origin_x,
origin_y=origin_y,
Expand Down Expand Up @@ -578,6 +703,17 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]:
origin_x = tp_x - tp_i * 1.0
origin_y = tp_y + tp_j * 1.0

# Same multi-tiepoint consistency check the scale branch above
# runs; the absence of ModelPixelScale just means the scale is
# the unit fallback. ``scale_source`` tells the helper to blame
# the missing scale tag rather than the GCP-warp case in the
# error message, since a multi-tiepoint file without
# ModelPixelScale is almost certainly malformed (issue #2117).
_validate_tiepoint_consistency(
tiepoint, origin_x, origin_y, 1.0, 1.0,
scale_source="unit fallback",
)

return GeoTransform(
origin_x=origin_x,
origin_y=origin_y,
Expand Down
183 changes: 183 additions & 0 deletions xrspatial/geotiff/tests/test_multi_tiepoint_validation_2117.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Multi-tiepoint consistency check in ``_extract_transform`` (issue #2117).

A ``ModelTiepointTag`` may carry one or many ``(I, J, K, X, Y, Z)``
tuples. Before the fix, the reader sliced only ``tiepoint[0:6]`` and
silently dropped any additional records. That works when the extra
tuples encode the same affine at every corner (common case) but
silently produces wrong coordinates when the tuples encode a non-affine
GCP warp.

These tests pin the new behaviour:

* Single-tiepoint files continue to read the same way (no regression).
* Multi-tiepoint files whose tuples agree within tolerance still read.
* Multi-tiepoint files with inconsistent tuples raise
``NotImplementedError`` with a message that names the GCP case so the
user has a path forward (``gdalwarp`` to rectify first).
* The tolerance scales with pixel size, so files in different units
(degrees vs metres) are treated consistently.
"""
from __future__ import annotations

import pytest

from xrspatial.geotiff._geotags import (
TAG_MODEL_PIXEL_SCALE,
TAG_MODEL_TIEPOINT,
_extract_transform,
_validate_tiepoint_consistency,
)
from xrspatial.geotiff._header import IFD, IFDEntry


def _make_ifd(tiepoint: tuple, scale: tuple | None = (10.0, 10.0, 0.0)) -> IFD:
ifd = IFD()
ifd.entries[TAG_MODEL_TIEPOINT] = IFDEntry(
tag=TAG_MODEL_TIEPOINT, type_id=12,
count=len(tiepoint), value=tiepoint,
)
if scale is not None:
ifd.entries[TAG_MODEL_PIXEL_SCALE] = IFDEntry(
tag=TAG_MODEL_PIXEL_SCALE, type_id=12,
count=len(scale), value=scale,
)
return ifd


# A simple axis-aligned affine: origin (100, 200), pixel size 10 in both axes.
# Pixel (i, j) maps to world (100 + 10*i, 200 - 10*j).
_SX = 10.0
_SY = 10.0
_ORIGIN_X = 100.0
_ORIGIN_Y = 200.0


def _world_at(i: float, j: float) -> tuple[float, float]:
return (_ORIGIN_X + i * _SX, _ORIGIN_Y - j * _SY)


def test_single_tiepoint_unchanged():
ifd = _make_ifd((0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0))
gt, has_georef = _extract_transform(ifd)
assert has_georef is True
assert gt.origin_x == _ORIGIN_X
assert gt.origin_y == _ORIGIN_Y
assert gt.pixel_width == _SX
assert gt.pixel_height == -_SY


def test_multiple_consistent_tiepoints_pass():
# Four corners of a 100x100 raster, all consistent with the same affine.
corners = []
for i, j in [(0, 0), (100, 0), (0, 100), (100, 100)]:
wx, wy = _world_at(i, j)
corners.extend([float(i), float(j), 0.0, wx, wy, 0.0])
ifd = _make_ifd(tuple(corners))
gt, has_georef = _extract_transform(ifd)
assert has_georef is True
assert gt.origin_x == pytest.approx(_ORIGIN_X)
assert gt.origin_y == pytest.approx(_ORIGIN_Y)
assert gt.pixel_width == pytest.approx(_SX)
assert gt.pixel_height == pytest.approx(-_SY)


def test_inconsistent_tiepoints_raise():
# Second tuple disagrees by a full pixel: that is a GCP warp.
tiepoint = (
0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0,
100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + 5.0, _ORIGIN_Y, 0.0,
)
ifd = _make_ifd(tiepoint)
with pytest.raises(NotImplementedError, match="ground-control-point"):
_extract_transform(ifd)


def test_tolerance_scales_with_pixel_size():
# A 1e-7 residual on a pixel_size=10 file is below tolerance.
tiny_resid = 1e-7
tiepoint = (
0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0,
100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + tiny_resid, _ORIGIN_Y, 0.0,
)
ifd = _make_ifd(tiepoint)
# Should not raise.
_extract_transform(ifd)


def test_validate_helper_no_op_for_single_tuple():
# 6 elements -> n == 1; nothing to validate.
_validate_tiepoint_consistency(
(0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0),
_ORIGIN_X, _ORIGIN_Y, _SX, _SY,
)


def test_validate_helper_rejects_disagreement():
tiepoint = (
0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0,
50.0, 0.0, 0.0, _ORIGIN_X + 50 * _SX + 100.0, _ORIGIN_Y, 0.0,
)
with pytest.raises(NotImplementedError, match="tuple 1"):
_validate_tiepoint_consistency(
tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY,
)


def test_validate_helper_y_axis_sign():
# Verify the y-axis sign convention: predicted_y = origin_y - j * sy.
# A consistent tuple at (i=0, j=100) is (origin_x, origin_y - 100 * sy).
tp_world_y = _ORIGIN_Y - 100.0 * _SY
tiepoint = (
0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0,
0.0, 100.0, 0.0, _ORIGIN_X, tp_world_y, 0.0,
)
_validate_tiepoint_consistency(
tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY,
)


def test_tiepoint_without_scale_also_validates():
# When ModelPixelScale is absent, the reader falls back to unit pixel
# size; the consistency check must still fire, and the error message
# must blame the missing ModelPixelScale tag (not the GCP-warp case),
# since a real multi-tiepoint file without ModelPixelScale is almost
# certainly malformed rather than a deliberate GCP set.
tiepoint = (
0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
10.0, 0.0, 0.0, 50.0, 0.0, 0.0, # predicts x=10.0, declares 50.0
)
ifd = _make_ifd(tiepoint, scale=None)
with pytest.raises(NotImplementedError, match="no ModelPixelScale"):
_extract_transform(ifd)


def test_validate_helper_honours_custom_rel_tol():
# A residual that passes the default 1e-6 * pixel_size tolerance
# (= 1e-5 here) can still be caught by a tighter caller-supplied
# rel_tol. Surveying / high-precision geodetic callers that want to
# flag near-affine GCP files can pass a smaller rel_tol.
residual = 5e-6 # below default tol (1e-5) but above tight tol (1e-7)
tiepoint = (
0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0,
100.0, 0.0, 0.0, _ORIGIN_X + 100 * _SX + residual, _ORIGIN_Y, 0.0,
)
# Default tolerance accepts it.
_validate_tiepoint_consistency(
tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY,
)
# Tighter tolerance rejects it.
with pytest.raises(NotImplementedError, match="tuple 1"):
_validate_tiepoint_consistency(
tiepoint, _ORIGIN_X, _ORIGIN_Y, _SX, _SY, rel_tol=1e-8,
)


def test_short_tiepoint_is_treated_as_single_tuple():
# A truncated tiepoint with fewer than 12 elements has n == 1 (truncated
# second tuple is dropped by integer division). The reader should not
# crash; it falls back to the existing single-tuple semantics.
tiepoint = (0.0, 0.0, 0.0, _ORIGIN_X, _ORIGIN_Y, 0.0, 1.0)
ifd = _make_ifd(tiepoint)
gt, has_georef = _extract_transform(ifd)
assert has_georef is True
assert gt.origin_x == _ORIGIN_X
Loading