diff --git a/CHANGELOG.md b/CHANGELOG.md index f3a5c272..ef29e435 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ #### Bug fixes and improvements - Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016) - Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082) +- Tighten the writer's no-georef sentinel for integer x/y coords. The pre-fix check treated any integer dtype on either axis as the read-side no-georef placeholder and skipped transform inference, which also caught user-authored projected grids with integer-spaced coords (e.g. `x=[100,101,102], y=[200,199]`) and silently stripped their georef on write. The sentinel now matches only the exact reader pattern: `int64` ascending contiguous-step-1 arange on both axes. User-authored integer-coord grids that don't match (descending, non-unit step, non-uniform, or non-`int64`) now produce a real transform or raise `NonUniformCoordsError`. Coord values round-trip exactly through the new path; dtype flips int->float on subsequent reads. (#2087) - Deprecate read-side emission of geographic-CRS GeoKey attrs (crs_name, geog_citation, datum_code, angular_units, semi_major_axis, inv_flattening); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of projected-CRS GeoKey attrs (linear_units, projection_code); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of vertical-CRS GeoKey attrs (vertical_crs, vertical_citation, vertical_units); the writer does not emit vertical GeoKeys so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) diff --git a/xrspatial/geotiff/_coords.py b/xrspatial/geotiff/_coords.py index 717d9798..7970bcbd 100644 --- a/xrspatial/geotiff/_coords.py +++ b/xrspatial/geotiff/_coords.py @@ -36,6 +36,47 @@ _BAND_DIM_NAMES = ('band', 'bands', 'channel') +def _is_no_georef_sentinel(coord: np.ndarray) -> bool: + """True iff ``coord`` matches the read-side no-georef placeholder. + + ``coords_from_pixel_geometry`` emits ``np.arange(start, stop, + dtype=np.int64)`` for the y/x coords whenever the source file + carries no GeoTIFF transform tags -- both for full reads + (``start=0``) and windowed reads (``start=window_offset``). See + issues #1710, #1753, #1949. Round-tripping such an array through + ``to_geotiff`` should not invent a synthetic unit transform, so + the writer's transform-inference code paths skip it. + + The pre-#2087 check was ``dtype.kind in ('i', 'u')`` on either + axis, which also caught user-authored projected grids with + integer-spaced coords (e.g. ``x=[100, 101, 102]``, + ``y=[200, 199]``). Those were silently stripped of georef on + write -- the file landed on disk with no transform tags and read + back as pixel coords. This helper tightens the match to the + exact pattern the reader emits: ``int64`` dtype, ascending, + contiguous step ``+1``, starting at an arbitrary integer + (windowed reads do not start at 0). A descending or + non-unit-step int array does not match, so the writer falls + through to the normal ``coords_to_transform`` path and either + synthesises a real transform or raises ``NonUniformCoordsError``. + + Trade-off: a user-authored array whose coords happen to match + the read-side ``arange`` pattern exactly (e.g. ascending step-1 + int64 starting at any integer, with no ``attrs['transform']`` + set) is still treated as no-georef. Callers in that niche can + write a real transform by setting ``attrs['transform']`` + explicitly or by using float coords. + """ + if coord.dtype != np.int64: + return False + n = len(coord) + if n < 1: + return False + return bool(np.array_equal( + coord, np.arange(coord[0], coord[0] + n, dtype=np.int64) + )) + + def coords_from_pixel_geometry( origin_x: float, origin_y: float, @@ -264,12 +305,18 @@ def require_transform_for_georeferenced( if xdim in da.coords and ydim in da.coords: x = da.coords[xdim].values y = da.coords[ydim].values - # Integer coord dtype is the read-side no-georef sentinel - # (#1710, #1753, #1949). ``coords_to_transform`` returns ``None`` - # for these so int-coord arrays round-trip cleanly without a - # synthetic unit transform; mirror that here so the writer does - # not fail-close on a legitimate no-georef write. - if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'): + # The read-side no-georef sentinel is ``np.arange(start, stop, + # dtype=int64)`` on both axes (#1710, #1753, #1949). When both + # axes match that shape, ``coords_to_transform`` returns + # ``None`` and the array round-trips cleanly without a fake + # unit transform; mirror that here so the validator doesn't + # fail-close on a legitimate no-georef write. A + # user-authored integer-coord grid that does *not* match the + # sentinel (e.g. ``x=[100,101,102], y=[200,199]``) falls + # through to the raise below and the caller has to either + # supply ``attrs['transform']`` or accept the + # ``coords_to_transform`` path (#2087). + if _is_no_georef_sentinel(x) and _is_no_georef_sentinel(y): return raise ValueError( f"Cannot infer GeoTIFF transform from a " @@ -301,10 +348,14 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': helper must handle both layouts to keep the geo-transform consistent with the file's coord arrays. See issue #1643. - Integer-dtype x/y coords are treated as a no-georef sentinel and - return ``None`` before the uniformity check runs; this is - intentional so the read-side ``np.arange`` placeholder round-trips - without inventing a fake unit transform (see issue #1949). + Coords matching the read-side no-georef placeholder + (``np.arange(start, stop, dtype=int64)`` on both axes) return + ``None`` before the uniformity check runs; this is intentional + so the placeholder round-trips without inventing a fake unit + transform (#1949). The check was tightened in #2087 from "any + integer dtype" to the exact ``arange`` pattern because the + broader check was silently stripping georef from user-authored + integer-coord projected grids. """ if da.ndim == 3: # Drop the band-like dim and keep the two spatial dims in their @@ -336,21 +387,29 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': if len(x) < 2 and len(y) < 2: return None - # Integer coord dtype is the read-side no-georef signal: the - # ``has_georef=False`` branch in ``coords_from_pixel_geometry`` - # emits ``np.arange(N, dtype=np.int64)`` for files without GeoTIFF - # transform tags (#1710, #1753). Synthesising a GeoTransform from - # those ``[0, 1, 2, ...]`` arrays would inject a fake unit transform - # (``origin=-0.5, pixel_width=1.0``) into the written file's - # ModelPixelScale / ModelTiepoint tags. The next read then takes - # the georef branch and the coord dtype silently flips to - # ``float64`` with ``attrs['transform']`` present, breaking the - # no-georef contract that downstream code branches on. See issue - # #1949. Float coord arrays still produce a GeoTransform (the - # canonical georef path), and an explicit ``attrs['transform']`` - # bypasses this helper entirely so callers can still write a - # transform alongside int coords by setting the attr. - if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'): + # No-georef sentinel: the ``has_georef=False`` branch in + # ``coords_from_pixel_geometry`` emits + # ``np.arange(start, stop, dtype=np.int64)`` for files without + # GeoTIFF transform tags (#1710, #1753). Synthesising a + # GeoTransform from those arrays would inject a fake unit + # transform (``pixel_width=1.0``, origin derived from + # ``coord[0]``) into the written file's ModelPixelScale / + # ModelTiepoint tags. The next read then takes the georef branch + # and the coord dtype silently flips to ``float64`` with + # ``attrs['transform']`` present, breaking the no-georef + # contract that downstream code branches on (#1949). + # + # The check used to be ``dtype.kind in ('i', 'u')`` on either + # axis, which was too broad: user-authored projected grids with + # integer-spaced coords (e.g. ``x=[100, 101, 102]``, + # ``y=[200, 199]``) matched the sentinel and lost their georef + # silently on write. The tightened check accepts only the exact + # shape the reader emits (``int64``, ascending, contiguous step + # ``+1`` on both axes). Anything else falls through here and + # gets a real transform synthesised below, or raises + # ``NonUniformCoordsError`` if the spacing is irregular. See + # issue #2087. + if _is_no_georef_sentinel(x) and _is_no_georef_sentinel(y): return None # GeoTIFF only supports an affine transform; non-uniform spacing diff --git a/xrspatial/geotiff/tests/test_int_coord_sentinel_2087.py b/xrspatial/geotiff/tests/test_int_coord_sentinel_2087.py new file mode 100644 index 00000000..a9ef92cc --- /dev/null +++ b/xrspatial/geotiff/tests/test_int_coord_sentinel_2087.py @@ -0,0 +1,237 @@ +"""User-authored integer spatial coords must not silently drop georef (#2087). + +The pre-fix sentinel at ``_coords.py:272`` and ``:353`` returned +``None`` from ``coords_to_transform`` (and waved the validator +through) whenever either x or y had an integer dtype. The intent +was to round-trip the read-side ``np.arange(N, dtype=int64)`` +placeholder that ``coords_from_pixel_geometry`` emits for files +with no GeoTIFF transform tags. The sentinel was too broad: it +also caught user-authored projected grids with integer-spaced +coords, which lost their georef silently on write. + +The tightened sentinel matches only the exact reader pattern: +``int64`` dtype, ascending, contiguous step ``+1`` on both axes. +These tests pin both directions: + +1. The legitimate no-georef round-trip still works -- a file + with no transform tags reads back with int64 coords and + writes back without inventing a transform. +2. User-authored integer-coord grids are no longer silently + stripped. ``x=[100,101,102], y=[200,199]`` now writes a real + transform and round-trips with float coords. +3. Subsets and windowed reads of the no-georef placeholder + (which still produce ``arange``-shaped int64 arrays) continue + to round-trip cleanly. +4. Non-uniform int coords raise ``NonUniformCoordsError`` instead + of silently stripping. +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff +from xrspatial.geotiff._coords import _is_no_georef_sentinel + + +# --- Unit checks on the sentinel helper itself -------------------------- + + +@pytest.mark.parametrize( + "coord", + [ + np.arange(5, dtype=np.int64), # full read + np.arange(3, 8, dtype=np.int64), # windowed read + np.arange(0, 1, dtype=np.int64), # degenerate 1-element + np.array([10, 11, 12], dtype=np.int64), + ], +) +def test_sentinel_accepts_arange_int64(coord): + assert _is_no_georef_sentinel(coord) + + +@pytest.mark.parametrize( + "coord", + [ + np.array([100, 101, 102], dtype=np.int32), # int32, not int64 + np.array([100, 101, 102], dtype=np.float64), # float + np.array([200, 199], dtype=np.int64), # descending + np.array([0, 2, 4], dtype=np.int64), # step != 1 + np.array([1, 2, 5], dtype=np.int64), # non-uniform + np.array([], dtype=np.int64), # empty + ], +) +def test_sentinel_rejects_non_arange(coord): + assert not _is_no_georef_sentinel(coord) + + +# --- Round-trip behaviour ------------------------------------------------ + + +def _make_georef_int_grid(): + # User-authored projected grid with integer-spaced coords. ``y`` + # decreases top-to-bottom by convention, so it does not match the + # ascending sentinel even before any other check. + return xr.DataArray( + np.zeros((2, 3), dtype=np.float32), + coords={'y': np.array([200, 199]), 'x': np.array([100, 101, 102])}, + dims=('y', 'x'), + ) + + +def test_user_authored_int_grid_writes_real_transform(tmp_path): + da = _make_georef_int_grid() + path = str(tmp_path / "tmp_2087_int_grid.tif") + to_geotiff(da, path) + + out = open_geotiff(path) + # Coord values round-trip exactly; dtype flips int -> float because + # the file now carries a real transform and the reader emits float + # pixel-center coords. + assert out.coords['x'].dtype.kind == 'f' + assert out.coords['y'].dtype.kind == 'f' + np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0]) + np.testing.assert_array_equal(out.coords['y'].values, [200.0, 199.0]) + # Transform attr is present (the bug was that it wasn't). + assert out.attrs.get('transform') is not None + + +def test_both_axes_ascending_int64_step1_trade_off(tmp_path): + # Documented trade-off corner: when both axes are int64 ascending + # step-1 (i.e. both match the read-side arange pattern exactly), + # the sentinel cannot distinguish a user-authored grid from the + # read-side no-georef placeholder. The writer resolves to + # no-georef. A caller wanting georef on this pattern must set + # ``attrs['transform']`` explicitly (see the next test). + da = xr.DataArray( + np.zeros((3, 3), dtype=np.float32), + coords={ + 'y': np.array([200, 201, 202], dtype=np.int64), + 'x': np.array([100, 101, 102], dtype=np.int64), + }, + dims=('y', 'x'), + ) + # Both axes match the sentinel exactly (int64 ascending step 1). + # This is the documented trade-off: ambiguous between + # "read-side placeholder" and "user-authored arange-shaped grid". + # The sentinel resolves to no-georef. A caller wanting georef on + # this pattern must set attrs['transform'] explicitly. + path = str(tmp_path / "tmp_2087_both_arange.tif") + to_geotiff(da, path) + out = open_geotiff(path) + # No-georef round-trip: coords come back as int64 0..N-1 + # placeholders, not the original values. + assert out.coords['x'].dtype == np.int64 + assert out.coords['y'].dtype == np.int64 + + +def test_user_authored_int_grid_with_explicit_transform(tmp_path): + # Caller in the ambiguous-trade-off corner who wants georef sets + # attrs['transform'] explicitly. The writer must use that + # transform rather than the sentinel inference. + da = xr.DataArray( + np.zeros((3, 3), dtype=np.float32), + coords={ + 'y': np.array([200, 201, 202], dtype=np.int64), + 'x': np.array([100, 101, 102], dtype=np.int64), + }, + dims=('y', 'x'), + attrs={'transform': (1.0, 0.0, 99.5, 0.0, 1.0, 199.5)}, + ) + path = str(tmp_path / "tmp_2087_explicit_transform.tif") + to_geotiff(da, path) + out = open_geotiff(path) + assert out.attrs.get('transform') is not None + np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0]) + + +def test_non_uniform_int_coords_raise(tmp_path): + # Non-uniform integer spacing under the old sentinel silently + # stripped georef. Under the tightened sentinel it falls through + # to ``coords_to_transform`` and trips the uniform-spacing check. + da = xr.DataArray( + np.zeros((3, 3), dtype=np.float32), + coords={ + 'y': np.array([10, 11, 12], dtype=np.int64), + 'x': np.array([1, 2, 5], dtype=np.int64), # non-uniform + }, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2087_non_uniform.tif") + with pytest.raises(ValueError, match="not uniformly spaced"): + to_geotiff(da, path) + + +def test_int_x_float_y_writes_transform(tmp_path): + # One axis integer, the other float: under the old sentinel any + # integer axis defeated the transform-inference. Under the + # tightened sentinel, the float y axis means the int x axis falls + # through to ``coords_to_transform`` (which handles int math + # fine) and a transform is written. + da = xr.DataArray( + np.zeros((2, 3), dtype=np.float32), + coords={ + 'y': np.array([50.5, 49.5], dtype=np.float64), + 'x': np.array([100, 101, 102], dtype=np.int64), + }, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2087_mixed_dtypes.tif") + to_geotiff(da, path) + out = open_geotiff(path) + assert out.attrs.get('transform') is not None + np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0]) + + +# --- The legitimate no-georef round-trip must keep working --------------- + + +def test_no_georef_roundtrip_preserved(tmp_path): + # Build a no-georef file via the read-side path: write a file with + # ``attrs['transform']`` explicitly cleared, then re-read it. + # ``open_geotiff`` returns int64 ``np.arange``-shaped coords; the + # next write must not invent a transform. + src = xr.DataArray( + np.zeros((4, 4), dtype=np.float32), + coords={'y': np.arange(4, dtype=np.int64), 'x': np.arange(4, dtype=np.int64)}, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2087_no_georef.tif") + to_geotiff(src, path) + + out = open_geotiff(path) + # Verify the read came back as no-georef. + assert out.coords['x'].dtype == np.int64 + assert out.coords['y'].dtype == np.int64 + assert out.attrs.get('transform') is None + + # Round-trip: write again. No transform should be invented. + path2 = str(tmp_path / "tmp_2087_no_georef_rt.tif") + to_geotiff(out, path2) + out2 = open_geotiff(path2) + assert out2.coords['x'].dtype == np.int64 + assert out2.attrs.get('transform') is None + + +def test_windowed_no_georef_roundtrip(tmp_path): + # The read-side emits ``np.arange(c0, c1, dtype=int64)`` for + # windowed reads, so the sentinel must accept arange starting at + # any integer, not just 0. Test by writing an array whose coords + # mimic a windowed no-georef read. + da = xr.DataArray( + np.zeros((3, 4), dtype=np.float32), + coords={ + 'y': np.arange(10, 13, dtype=np.int64), + 'x': np.arange(20, 24, dtype=np.int64), + }, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2087_windowed.tif") + to_geotiff(da, path) + out = open_geotiff(path) + # Under the documented trade-off this is still treated as + # no-georef (both axes match the arange pattern). Coords come + # back as 0..N-1 placeholders. + assert out.coords['x'].dtype == np.int64 + assert out.attrs.get('transform') is None diff --git a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py index 12703129..5735e981 100644 --- a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py +++ b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py @@ -1,4 +1,4 @@ -"""Round-trip tests for issue #1949. +"""Round-trip tests for issue #1949 (sentinel tightened in #2087). A TIFF with no GeoTIFF tags (no ``ModelPixelScale``, no ``ModelTiepoint``, no GeoKeys) reads back with integer pixel coords @@ -7,15 +7,21 @@ Before the fix, writing that DataArray back through ``to_geotiff`` silently injected a synthetic ``ModelPixelScale`` + ``ModelTiepoint`` -because ``_coords_to_transform`` happily inferred a unit transform from -the integer pixel coords. The next read then took the georef branch +because ``_coords_to_transform`` happily inferred a unit transform +from the integer pixel coords. The next read took the georef branch and the y/x dtype flipped from ``int64`` to ``float64`` with a ``transform`` attr present. -The fix makes ``_coords_to_transform`` return ``None`` when either -spatial coord array carries an integer dtype, so the no-georef contract -survives an arbitrary number of read -> write -> read cycles across -every writer path. +The original #1949 fix made ``_coords_to_transform`` return ``None`` +whenever EITHER spatial coord array carried an integer dtype. That +also caught user-authored projected grids with integer-spaced +coords and silently stripped their georef -- the bug #2087 closes. +The tightened sentinel requires BOTH axes to match the exact +read-side pattern (``int64`` ascending arange) before returning +``None``. The no-georef round-trip still survives an arbitrary +number of read -> write -> read cycles because the reader always +emits both axes as matching ``np.arange(start, stop, +dtype=int64)``. """ from __future__ import annotations @@ -49,10 +55,10 @@ def _gpu_available() -> bool: # --------------------------------------------------------------------------- -def test_coords_to_transform_returns_none_for_int64_y_coords(): - """Integer y coords ``[0, 1, 2, ...]`` are the no-georef signal the - read side emits; ``_coords_to_transform`` must not fabricate a - transform from them.""" +def test_coords_to_transform_returns_transform_for_int_y_float_x(): + """Mixed int / float coords are user-authored, not the read-side + sentinel; the sentinel requires both axes to match. The transform + inference path runs and produces a real GeoTransform (#2087).""" da = xr.DataArray( np.zeros((4, 5), dtype=np.float32), dims=['y', 'x'], @@ -61,11 +67,14 @@ def test_coords_to_transform_returns_none_for_int64_y_coords(): 'x': np.arange(5, dtype=np.float64), }, ) - assert _coords_to_transform(da) is None + gt = _coords_to_transform(da) + assert gt is not None + assert gt.pixel_width == pytest.approx(1.0) + assert gt.pixel_height == pytest.approx(1.0) -def test_coords_to_transform_returns_none_for_int64_x_coords(): - """Same check on the other axis.""" +def test_coords_to_transform_returns_transform_for_float_y_int_x(): + """Symmetric check on the other axis.""" da = xr.DataArray( np.zeros((4, 5), dtype=np.float32), dims=['y', 'x'], @@ -74,13 +83,19 @@ def test_coords_to_transform_returns_none_for_int64_x_coords(): 'x': np.arange(5, dtype=np.int64), }, ) - assert _coords_to_transform(da) is None + gt = _coords_to_transform(da) + assert gt is not None + assert gt.pixel_width == pytest.approx(1.0) + assert gt.pixel_height == pytest.approx(1.0) @pytest.mark.parametrize("kind", [np.int32, np.int16, np.uint32]) -def test_coords_to_transform_returns_none_for_int32_coords(kind): - """int32 / int16 / uint also short-circuit -- the integer-dtype - signal is "no georef", regardless of width.""" +def test_coords_to_transform_returns_transform_for_non_int64_kinds(kind): + """The tightened sentinel only matches ``int64``. int32 / int16 / + uint32 are not the read-side placeholder (the reader explicitly + uses ``np.int64``), so a user authoring an integer-coord grid in + those dtypes gets a real transform instead of silent georef loss + (#2087).""" da = xr.DataArray( np.zeros((4, 5), dtype=np.float32), dims=['y', 'x'], @@ -89,8 +104,10 @@ def test_coords_to_transform_returns_none_for_int32_coords(kind): 'x': np.arange(5, dtype=kind), }, ) - assert _coords_to_transform(da) is None, ( - f"expected None for {kind.__name__} coords") + gt = _coords_to_transform(da) + assert gt is not None, ( + f"expected a real GeoTransform for {kind.__name__} coords; " + f"the sentinel only catches int64 ascending arange") def test_coords_to_transform_float_coords_unchanged():