Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#### Bug fixes and improvements
- Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016)
- Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082)
- Tighten the writer's no-georef sentinel for integer x/y coords. The pre-fix check treated any integer dtype on either axis as the read-side no-georef placeholder and skipped transform inference, which also caught user-authored projected grids with integer-spaced coords (e.g. `x=[100,101,102], y=[200,199]`) and silently stripped their georef on write. The sentinel now matches only the exact reader pattern: `int64` ascending contiguous-step-1 arange on both axes. User-authored integer-coord grids that don't match (descending, non-unit step, non-uniform, or non-`int64`) now produce a real transform or raise `NonUniformCoordsError`. Coord values round-trip exactly through the new path; dtype flips int->float on subsequent reads. (#2087)
- Deprecate read-side emission of geographic-CRS GeoKey attrs (crs_name, geog_citation, datum_code, angular_units, semi_major_axis, inv_flattening); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
- Deprecate read-side emission of projected-CRS GeoKey attrs (linear_units, projection_code); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
- Deprecate read-side emission of vertical-CRS GeoKey attrs (vertical_crs, vertical_citation, vertical_units); the writer does not emit vertical GeoKeys so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
Expand Down
109 changes: 84 additions & 25 deletions xrspatial/geotiff/_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,47 @@
_BAND_DIM_NAMES = ('band', 'bands', 'channel')


def _is_no_georef_sentinel(coord: np.ndarray) -> bool:
"""True iff ``coord`` matches the read-side no-georef placeholder.

``coords_from_pixel_geometry`` emits ``np.arange(start, stop,
dtype=np.int64)`` for the y/x coords whenever the source file
carries no GeoTIFF transform tags -- both for full reads
(``start=0``) and windowed reads (``start=window_offset``). See
issues #1710, #1753, #1949. Round-tripping such an array through
``to_geotiff`` should not invent a synthetic unit transform, so
the writer's transform-inference code paths skip it.

The pre-#2087 check was ``dtype.kind in ('i', 'u')`` on either
axis, which also caught user-authored projected grids with
integer-spaced coords (e.g. ``x=[100, 101, 102]``,
``y=[200, 199]``). Those were silently stripped of georef on
write -- the file landed on disk with no transform tags and read
back as pixel coords. This helper tightens the match to the
exact pattern the reader emits: ``int64`` dtype, ascending,
contiguous step ``+1``, starting at an arbitrary integer
(windowed reads do not start at 0). A descending or
non-unit-step int array does not match, so the writer falls
through to the normal ``coords_to_transform`` path and either
synthesises a real transform or raises ``NonUniformCoordsError``.

Trade-off: a user-authored array whose coords happen to match
the read-side ``arange`` pattern exactly (e.g. ascending step-1
int64 starting at any integer, with no ``attrs['transform']``
set) is still treated as no-georef. Callers in that niche can
write a real transform by setting ``attrs['transform']``
explicitly or by using float coords.
"""
if coord.dtype != np.int64:
return False
n = len(coord)
if n < 1:
return False
return bool(np.array_equal(
coord, np.arange(coord[0], coord[0] + n, dtype=np.int64)
))


def coords_from_pixel_geometry(
origin_x: float,
origin_y: float,
Expand Down Expand Up @@ -264,12 +305,18 @@ def require_transform_for_georeferenced(
if xdim in da.coords and ydim in da.coords:
x = da.coords[xdim].values
y = da.coords[ydim].values
# Integer coord dtype is the read-side no-georef sentinel
# (#1710, #1753, #1949). ``coords_to_transform`` returns ``None``
# for these so int-coord arrays round-trip cleanly without a
# synthetic unit transform; mirror that here so the writer does
# not fail-close on a legitimate no-georef write.
if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'):
# The read-side no-georef sentinel is ``np.arange(start, stop,
# dtype=int64)`` on both axes (#1710, #1753, #1949). When both
# axes match that shape, ``coords_to_transform`` returns
# ``None`` and the array round-trips cleanly without a fake
# unit transform; mirror that here so the validator doesn't
# fail-close on a legitimate no-georef write. A
# user-authored integer-coord grid that does *not* match the
# sentinel (e.g. ``x=[100,101,102], y=[200,199]``) falls
# through to the raise below and the caller has to either
# supply ``attrs['transform']`` or accept the
# ``coords_to_transform`` path (#2087).
if _is_no_georef_sentinel(x) and _is_no_georef_sentinel(y):
return
raise ValueError(
f"Cannot infer GeoTIFF transform from a "
Expand Down Expand Up @@ -301,10 +348,14 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None':
helper must handle both layouts to keep the geo-transform consistent
with the file's coord arrays. See issue #1643.

Integer-dtype x/y coords are treated as a no-georef sentinel and
return ``None`` before the uniformity check runs; this is
intentional so the read-side ``np.arange`` placeholder round-trips
without inventing a fake unit transform (see issue #1949).
Coords matching the read-side no-georef placeholder
(``np.arange(start, stop, dtype=int64)`` on both axes) return
``None`` before the uniformity check runs; this is intentional
so the placeholder round-trips without inventing a fake unit
transform (#1949). The check was tightened in #2087 from "any
integer dtype" to the exact ``arange`` pattern because the
broader check was silently stripping georef from user-authored
integer-coord projected grids.
"""
if da.ndim == 3:
# Drop the band-like dim and keep the two spatial dims in their
Expand Down Expand Up @@ -336,21 +387,29 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None':
if len(x) < 2 and len(y) < 2:
return None

# Integer coord dtype is the read-side no-georef signal: the
# ``has_georef=False`` branch in ``coords_from_pixel_geometry``
# emits ``np.arange(N, dtype=np.int64)`` for files without GeoTIFF
# transform tags (#1710, #1753). Synthesising a GeoTransform from
# those ``[0, 1, 2, ...]`` arrays would inject a fake unit transform
# (``origin=-0.5, pixel_width=1.0``) into the written file's
# ModelPixelScale / ModelTiepoint tags. The next read then takes
# the georef branch and the coord dtype silently flips to
# ``float64`` with ``attrs['transform']`` present, breaking the
# no-georef contract that downstream code branches on. See issue
# #1949. Float coord arrays still produce a GeoTransform (the
# canonical georef path), and an explicit ``attrs['transform']``
# bypasses this helper entirely so callers can still write a
# transform alongside int coords by setting the attr.
if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'):
# No-georef sentinel: the ``has_georef=False`` branch in
# ``coords_from_pixel_geometry`` emits
# ``np.arange(start, stop, dtype=np.int64)`` for files without
# GeoTIFF transform tags (#1710, #1753). Synthesising a
# GeoTransform from those arrays would inject a fake unit
# transform (``pixel_width=1.0``, origin derived from
# ``coord[0]``) into the written file's ModelPixelScale /
# ModelTiepoint tags. The next read then takes the georef branch
# and the coord dtype silently flips to ``float64`` with
# ``attrs['transform']`` present, breaking the no-georef
# contract that downstream code branches on (#1949).
#
# The check used to be ``dtype.kind in ('i', 'u')`` on either
# axis, which was too broad: user-authored projected grids with
# integer-spaced coords (e.g. ``x=[100, 101, 102]``,
# ``y=[200, 199]``) matched the sentinel and lost their georef
# silently on write. The tightened check accepts only the exact
# shape the reader emits (``int64``, ascending, contiguous step
# ``+1`` on both axes). Anything else falls through here and
# gets a real transform synthesised below, or raises
# ``NonUniformCoordsError`` if the spacing is irregular. See
# issue #2087.
if _is_no_georef_sentinel(x) and _is_no_georef_sentinel(y):
return None

# GeoTIFF only supports an affine transform; non-uniform spacing
Expand Down
237 changes: 237 additions & 0 deletions xrspatial/geotiff/tests/test_int_coord_sentinel_2087.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
"""User-authored integer spatial coords must not silently drop georef (#2087).

The pre-fix sentinel at ``_coords.py:272`` and ``:353`` returned
``None`` from ``coords_to_transform`` (and waved the validator
through) whenever either x or y had an integer dtype. The intent
was to round-trip the read-side ``np.arange(N, dtype=int64)``
placeholder that ``coords_from_pixel_geometry`` emits for files
with no GeoTIFF transform tags. The sentinel was too broad: it
also caught user-authored projected grids with integer-spaced
coords, which lost their georef silently on write.

The tightened sentinel matches only the exact reader pattern:
``int64`` dtype, ascending, contiguous step ``+1`` on both axes.
These tests pin both directions:

1. The legitimate no-georef round-trip still works -- a file
with no transform tags reads back with int64 coords and
writes back without inventing a transform.
2. User-authored integer-coord grids are no longer silently
stripped. ``x=[100,101,102], y=[200,199]`` now writes a real
transform and round-trips with float coords.
3. Subsets and windowed reads of the no-georef placeholder
(which still produce ``arange``-shaped int64 arrays) continue
to round-trip cleanly.
4. Non-uniform int coords raise ``NonUniformCoordsError`` instead
of silently stripping.
"""
from __future__ import annotations

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff
from xrspatial.geotiff._coords import _is_no_georef_sentinel


# --- Unit checks on the sentinel helper itself --------------------------


@pytest.mark.parametrize(
"coord",
[
np.arange(5, dtype=np.int64), # full read
np.arange(3, 8, dtype=np.int64), # windowed read
np.arange(0, 1, dtype=np.int64), # degenerate 1-element
np.array([10, 11, 12], dtype=np.int64),
],
)
def test_sentinel_accepts_arange_int64(coord):
assert _is_no_georef_sentinel(coord)


@pytest.mark.parametrize(
"coord",
[
np.array([100, 101, 102], dtype=np.int32), # int32, not int64
np.array([100, 101, 102], dtype=np.float64), # float
np.array([200, 199], dtype=np.int64), # descending
np.array([0, 2, 4], dtype=np.int64), # step != 1
np.array([1, 2, 5], dtype=np.int64), # non-uniform
np.array([], dtype=np.int64), # empty
],
)
def test_sentinel_rejects_non_arange(coord):
assert not _is_no_georef_sentinel(coord)


# --- Round-trip behaviour ------------------------------------------------


def _make_georef_int_grid():
# User-authored projected grid with integer-spaced coords. ``y``
# decreases top-to-bottom by convention, so it does not match the
# ascending sentinel even before any other check.
return xr.DataArray(
np.zeros((2, 3), dtype=np.float32),
coords={'y': np.array([200, 199]), 'x': np.array([100, 101, 102])},
dims=('y', 'x'),
)


def test_user_authored_int_grid_writes_real_transform(tmp_path):
da = _make_georef_int_grid()
path = str(tmp_path / "tmp_2087_int_grid.tif")
to_geotiff(da, path)

out = open_geotiff(path)
# Coord values round-trip exactly; dtype flips int -> float because
# the file now carries a real transform and the reader emits float
# pixel-center coords.
assert out.coords['x'].dtype.kind == 'f'
assert out.coords['y'].dtype.kind == 'f'
np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0])
np.testing.assert_array_equal(out.coords['y'].values, [200.0, 199.0])
# Transform attr is present (the bug was that it wasn't).
assert out.attrs.get('transform') is not None


def test_both_axes_ascending_int64_step1_trade_off(tmp_path):
# Documented trade-off corner: when both axes are int64 ascending
# step-1 (i.e. both match the read-side arange pattern exactly),
# the sentinel cannot distinguish a user-authored grid from the
# read-side no-georef placeholder. The writer resolves to
# no-georef. A caller wanting georef on this pattern must set
# ``attrs['transform']`` explicitly (see the next test).
da = xr.DataArray(
np.zeros((3, 3), dtype=np.float32),
coords={
'y': np.array([200, 201, 202], dtype=np.int64),
'x': np.array([100, 101, 102], dtype=np.int64),
},
dims=('y', 'x'),
)
# Both axes match the sentinel exactly (int64 ascending step 1).
# This is the documented trade-off: ambiguous between
# "read-side placeholder" and "user-authored arange-shaped grid".
# The sentinel resolves to no-georef. A caller wanting georef on
# this pattern must set attrs['transform'] explicitly.
path = str(tmp_path / "tmp_2087_both_arange.tif")
to_geotiff(da, path)
out = open_geotiff(path)
# No-georef round-trip: coords come back as int64 0..N-1
# placeholders, not the original values.
assert out.coords['x'].dtype == np.int64
assert out.coords['y'].dtype == np.int64


def test_user_authored_int_grid_with_explicit_transform(tmp_path):
# Caller in the ambiguous-trade-off corner who wants georef sets
# attrs['transform'] explicitly. The writer must use that
# transform rather than the sentinel inference.
da = xr.DataArray(
np.zeros((3, 3), dtype=np.float32),
coords={
'y': np.array([200, 201, 202], dtype=np.int64),
'x': np.array([100, 101, 102], dtype=np.int64),
},
dims=('y', 'x'),
attrs={'transform': (1.0, 0.0, 99.5, 0.0, 1.0, 199.5)},
)
path = str(tmp_path / "tmp_2087_explicit_transform.tif")
to_geotiff(da, path)
out = open_geotiff(path)
assert out.attrs.get('transform') is not None
np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0])


def test_non_uniform_int_coords_raise(tmp_path):
# Non-uniform integer spacing under the old sentinel silently
# stripped georef. Under the tightened sentinel it falls through
# to ``coords_to_transform`` and trips the uniform-spacing check.
da = xr.DataArray(
np.zeros((3, 3), dtype=np.float32),
coords={
'y': np.array([10, 11, 12], dtype=np.int64),
'x': np.array([1, 2, 5], dtype=np.int64), # non-uniform
},
dims=('y', 'x'),
)
path = str(tmp_path / "tmp_2087_non_uniform.tif")
with pytest.raises(ValueError, match="not uniformly spaced"):
to_geotiff(da, path)


def test_int_x_float_y_writes_transform(tmp_path):
# One axis integer, the other float: under the old sentinel any
# integer axis defeated the transform-inference. Under the
# tightened sentinel, the float y axis means the int x axis falls
# through to ``coords_to_transform`` (which handles int math
# fine) and a transform is written.
da = xr.DataArray(
np.zeros((2, 3), dtype=np.float32),
coords={
'y': np.array([50.5, 49.5], dtype=np.float64),
'x': np.array([100, 101, 102], dtype=np.int64),
},
dims=('y', 'x'),
)
path = str(tmp_path / "tmp_2087_mixed_dtypes.tif")
to_geotiff(da, path)
out = open_geotiff(path)
assert out.attrs.get('transform') is not None
np.testing.assert_array_equal(out.coords['x'].values, [100.0, 101.0, 102.0])


# --- The legitimate no-georef round-trip must keep working ---------------


def test_no_georef_roundtrip_preserved(tmp_path):
# Build a no-georef file via the read-side path: write a file with
# ``attrs['transform']`` explicitly cleared, then re-read it.
# ``open_geotiff`` returns int64 ``np.arange``-shaped coords; the
# next write must not invent a transform.
src = xr.DataArray(
np.zeros((4, 4), dtype=np.float32),
coords={'y': np.arange(4, dtype=np.int64), 'x': np.arange(4, dtype=np.int64)},
dims=('y', 'x'),
)
path = str(tmp_path / "tmp_2087_no_georef.tif")
to_geotiff(src, path)

out = open_geotiff(path)
# Verify the read came back as no-georef.
assert out.coords['x'].dtype == np.int64
assert out.coords['y'].dtype == np.int64
assert out.attrs.get('transform') is None

# Round-trip: write again. No transform should be invented.
path2 = str(tmp_path / "tmp_2087_no_georef_rt.tif")
to_geotiff(out, path2)
out2 = open_geotiff(path2)
assert out2.coords['x'].dtype == np.int64
assert out2.attrs.get('transform') is None


def test_windowed_no_georef_roundtrip(tmp_path):
# The read-side emits ``np.arange(c0, c1, dtype=int64)`` for
# windowed reads, so the sentinel must accept arange starting at
# any integer, not just 0. Test by writing an array whose coords
# mimic a windowed no-georef read.
da = xr.DataArray(
np.zeros((3, 4), dtype=np.float32),
coords={
'y': np.arange(10, 13, dtype=np.int64),
'x': np.arange(20, 24, dtype=np.int64),
},
dims=('y', 'x'),
)
path = str(tmp_path / "tmp_2087_windowed.tif")
to_geotiff(da, path)
out = open_geotiff(path)
# Under the documented trade-off this is still treated as
# no-georef (both axes match the arange pattern). Coords come
# back as 0..N-1 placeholders.
assert out.coords['x'].dtype == np.int64
assert out.attrs.get('transform') is None
Loading
Loading