From 7c3a9847438a6c33b1ff50c7f0a0b415968f1c80 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 08:29:20 -0700 Subject: [PATCH 1/3] geotiff: keep no-georef int coords through to_geotiff round-trip (#1949) ``_coords_to_transform`` previously inferred a unit GeoTransform from the integer pixel coords the read-side no-georef branch emits (``np.arange(N, dtype=np.int64)``), so writing a no-georef DataArray back through ``to_geotiff`` silently injected ``ModelPixelScale`` / ``ModelTiepoint`` tags. On the next read the file looked georeferenced, the coord dtype flipped to float64, and a synthetic ``attrs['transform']`` appeared. Short-circuit when either spatial coord array has an integer dtype -- that signal only comes from the read-side no-georef fallback, so returning ``None`` here keeps the no-georef contract across an arbitrary number of read -> write -> read cycles on every writer path (eager numpy, dask streaming, GPU). An explicit ``attrs['transform']`` still wins because the writers consult it before calling this helper. Adds ``test_no_georef_writer_round_trip_1949.py`` covering the helper directly, eager / dask streaming / GPU round-trips, a double round-trip, and the explicit-attr override. --- xrspatial/geotiff/_coords.py | 17 ++ .../test_no_georef_writer_round_trip_1949.py | 286 ++++++++++++++++++ 2 files changed, 303 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py diff --git a/xrspatial/geotiff/_coords.py b/xrspatial/geotiff/_coords.py index b1ee185e..0d88eae2 100644 --- a/xrspatial/geotiff/_coords.py +++ b/xrspatial/geotiff/_coords.py @@ -277,6 +277,23 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': if len(x) < 2 or len(y) < 2: return None + # Integer coord dtype is the read-side no-georef signal: the + # ``has_georef=False`` branch in ``coords_from_pixel_geometry`` + # emits ``np.arange(N, dtype=np.int64)`` for files without GeoTIFF + # transform tags (#1710, #1753). Synthesising a GeoTransform from + # those ``[0, 1, 2, ...]`` arrays would inject a fake unit transform + # (``origin=-0.5, pixel_width=1.0``) into the written file's + # ModelPixelScale / ModelTiepoint tags. The next read then takes + # the georef branch and the coord dtype silently flips to + # ``float64`` with ``attrs['transform']`` present, breaking the + # no-georef contract that downstream code branches on. See issue + # #1949. Float coord arrays still produce a GeoTransform (the + # canonical georef path), and an explicit ``attrs['transform']`` + # bypasses this helper entirely so callers can still write a + # transform alongside int coords by setting the attr. + if x.dtype.kind in ('i', 'u') or y.dtype.kind in ('i', 'u'): + return None + # GeoTIFF only supports an affine transform; non-uniform spacing # cannot be expressed faithfully. Validate up-front instead of # silently writing a transform that only matches the first step. diff --git a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py new file mode 100644 index 00000000..990ca7ab --- /dev/null +++ b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py @@ -0,0 +1,286 @@ +"""Round-trip tests for issue #1949. + +A TIFF with no GeoTIFF tags (no ``ModelPixelScale``, no +``ModelTiepoint``, no GeoKeys) reads back with integer pixel coords +``[0, 1, 2, ...]`` and ``int64`` dtype (the no-georef branch added in +#1710 and #1753). + +Before the fix, writing that DataArray back through ``to_geotiff`` +silently injected a synthetic ``ModelPixelScale`` + ``ModelTiepoint`` +because ``_coords_to_transform`` happily inferred a unit transform from +the integer pixel coords. The next read then took the georef branch +and the y/x dtype flipped from ``int64`` to ``float64`` with a +``transform`` attr present. + +The fix makes ``_coords_to_transform`` return ``None`` when either +spatial coord array carries an integer dtype, so the no-georef contract +survives an arbitrary number of read -> write -> read cycles across +every writer path. +""" +from __future__ import annotations + +import importlib.util +import os +import tempfile + +import numpy as np +import pytest +import tifffile +import xarray as xr + +from xrspatial.geotiff import _coords_to_transform, open_geotiff, to_geotiff + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# --------------------------------------------------------------------------- +# Unit tests on _coords_to_transform +# --------------------------------------------------------------------------- + + +def test_coords_to_transform_returns_none_for_int64_y_coords(): + """Integer y coords ``[0, 1, 2, ...]`` are the no-georef signal the + read side emits; ``_coords_to_transform`` must not fabricate a + transform from them.""" + da = xr.DataArray( + np.zeros((4, 5), dtype=np.float32), + dims=['y', 'x'], + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(5, dtype=np.float64), + }, + ) + assert _coords_to_transform(da) is None + + +def test_coords_to_transform_returns_none_for_int64_x_coords(): + """Same check on the other axis.""" + da = xr.DataArray( + np.zeros((4, 5), dtype=np.float32), + dims=['y', 'x'], + coords={ + 'y': np.arange(4, dtype=np.float64), + 'x': np.arange(5, dtype=np.int64), + }, + ) + assert _coords_to_transform(da) is None + + +def test_coords_to_transform_returns_none_for_int32_coords(): + """int32 / int16 / uint also short-circuit -- the integer-dtype + signal is "no georef", regardless of width.""" + for kind in (np.int32, np.int16, np.uint32): + da = xr.DataArray( + np.zeros((4, 5), dtype=np.float32), + dims=['y', 'x'], + coords={ + 'y': np.arange(4, dtype=kind), + 'x': np.arange(5, dtype=kind), + }, + ) + assert _coords_to_transform(da) is None, ( + f"expected None for {kind.__name__} coords") + + +def test_coords_to_transform_float_coords_unchanged(): + """Float coords still produce a GeoTransform (sanity baseline).""" + da = xr.DataArray( + np.zeros((4, 5), dtype=np.float32), + dims=['y', 'x'], + coords={ + 'y': np.array([0.5, 1.5, 2.5, 3.5]), + 'x': np.array([10.0, 20.0, 30.0, 40.0, 50.0]), + }, + ) + gt = _coords_to_transform(da) + assert gt is not None + assert gt.pixel_width == pytest.approx(10.0) + assert gt.pixel_height == pytest.approx(1.0) + + +def test_coords_to_transform_3d_yxband_int_y_returns_none(): + """3D (y, x, band) with int y coords also short-circuits. + + The helper picks the y/x dims (filtering out 'band'); the integer + check must apply to those, not to the band axis. + """ + da = xr.DataArray( + np.zeros((4, 5, 3), dtype=np.float32), + dims=['y', 'x', 'band'], + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(5, dtype=np.int64), + 'band': np.arange(3), + }, + ) + assert _coords_to_transform(da) is None + + +# --------------------------------------------------------------------------- +# Round-trip tests through to_geotiff +# --------------------------------------------------------------------------- + + +def _make_no_georef_tiff(path, shape=(4, 5), seed=1949): + """Write a TIFF with no GeoTIFF tags.""" + arr = np.random.default_rng(seed=seed).random(shape).astype(np.float32) + tifffile.imwrite( + path, arr, photometric='minisblack', planarconfig='contig', + metadata=None, + ) + return arr + + +def test_round_trip_preserves_int_coords_eager(tmp_path): + """Eager numpy round-trip: y/x coord dtype stays int64 and no + transform attr is injected.""" + src = str(tmp_path / "no_georef_src_1949.tif") + _make_no_georef_tiff(src) + + da = open_geotiff(src) + assert da.coords['y'].dtype == np.int64 + assert da.coords['x'].dtype == np.int64 + assert 'transform' not in da.attrs + + out = str(tmp_path / "no_georef_out_1949.tif") + to_geotiff(da, out, compression='none', tiled=False) + + da2 = open_geotiff(out) + assert da2.coords['y'].dtype == np.int64, ( + f"round-trip flipped y coord dtype from int64 to " + f"{da2.coords['y'].dtype}: {da2.coords['y'].values}") + assert da2.coords['x'].dtype == np.int64 + assert 'transform' not in da2.attrs, ( + f"round-trip injected fake transform attr: " + f"{da2.attrs.get('transform')}") + np.testing.assert_array_equal( + da2.coords['y'].values, da.coords['y'].values) + np.testing.assert_array_equal( + da2.coords['x'].values, da.coords['x'].values) + + +def test_round_trip_preserves_int_coords_3d(tmp_path): + """Same round-trip on a 3D multi-band file.""" + src = str(tmp_path / "no_georef_src_3d_1949.tif") + arr = np.random.default_rng(seed=1949).random( + (4, 5, 3)).astype(np.float32) + tifffile.imwrite(src, arr, photometric='minisblack', + planarconfig='contig', metadata=None) + + da = open_geotiff(src) + assert da.coords['y'].dtype == np.int64 + assert da.coords['x'].dtype == np.int64 + assert 'transform' not in da.attrs + + out = str(tmp_path / "no_georef_out_3d_1949.tif") + to_geotiff(da, out, compression='none') + + da2 = open_geotiff(out) + assert da2.coords['y'].dtype == np.int64 + assert da2.coords['x'].dtype == np.int64 + assert 'transform' not in da2.attrs + + +def test_round_trip_dask_streaming_preserves_int_coords(tmp_path): + """Same contract via the dask streaming writer path.""" + src = str(tmp_path / "no_georef_src_dask_1949.tif") + arr = np.random.default_rng(seed=1949).random( + (64, 64)).astype(np.float32) + tifffile.imwrite(src, arr, photometric='minisblack', + planarconfig='contig', tile=(32, 32), + compression='deflate', metadata=None) + + da_dk = open_geotiff(src, chunks=32) + assert da_dk.coords['y'].dtype == np.int64 + assert da_dk.coords['x'].dtype == np.int64 + assert 'transform' not in da_dk.attrs + + out = str(tmp_path / "no_georef_out_dask_1949.tif") + to_geotiff(da_dk, out, compression='deflate', tile_size=32) + + da_rt = open_geotiff(out) + assert da_rt.coords['y'].dtype == np.int64 + assert da_rt.coords['x'].dtype == np.int64 + assert 'transform' not in da_rt.attrs + + +def test_double_round_trip_stable(tmp_path): + """Two consecutive read -> write -> read cycles produce identical + coord arrays and attrs sets.""" + src = str(tmp_path / "no_georef_double_1949.tif") + _make_no_georef_tiff(src, shape=(6, 8)) + + da1 = open_geotiff(src) + out1 = str(tmp_path / "no_georef_double_out1_1949.tif") + to_geotiff(da1, out1, compression='none', tiled=False) + + da2 = open_geotiff(out1) + out2 = str(tmp_path / "no_georef_double_out2_1949.tif") + to_geotiff(da2, out2, compression='none', tiled=False) + + da3 = open_geotiff(out2) + np.testing.assert_array_equal( + da1.coords['y'].values, da3.coords['y'].values) + np.testing.assert_array_equal( + da1.coords['x'].values, da3.coords['x'].values) + assert da3.coords['y'].dtype == da1.coords['y'].dtype + assert da3.coords['x'].dtype == da1.coords['x'].dtype + # ``transform`` should never appear in either attrs set + assert 'transform' not in da1.attrs + assert 'transform' not in da3.attrs + + +def test_explicit_transform_attr_still_writes(tmp_path): + """When the user explicitly sets ``attrs['transform']`` on a + DataArray with int coords (an odd combination, but allowed), the + writer still honours that explicit value -- the fix only catches the + implicit-from-int-coords path.""" + arr = np.zeros((4, 5), dtype=np.float32) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={ + 'y': np.arange(4, dtype=np.int64), + 'x': np.arange(5, dtype=np.int64), + }, + attrs={'transform': (30.0, 0.0, 100000.0, 0.0, -30.0, 4000000.0), + 'crs': 32610}, + ) + out = str(tmp_path / "explicit_transform_1949.tif") + to_geotiff(da, out, compression='none', tiled=False) + da_rt = open_geotiff(out) + assert 'transform' in da_rt.attrs + assert da_rt.attrs['transform'][0] == 30.0 + assert da_rt.attrs['crs'] == 32610 + + +@_gpu_only +def test_gpu_writer_preserves_int_coords(tmp_path): + """The GPU writer path goes through the same ``_coords_to_transform`` + fallback; the fix has to extend there too.""" + src = str(tmp_path / "no_georef_src_gpu_1949.tif") + _make_no_georef_tiff(src, shape=(64, 64)) + + da_gpu = open_geotiff(src, gpu=True) + assert da_gpu.coords['y'].dtype == np.int64 + assert da_gpu.coords['x'].dtype == np.int64 + assert 'transform' not in da_gpu.attrs + + out = str(tmp_path / "no_georef_out_gpu_1949.tif") + to_geotiff(da_gpu, out, compression='deflate') + + da_rt = open_geotiff(out) + assert da_rt.coords['y'].dtype == np.int64 + assert da_rt.coords['x'].dtype == np.int64 + assert 'transform' not in da_rt.attrs From b45422d93148b952b85ebe05aab175e8446ac8b5 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 09:03:08 -0700 Subject: [PATCH 2/3] geotiff: address PR #1954 review (docstring + parametrize dtypes) --- xrspatial/geotiff/_coords.py | 5 ++++ .../test_no_georef_writer_round_trip_1949.py | 24 +++++++++---------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/xrspatial/geotiff/_coords.py b/xrspatial/geotiff/_coords.py index 0d88eae2..b8dfd27a 100644 --- a/xrspatial/geotiff/_coords.py +++ b/xrspatial/geotiff/_coords.py @@ -250,6 +250,11 @@ def coords_to_transform(da: xr.DataArray) -> 'GeoTransform | None': :func:`coords_to_transform` against the original DataArray, so the helper must handle both layouts to keep the geo-transform consistent with the file's coord arrays. See issue #1643. + + Integer-dtype x/y coords are treated as a no-georef sentinel and + return ``None`` before the uniformity check runs; this is + intentional so the read-side ``np.arange`` placeholder round-trips + without inventing a fake unit transform (see issue #1949). """ if da.ndim == 3: # Drop the band-like dim and keep the two spatial dims in their diff --git a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py index 990ca7ab..4ccb5411 100644 --- a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py +++ b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py @@ -78,20 +78,20 @@ def test_coords_to_transform_returns_none_for_int64_x_coords(): assert _coords_to_transform(da) is None -def test_coords_to_transform_returns_none_for_int32_coords(): +@pytest.mark.parametrize("kind", [np.int32, np.int16, np.uint32]) +def test_coords_to_transform_returns_none_for_int32_coords(kind): """int32 / int16 / uint also short-circuit -- the integer-dtype signal is "no georef", regardless of width.""" - for kind in (np.int32, np.int16, np.uint32): - da = xr.DataArray( - np.zeros((4, 5), dtype=np.float32), - dims=['y', 'x'], - coords={ - 'y': np.arange(4, dtype=kind), - 'x': np.arange(5, dtype=kind), - }, - ) - assert _coords_to_transform(da) is None, ( - f"expected None for {kind.__name__} coords") + da = xr.DataArray( + np.zeros((4, 5), dtype=np.float32), + dims=['y', 'x'], + coords={ + 'y': np.arange(4, dtype=kind), + 'x': np.arange(5, dtype=kind), + }, + ) + assert _coords_to_transform(da) is None, ( + f"expected None for {kind.__name__} coords") def test_coords_to_transform_float_coords_unchanged(): From 8da3ca7bac409da99bc9fc8f0baa733508a6f7b0 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 15 May 2026 10:29:23 -0700 Subject: [PATCH 3/3] geotiff: importorskip tifffile in #1949 round-trip test tifffile is not a runtime dep, so the unconditional module-level import broke collection on CI where tifffile isn't installed. Switch to ``pytest.importorskip`` to skip the file gracefully instead, matching the pattern used by other geotiff tests (``test_miniswhite_nodata_1809``, ``test_predictor3_big_endian``). Also drop two unused stdlib imports. --- .../geotiff/tests/test_no_georef_writer_round_trip_1949.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py index 4ccb5411..12703129 100644 --- a/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py +++ b/xrspatial/geotiff/tests/test_no_georef_writer_round_trip_1949.py @@ -20,15 +20,14 @@ from __future__ import annotations import importlib.util -import os -import tempfile import numpy as np import pytest -import tifffile import xarray as xr -from xrspatial.geotiff import _coords_to_transform, open_geotiff, to_geotiff +tifffile = pytest.importorskip("tifffile") + +from xrspatial.geotiff import _coords_to_transform, open_geotiff, to_geotiff # noqa: E402 def _gpu_available() -> bool: