Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-12,1657,HIGH,1;5,NewSubfileType (254) and SubIFDs (330) leaked through attrs['extra_tags'] across all four backends. Round-trip read overview -> write produced primary IFD wrongly marked as overview (NewSubfileType=1) and SubIFDs with stale byte offsets that crashed readers. Fixed by adding both tags to _MANAGED_TAGS and to _DANGEROUS_EXTRA_TAG_IDS writer-side filter (#1657).
geotiff,2026-05-12,1710,MEDIUM,2,"open_geotiff/read_geotiff_dask/read_geotiff_gpu windowed reads of non-georef TIFFs produced float64 half-pixel-shifted coords while full reads produced int64 [0,1,2,...] coords. Affected every backend the same way; not a backend parity bug, a windowed-vs-full inconsistency. _populate_attrs_from_geo_info also fabricated an identity transform attr on non-georef files. Fixed by threading has_georef through all windowed coord paths and through the transform attr emitter (#1710)."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
74 changes: 49 additions & 25 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,16 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None
attrs['raster_type'] = 'point'

src_t = geo_info.transform
if src_t is not None:
# Skip the transform attr for files where no GeoTIFF transform tags
# (ModelTransformation, ModelPixelScale, or ModelTiepoint) are
# present, signalled by ``has_georef=False``. GeoKeys / CRS metadata
# can still be present in that case. The default unit
# ``GeoTransform`` is a struct placeholder, not real georef --
# emitting it leaks an identity transform into attrs and confuses
# downstream code that expects ``'transform' in attrs`` to mean
# "this raster has a georef transform" (#1710).
has_georef = getattr(geo_info, 'has_georef', True)
if src_t is not None and has_georef:
if window is not None:
r0, c0, _r1, _c1 = window
origin_x_w = float(src_t.origin_x) + c0 * float(src_t.pixel_width)
Expand Down Expand Up @@ -695,15 +704,22 @@ def open_geotiff(source, *, dtype=None,
if window is not None:
# Adjust coordinates for windowed read. ``read_to_array`` rejected
# out-of-bounds windows above, so ``r0/c0/r1/c1`` are guaranteed
# in-range here (no clamp needed).
# in-range here (no clamp needed). For files with no GeoTIFF
# tags (``has_georef=False``), the default unit transform is
# not real, so fall back to integer pixel coords matching the
# ``_geo_to_coords`` shortcut taken on full reads. See #1710.
r0, c0, r1, c1 = window
t = geo_info.transform
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y
if not getattr(geo_info, 'has_georef', True):
full_x = np.arange(c0, c1, dtype=np.int64)
full_y = np.arange(r0, r1, dtype=np.int64)
else:
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
t = geo_info.transform
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y
else:
full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5
full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5
coords = {'y': full_y, 'x': full_x}

if name is None:
Expand Down Expand Up @@ -1844,20 +1860,26 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512,
raise ValueError(
f"window={window} is outside the source extent "
f"({full_h}x{full_w}) or has non-positive size.")
# Mirror the eager-path windowed coord computation in open_geotiff.
t = geo_info.transform
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
* t.pixel_width + t.origin_x)
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
* t.pixel_height + t.origin_y)
# Mirror the eager-path windowed coord computation in open_geotiff,
# including the ``has_georef=False`` shortcut to integer pixel
# coords for non-georef files (#1710).
if not getattr(geo_info, 'has_georef', True):
win_x = np.arange(win_c0, win_c1, dtype=np.int64)
win_y = np.arange(win_r0, win_r1, dtype=np.int64)
else:
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
* t.pixel_width + t.origin_x
+ t.pixel_width * 0.5)
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
* t.pixel_height + t.origin_y
+ t.pixel_height * 0.5)
t = geo_info.transform
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
* t.pixel_width + t.origin_x)
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
* t.pixel_height + t.origin_y)
else:
win_x = (np.arange(win_c0, win_c1, dtype=np.float64)
* t.pixel_width + t.origin_x
+ t.pixel_width * 0.5)
win_y = (np.arange(win_r0, win_r1, dtype=np.float64)
* t.pixel_height + t.origin_y
+ t.pixel_height * 0.5)
coords = {'y': win_y, 'x': win_x}
full_h = win_r1 - win_r0
full_w = win_c1 - win_c0
Expand Down Expand Up @@ -2275,12 +2297,14 @@ def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band):
out_w = c1 - c0
# Mirror the eager-numpy windowed coord computation in
# open_geotiff so the GPU-windowed coords carry the same
# absolute pixel-center values as the CPU path.
# absolute pixel-center values as the CPU path. For files
# with no GeoTIFF tags (``has_georef=False``), fall back to
# integer pixel coords matching ``_geo_to_coords`` (#1710).
t = geo_info.transform
if t is None:
if t is None or not getattr(geo_info, 'has_georef', True):
coords = {
'y': np.arange(out_h, dtype=np.int64),
'x': np.arange(out_w, dtype=np.int64),
'y': np.arange(r0, r1, dtype=np.int64),
'x': np.arange(c0, c1, dtype=np.int64),
}
else:
if geo_info.raster_type == RASTER_PIXEL_IS_POINT:
Expand Down
191 changes: 191 additions & 0 deletions xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
"""Regression tests for issue #1710.

A TIFF with no GeoTIFF tags (no ModelPixelScale / ModelTiepoint /
GeoKeyDirectory) used to produce different `y`/`x` coordinates between
full reads and windowed reads. Full reads emitted integer pixel coords
``[0, 1, 2, ...]`` via the ``has_georef=False`` shortcut in
``_geo_to_coords``; windowed reads bypassed that shortcut and synthesised
float64 coords like ``[-0.5, -1.5, ...]`` from the default unit
``GeoTransform``.

The fix routes every windowed-read path through the same shortcut, and
suppresses the fabricated ``attrs['transform']`` for non-georef files.
"""
from __future__ import annotations

import importlib.util

import numpy as np
import pytest

from xrspatial.geotiff import open_geotiff
from xrspatial.geotiff._writer import write


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy
return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")


@pytest.fixture
def no_georef_path_1710(tmp_path):
"""8x8 float32 TIFF with NO GeoTIFF tags (no ModelPixelScale etc.)."""
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
path = str(tmp_path / "no_georef_1710.tif")
write(arr, path, compression='none', tiled=False)
return path


class TestEagerWindowedCoords:

def test_full_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710)
assert da.y.dtype == np.int64
assert da.x.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(8))
np.testing.assert_array_equal(da.x.values, np.arange(8))

def test_windowed_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4))
assert da.y.dtype == np.int64
assert da.x.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(4))
np.testing.assert_array_equal(da.x.values, np.arange(4))

def test_offset_window_integer_coords(self, no_georef_path_1710):
"""Windowed read at (2, 3) origin should give coords [2,3,4,5] / [3,4,5,6]."""
da = open_geotiff(no_georef_path_1710, window=(2, 3, 6, 7))
assert da.y.dtype == np.int64
assert da.x.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(2, 6))
np.testing.assert_array_equal(da.x.values, np.arange(3, 7))

def test_no_transform_attr_emitted(self, no_georef_path_1710):
"""Non-georef files should not advertise a fabricated transform."""
da = open_geotiff(no_georef_path_1710)
assert 'transform' not in da.attrs, (
f"non-georef file should not carry a fabricated transform; "
f"got attrs={dict(da.attrs)}"
)
# Same for windowed read
da_w = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4))
assert 'transform' not in da_w.attrs


class TestDaskWindowedCoords:

def test_full_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, chunks=4)
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(8))

def test_windowed_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, chunks=4, window=(0, 0, 4, 4))
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(4))

def test_offset_window_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, chunks=4, window=(2, 3, 6, 7))
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(2, 6))
np.testing.assert_array_equal(da.x.values, np.arange(3, 7))


@_gpu_only
class TestGpuWindowedCoords:

def test_full_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, gpu=True)
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(8))

def test_windowed_read_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, gpu=True, window=(0, 0, 4, 4))
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(4))

def test_dask_cupy_windowed_integer_coords(self, no_georef_path_1710):
da = open_geotiff(no_georef_path_1710, gpu=True, chunks=4,
window=(0, 0, 4, 4))
assert da.y.dtype == np.int64
np.testing.assert_array_equal(da.y.values, np.arange(4))


class TestBackendParity:
"""Full read and windowed read must agree on coord dtype and values
across every backend pair."""

def test_dtype_parity_full(self, no_georef_path_1710):
np_da = open_geotiff(no_georef_path_1710)
dk_da = open_geotiff(no_georef_path_1710, chunks=4)
assert np_da.y.dtype == dk_da.y.dtype == np.int64
if _HAS_GPU:
gpu_da = open_geotiff(no_georef_path_1710, gpu=True)
assert gpu_da.y.dtype == np.int64

def test_dtype_parity_windowed(self, no_georef_path_1710):
win = (0, 0, 4, 4)
np_da = open_geotiff(no_georef_path_1710, window=win)
dk_da = open_geotiff(no_georef_path_1710, chunks=4, window=win)
assert np_da.y.dtype == dk_da.y.dtype == np.int64
if _HAS_GPU:
gpu_da = open_geotiff(no_georef_path_1710, gpu=True, window=win)
assert gpu_da.y.dtype == np.int64

def test_full_and_windowed_first_n_match(self, no_georef_path_1710):
"""Coords for a window starting at (0, 0) should equal the first N
coords of the full read."""
win = (0, 0, 4, 4)
full = open_geotiff(no_georef_path_1710)
win_da = open_geotiff(no_georef_path_1710, window=win)
np.testing.assert_array_equal(full.y.values[:4], win_da.y.values)
np.testing.assert_array_equal(full.x.values[:4], win_da.x.values)


class TestGeorefStillWorks:
"""The fix must not regress georeferenced reads.

Files with real GeoTIFF tags still get float64 coords with the
half-pixel shift for PixelIsArea.
"""

def test_georef_full_read_keeps_float64(self, tmp_path):
from xrspatial.geotiff._geotags import GeoTransform
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
gt = GeoTransform(
origin_x=500000.0, origin_y=4000000.0,
pixel_width=30.0, pixel_height=-30.0,
)
path = str(tmp_path / "georef_1710.tif")
write(arr, path, geo_transform=gt, crs_epsg=32610,
compression='none', tiled=False)
da = open_geotiff(path)
assert da.y.dtype == np.float64
assert da.x.dtype == np.float64
assert da.x.values[0] == pytest.approx(500015.0)
# transform attr SHOULD be present for georef files
assert 'transform' in da.attrs

def test_georef_windowed_read_keeps_float64(self, tmp_path):
from xrspatial.geotiff._geotags import GeoTransform
arr = np.arange(64, dtype=np.float32).reshape(8, 8)
gt = GeoTransform(
origin_x=500000.0, origin_y=4000000.0,
pixel_width=30.0, pixel_height=-30.0,
)
path = str(tmp_path / "georef_win_1710.tif")
write(arr, path, geo_transform=gt, crs_epsg=32610,
compression='none', tiled=False)
da = open_geotiff(path, window=(0, 0, 4, 4))
assert da.y.dtype == np.float64
assert da.x.dtype == np.float64
assert 'transform' in da.attrs
Loading