diff --git a/.claude/sweep-metadata-state.csv b/.claude/sweep-metadata-state.csv index 76ab451d..fab813b0 100644 --- a/.claude/sweep-metadata-state.csv +++ b/.claude/sweep-metadata-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-12,1657,HIGH,1;5,NewSubfileType (254) and SubIFDs (330) leaked through attrs['extra_tags'] across all four backends. Round-trip read overview -> write produced primary IFD wrongly marked as overview (NewSubfileType=1) and SubIFDs with stale byte offsets that crashed readers. Fixed by adding both tags to _MANAGED_TAGS and to _DANGEROUS_EXTRA_TAG_IDS writer-side filter (#1657). +geotiff,2026-05-12,1710,MEDIUM,2,"open_geotiff/read_geotiff_dask/read_geotiff_gpu windowed reads of non-georef TIFFs produced float64 half-pixel-shifted coords while full reads produced int64 [0,1,2,...] coords. Affected every backend the same way; not a backend parity bug, a windowed-vs-full inconsistency. _populate_attrs_from_geo_info also fabricated an identity transform attr on non-georef files. Fixed by threading has_georef through all windowed coord paths and through the transform attr emitter (#1710)." reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch. diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 70f35984..63065811 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -436,7 +436,16 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None attrs['raster_type'] = 'point' src_t = geo_info.transform - if src_t is not None: + # Skip the transform attr for files where no GeoTIFF transform tags + # (ModelTransformation, ModelPixelScale, or ModelTiepoint) are + # present, signalled by ``has_georef=False``. GeoKeys / CRS metadata + # can still be present in that case. The default unit + # ``GeoTransform`` is a struct placeholder, not real georef -- + # emitting it leaks an identity transform into attrs and confuses + # downstream code that expects ``'transform' in attrs`` to mean + # "this raster has a georef transform" (#1710). + has_georef = getattr(geo_info, 'has_georef', True) + if src_t is not None and has_georef: if window is not None: r0, c0, _r1, _c1 = window origin_x_w = float(src_t.origin_x) + c0 * float(src_t.pixel_width) @@ -695,15 +704,22 @@ def open_geotiff(source, *, dtype=None, if window is not None: # Adjust coordinates for windowed read. ``read_to_array`` rejected # out-of-bounds windows above, so ``r0/c0/r1/c1`` are guaranteed - # in-range here (no clamp needed). + # in-range here (no clamp needed). For files with no GeoTIFF + # tags (``has_georef=False``), the default unit transform is + # not real, so fall back to integer pixel coords matching the + # ``_geo_to_coords`` shortcut taken on full reads. See #1710. r0, c0, r1, c1 = window - t = geo_info.transform - if geo_info.raster_type == RASTER_PIXEL_IS_POINT: - full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x - full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + if not getattr(geo_info, 'has_georef', True): + full_x = np.arange(c0, c1, dtype=np.int64) + full_y = np.arange(r0, r1, dtype=np.int64) else: - full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5 - full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5 + t = geo_info.transform + if geo_info.raster_type == RASTER_PIXEL_IS_POINT: + full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + else: + full_x = np.arange(c0, c1, dtype=np.float64) * t.pixel_width + t.origin_x + t.pixel_width * 0.5 + full_y = np.arange(r0, r1, dtype=np.float64) * t.pixel_height + t.origin_y + t.pixel_height * 0.5 coords = {'y': full_y, 'x': full_x} if name is None: @@ -1844,20 +1860,26 @@ def read_geotiff_dask(source: str, *, dtype=None, chunks: int | tuple = 512, raise ValueError( f"window={window} is outside the source extent " f"({full_h}x{full_w}) or has non-positive size.") - # Mirror the eager-path windowed coord computation in open_geotiff. - t = geo_info.transform - if geo_info.raster_type == RASTER_PIXEL_IS_POINT: - win_x = (np.arange(win_c0, win_c1, dtype=np.float64) - * t.pixel_width + t.origin_x) - win_y = (np.arange(win_r0, win_r1, dtype=np.float64) - * t.pixel_height + t.origin_y) + # Mirror the eager-path windowed coord computation in open_geotiff, + # including the ``has_georef=False`` shortcut to integer pixel + # coords for non-georef files (#1710). + if not getattr(geo_info, 'has_georef', True): + win_x = np.arange(win_c0, win_c1, dtype=np.int64) + win_y = np.arange(win_r0, win_r1, dtype=np.int64) else: - win_x = (np.arange(win_c0, win_c1, dtype=np.float64) - * t.pixel_width + t.origin_x - + t.pixel_width * 0.5) - win_y = (np.arange(win_r0, win_r1, dtype=np.float64) - * t.pixel_height + t.origin_y - + t.pixel_height * 0.5) + t = geo_info.transform + if geo_info.raster_type == RASTER_PIXEL_IS_POINT: + win_x = (np.arange(win_c0, win_c1, dtype=np.float64) + * t.pixel_width + t.origin_x) + win_y = (np.arange(win_r0, win_r1, dtype=np.float64) + * t.pixel_height + t.origin_y) + else: + win_x = (np.arange(win_c0, win_c1, dtype=np.float64) + * t.pixel_width + t.origin_x + + t.pixel_width * 0.5) + win_y = (np.arange(win_r0, win_r1, dtype=np.float64) + * t.pixel_height + t.origin_y + + t.pixel_height * 0.5) coords = {'y': win_y, 'x': win_x} full_h = win_r1 - win_r0 full_w = win_c1 - win_c0 @@ -2275,12 +2297,14 @@ def _gpu_apply_window_band(arr_gpu, geo_info, *, window, band): out_w = c1 - c0 # Mirror the eager-numpy windowed coord computation in # open_geotiff so the GPU-windowed coords carry the same - # absolute pixel-center values as the CPU path. + # absolute pixel-center values as the CPU path. For files + # with no GeoTIFF tags (``has_georef=False``), fall back to + # integer pixel coords matching ``_geo_to_coords`` (#1710). t = geo_info.transform - if t is None: + if t is None or not getattr(geo_info, 'has_georef', True): coords = { - 'y': np.arange(out_h, dtype=np.int64), - 'x': np.arange(out_w, dtype=np.int64), + 'y': np.arange(r0, r1, dtype=np.int64), + 'x': np.arange(c0, c1, dtype=np.int64), } else: if geo_info.raster_type == RASTER_PIXEL_IS_POINT: diff --git a/xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py b/xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py new file mode 100644 index 00000000..4c719ef0 --- /dev/null +++ b/xrspatial/geotiff/tests/test_no_georef_windowed_coords_1710.py @@ -0,0 +1,191 @@ +"""Regression tests for issue #1710. + +A TIFF with no GeoTIFF tags (no ModelPixelScale / ModelTiepoint / +GeoKeyDirectory) used to produce different `y`/`x` coordinates between +full reads and windowed reads. Full reads emitted integer pixel coords +``[0, 1, 2, ...]`` via the ``has_georef=False`` shortcut in +``_geo_to_coords``; windowed reads bypassed that shortcut and synthesised +float64 coords like ``[-0.5, -1.5, ...]`` from the default unit +``GeoTransform``. + +The fix routes every windowed-read path through the same shortcut, and +suppresses the fabricated ``attrs['transform']`` for non-georef files. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._writer import write + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +@pytest.fixture +def no_georef_path_1710(tmp_path): + """8x8 float32 TIFF with NO GeoTIFF tags (no ModelPixelScale etc.).""" + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / "no_georef_1710.tif") + write(arr, path, compression='none', tiled=False) + return path + + +class TestEagerWindowedCoords: + + def test_full_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710) + assert da.y.dtype == np.int64 + assert da.x.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(8)) + np.testing.assert_array_equal(da.x.values, np.arange(8)) + + def test_windowed_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4)) + assert da.y.dtype == np.int64 + assert da.x.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(4)) + np.testing.assert_array_equal(da.x.values, np.arange(4)) + + def test_offset_window_integer_coords(self, no_georef_path_1710): + """Windowed read at (2, 3) origin should give coords [2,3,4,5] / [3,4,5,6].""" + da = open_geotiff(no_georef_path_1710, window=(2, 3, 6, 7)) + assert da.y.dtype == np.int64 + assert da.x.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(2, 6)) + np.testing.assert_array_equal(da.x.values, np.arange(3, 7)) + + def test_no_transform_attr_emitted(self, no_georef_path_1710): + """Non-georef files should not advertise a fabricated transform.""" + da = open_geotiff(no_georef_path_1710) + assert 'transform' not in da.attrs, ( + f"non-georef file should not carry a fabricated transform; " + f"got attrs={dict(da.attrs)}" + ) + # Same for windowed read + da_w = open_geotiff(no_georef_path_1710, window=(0, 0, 4, 4)) + assert 'transform' not in da_w.attrs + + +class TestDaskWindowedCoords: + + def test_full_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, chunks=4) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(8)) + + def test_windowed_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, chunks=4, window=(0, 0, 4, 4)) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(4)) + + def test_offset_window_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, chunks=4, window=(2, 3, 6, 7)) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(2, 6)) + np.testing.assert_array_equal(da.x.values, np.arange(3, 7)) + + +@_gpu_only +class TestGpuWindowedCoords: + + def test_full_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, gpu=True) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(8)) + + def test_windowed_read_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, gpu=True, window=(0, 0, 4, 4)) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(4)) + + def test_dask_cupy_windowed_integer_coords(self, no_georef_path_1710): + da = open_geotiff(no_georef_path_1710, gpu=True, chunks=4, + window=(0, 0, 4, 4)) + assert da.y.dtype == np.int64 + np.testing.assert_array_equal(da.y.values, np.arange(4)) + + +class TestBackendParity: + """Full read and windowed read must agree on coord dtype and values + across every backend pair.""" + + def test_dtype_parity_full(self, no_georef_path_1710): + np_da = open_geotiff(no_georef_path_1710) + dk_da = open_geotiff(no_georef_path_1710, chunks=4) + assert np_da.y.dtype == dk_da.y.dtype == np.int64 + if _HAS_GPU: + gpu_da = open_geotiff(no_georef_path_1710, gpu=True) + assert gpu_da.y.dtype == np.int64 + + def test_dtype_parity_windowed(self, no_georef_path_1710): + win = (0, 0, 4, 4) + np_da = open_geotiff(no_georef_path_1710, window=win) + dk_da = open_geotiff(no_georef_path_1710, chunks=4, window=win) + assert np_da.y.dtype == dk_da.y.dtype == np.int64 + if _HAS_GPU: + gpu_da = open_geotiff(no_georef_path_1710, gpu=True, window=win) + assert gpu_da.y.dtype == np.int64 + + def test_full_and_windowed_first_n_match(self, no_georef_path_1710): + """Coords for a window starting at (0, 0) should equal the first N + coords of the full read.""" + win = (0, 0, 4, 4) + full = open_geotiff(no_georef_path_1710) + win_da = open_geotiff(no_georef_path_1710, window=win) + np.testing.assert_array_equal(full.y.values[:4], win_da.y.values) + np.testing.assert_array_equal(full.x.values[:4], win_da.x.values) + + +class TestGeorefStillWorks: + """The fix must not regress georeferenced reads. + + Files with real GeoTIFF tags still get float64 coords with the + half-pixel shift for PixelIsArea. + """ + + def test_georef_full_read_keeps_float64(self, tmp_path): + from xrspatial.geotiff._geotags import GeoTransform + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + gt = GeoTransform( + origin_x=500000.0, origin_y=4000000.0, + pixel_width=30.0, pixel_height=-30.0, + ) + path = str(tmp_path / "georef_1710.tif") + write(arr, path, geo_transform=gt, crs_epsg=32610, + compression='none', tiled=False) + da = open_geotiff(path) + assert da.y.dtype == np.float64 + assert da.x.dtype == np.float64 + assert da.x.values[0] == pytest.approx(500015.0) + # transform attr SHOULD be present for georef files + assert 'transform' in da.attrs + + def test_georef_windowed_read_keeps_float64(self, tmp_path): + from xrspatial.geotiff._geotags import GeoTransform + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + gt = GeoTransform( + origin_x=500000.0, origin_y=4000000.0, + pixel_width=30.0, pixel_height=-30.0, + ) + path = str(tmp_path / "georef_win_1710.tif") + write(arr, path, geo_transform=gt, crs_epsg=32610, + compression='none', tiled=False) + da = open_geotiff(path, window=(0, 0, 4, 4)) + assert da.y.dtype == np.float64 + assert da.x.dtype == np.float64 + assert 'transform' in da.attrs