diff --git a/docs/source/user_guide/attrs_contract.rst b/docs/source/user_guide/attrs_contract.rst index 7bb80a31..b62502d1 100644 --- a/docs/source/user_guide/attrs_contract.rst +++ b/docs/source/user_guide/attrs_contract.rst @@ -44,6 +44,9 @@ write. * - ``crs`` - int - EPSG code of the horizontal CRS, when one can be resolved. + Dropped on rotated reads opened with ``allow_rotated=True`` + (issue #2122); the in-memory array is a pixel grid with integer + coords and is not georeferenced. * - ``crs_wkt`` - str - WKT string of the horizontal CRS. Present on read when any CRS @@ -52,7 +55,8 @@ write. dialect depends on the source: paths that synthesise a WKT from an EPSG code via pyproj emit WKT2; paths that read a WKT verbatim from the file (e.g. a VRT ``SRS`` tag) carry whatever - dialect was stored. + dialect was stored. Dropped on rotated reads opened with + ``allow_rotated=True`` (issue #2122), in lockstep with ``crs``. * - ``transform`` - tuple - ``(pixel_width, 0.0, origin_x, 0.0, pixel_height, origin_y)`` diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index aabda8e0..f9242232 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -30,13 +30,18 @@ Canonical (xrspatial owns these; round-trip stable): -- ``crs``: EPSG integer code for the horizontal CRS. +- ``crs``: EPSG integer code for the horizontal CRS. Dropped on rotated + reads opened with ``allow_rotated=True`` (issue #2122) -- the array is + treated as a no-georef pixel grid in that case. - ``crs_wkt``: WKT string for the horizontal CRS. Present on read whenever - any CRS information is available. + any CRS information is available. Dropped on rotated reads opened with + ``allow_rotated=True`` (issue #2122), in lockstep with ``crs``. - ``transform``: rasterio-style 6-tuple ``(pixel_width, 0.0, origin_x, 0.0, pixel_height, origin_y)``. Omitted for files with no GeoTIFF transform tags (ModelTransformation, - ModelPixelScale, or ModelTiepoint). + ModelPixelScale, or ModelTiepoint), and for rotated reads opened with + ``allow_rotated=True`` (axis-aligned 6-tuple would silently drop the + rotation terms). - ``nodata``: declared file sentinel as stored in the GDAL_NODATA tag. Set whenever the source declares one, as a scalar of the source dtype, regardless of whether the in-memory array is float-with-NaN @@ -371,7 +376,7 @@ def _populate_attrs_from_geo_info(attrs: dict, geo_info, *, window=None) -> None # ``open_geotiff(allow_rotated=True)``, CRS attrs are dropped on # this path too -- otherwise downstream code that gates on # ``"crs" in da.attrs`` treats the array as spatially meaningful - # while the actual mapping is gone (#2126). + # while the actual mapping is gone (#2122 / #2126). rotated_optin = ( src_t is not None and getattr(src_t, 'rotated_affine', None) is not None diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 0156ba8f..894b9864 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -252,6 +252,18 @@ def read_vrt(source: str, *, # the origin already *is* the center of pixel (0, 0) and no shift # is applied. This mirrors ``_geo_to_coords`` for non-VRT reads. gt = vrt.geo_transform + # A rotated VRT under ``allow_rotated=True`` is treated as no-georef + # by the GeoTIFF contract (#2115): the in-memory array is a pixel + # grid, not a projected raster, so ``attrs['crs']`` would mislead + # downstream code that branches on ``'crs' in attrs`` (#2122). The + # rotated case is the non-zero ``b`` / ``d`` term on the GDAL + # GeoTransform (positions 2 and 4). A VRT with no ```` + # at all is the general no-georef case and the CRS is still + # surfaced (the writer-side ``crs=`` kwarg path emits CRS without a + # transform; see ``test_crs_kwarg_no_coords``). + _vrt_is_rotated = ( + gt is not None and (gt[2] != 0.0 or gt[4] != 0.0) + ) if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt height, width = arr.shape[:2] @@ -261,10 +273,18 @@ def read_vrt(source: str, *, coord_window = (r0, c0, r0 + height, c0 + width) else: coord_window = None + # Rotated VRTs emit int64 pixel coords to match the eager + # non-VRT rotated path (#2122 follow-up). Without this gate + # the VRT branch handed back float projected coords while + # the rest of the read pretended the array had no georef, + # so a downstream consumer saw float64 x/y dtypes on a + # no-georef array, the inverse of the contract documented in + # ``docs/source/user_guide/attrs_contract.rst``. coords = _coords_from_pixel_geometry( origin_x, origin_y, res_x, res_y, height, width, is_point=vrt.raster_type == 'point', window=coord_window, + has_georef=not _vrt_is_rotated, ) else: coords = {} @@ -273,15 +293,16 @@ def read_vrt(source: str, *, # ``_populate_attrs_from_geo_info``; stamp the contract version here # so both code paths emit the same marker. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} - if gt is None: + if gt is None or _vrt_is_rotated: # Mirror the eager non-VRT no-georef path: stamp the no-georef - # marker whenever the source carries no transform. The current - # VRT no-transform branch emits empty coords so the writer has - # nothing to misinterpret, but stamping defensively keeps the - # contract consistent if a future change adds placeholder - # coords here. See issue #2120. + # marker whenever the read produced placeholder int64 coords. + # Covers both "no transform tags at all" and rotated reads under + # ``allow_rotated=True``. Without the rotated arm, the writer + # round-trip on a rotated read mistook the int64 coords for a + # user-authored integer-coord grid and stripped georef from the + # output (#2120 / #2122 follow-up). attrs[_NO_GEOREF_KEY] = True - if vrt.crs_wkt: + if vrt.crs_wkt and not _vrt_is_rotated: epsg = _wkt_to_epsg(vrt.crs_wkt) if epsg is not None: attrs['crs'] = epsg @@ -344,7 +365,11 @@ def read_vrt(source: str, *, # by open_geotiff: (pixel_width, 0, origin_x, 0, pixel_height, origin_y). # vrt.geo_transform is GDAL ordering, so reorder. For a windowed read # the origin shifts by (col_offset * res_x, row_offset * res_y). - if gt is not None: + # Rotated VRTs (allow_rotated=True opt-in) intentionally skip the + # transform attr because the axis-aligned 6-tuple would silently drop + # the rotation terms and downstream code that branches on + # ``'transform' in attrs`` would treat the array as projected (#2122). + if gt is not None and not _vrt_is_rotated: if window is not None: tt_window = (max(0, window[0]), max(0, window[1]), 0, 0) else: @@ -695,23 +720,46 @@ def _read_vrt_chunked(source, *, window, band, name, chunks, gpu, dtype, # ``_populate_attrs_from_geo_info``, so the contract version is # stamped inline using the shared constant to stay in lockstep. attrs = {'_xrspatial_geotiff_contract': _ATTRS_CONTRACT_VERSION} + # ``_vrt_is_rotated`` matches the eager VRT branch's gate (#2122): + # rotated VRTs read with ``allow_rotated=True`` opt out of the + # georef attrs (``crs`` / ``crs_wkt`` / ``transform``) so downstream + # code that branches on ``'crs' in attrs`` does not treat the pixel + # grid as projected. A VRT with no ```` still + # surfaces CRS (mirroring the GeoTIFF writer's ``crs=`` kwarg + # path). + _vrt_is_rotated = ( + gt is not None and (gt[2] != 0.0 or gt[4] != 0.0) + ) if gt is not None: origin_x, res_x, _, origin_y, _, res_y = gt coord_window = (win_r0, win_c0, win_r0 + full_h, win_c0 + full_w) + # Rotated VRTs emit int64 pixel coords to match the eager + # non-VRT rotated path (#2122 follow-up). Without this gate the + # chunked VRT branch handed back float projected coords on a + # no-georef array, the inverse of the contract documented in + # ``docs/source/user_guide/attrs_contract.rst``. coords = _coords_from_pixel_geometry( origin_x, origin_y, res_x, res_y, full_h, full_w, is_point=vrt.raster_type == 'point', window=coord_window, + has_georef=not _vrt_is_rotated, ) - attrs['transform'] = _transform_tuple_from_pixel_geometry( - origin_x, origin_y, res_x, res_y, - window=(win_r0, win_c0, 0, 0), - ) + if not _vrt_is_rotated: + attrs['transform'] = _transform_tuple_from_pixel_geometry( + origin_x, origin_y, res_x, res_y, + window=(win_r0, win_c0, 0, 0), + ) + else: + # Mirrors the no-transform arm: rotated reads emit + # placeholder int64 coords, so stamp the marker that lets + # the writer tell them apart from a user-authored integer + # grid (#2120 / #2122 follow-up). + attrs[_NO_GEOREF_KEY] = True else: # Defensive marker (issue #2120). See the eager VRT branch. attrs[_NO_GEOREF_KEY] = True - if vrt.crs_wkt: + if vrt.crs_wkt and not _vrt_is_rotated: epsg = _wkt_to_epsg(vrt.crs_wkt) if epsg is not None: attrs['crs'] = epsg diff --git a/xrspatial/geotiff/tests/test_allow_rotated_no_crs_2122.py b/xrspatial/geotiff/tests/test_allow_rotated_no_crs_2122.py new file mode 100644 index 00000000..e03665c1 --- /dev/null +++ b/xrspatial/geotiff/tests/test_allow_rotated_no_crs_2122.py @@ -0,0 +1,357 @@ +"""``allow_rotated=True`` drops attrs['crs'] / ['crs_wkt'] (issue #2122). + +The ``open_geotiff`` docstring promises that ``allow_rotated=True`` +reads a rotated GeoTIFF as a no-georef pixel grid: integer pixel coords +on ``x`` / ``y`` and ``attrs['crs']`` dropped. Before #2122, +``_populate_attrs_from_geo_info`` only gated ``transform`` on the +``has_georef`` flag and still surfaced ``crs`` / ``crs_wkt`` from the +GeoKey block. Downstream code that branches on ``'crs' in da.attrs`` to +detect a projected raster would treat the rotated-read pixel grid as +georeferenced and apply the wrong projection math against integer pixel +indices. + +These tests pin the contract on the four GeoTIFF backends and on the +two VRT entry points (eager + chunked). +""" +from __future__ import annotations + +import importlib.util +import struct + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import _NO_GEOREF_KEY, TAG_MODEL_TRANSFORMATION + + +# Rotated 4x4 ModelTransformation: pixel_width 1.0, b=0.1 (column-axis +# rotation), pixel_height -1.0, origin (100, 200). Identical structure +# to the fixture in ``test_allow_rotated_geotiff_2115.py`` but with the +# EPSG:4326 GeoKey block appended so the CRS-leak path is exercised. +_ROTATED_M_2122 = ( + 1.0, 0.1, 0.0, 100.0, + 0.0, -1.0, 0.0, 200.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, +) + +# Minimal GeoKeyDirectoryTag declaring EPSG:4326. +# Layout: (version, key_revision, minor_revision, n_keys, +# KeyID, TIFFTagLocation, Count, Value_Offset, ...) +_GEO_KEYS_4326 = ( + 1, 1, 0, 3, + 1024, 0, 1, 2, # GTModelTypeGeoKey = Geographic + 1025, 0, 1, 1, # GTRasterTypeGeoKey = Area + 2048, 0, 1, 4326, # GeographicTypeGeoKey +) + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +def _write_rotated_tiff_with_crs(path, arr: np.ndarray) -> None: + """Write a TIFF with both rotated ModelTransformationTag AND EPSG:4326 GeoKeys. + + Single-strip uncompressed uint16, written by hand so the fixture has + no rasterio / GDAL dependency. Mirrors the helper in + ``test_allow_rotated_geotiff_2115.py`` but adds the + GeoKeyDirectoryTag so the reader populates ``geo_info.crs_epsg`` + and ``geo_info.crs_wkt`` alongside the rotated transform. + """ + h, w = arr.shape + arr = np.ascontiguousarray(arr.astype('\n' + f' {wkt}\n' + # GDAL ordering: (origin_x, pixel_width, b, origin_y, d, pixel_height) + # b=0.5 forces rotation. + f' 0.0, 1.0, 0.5, 0.0, 0.0, -1.0\n' + f' \n' + f' \n' + f' {source_filename}' + f'\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt_path, 'w') as f: + f.write(xml) + + +def test_vrt_eager_rotated_read_drops_crs(tmp_path): + """Eager VRT path drops ``crs`` / ``crs_wkt`` / ``transform`` on + rotated read under ``allow_rotated=True``.""" + src = tmp_path / "vrt_src_2122.tif" + _write_minimal_aligned_tiff(src) + vrt = tmp_path / "rotated_2122.vrt" + _write_rotated_vrt(vrt, src.name) + + da = open_geotiff(str(vrt), allow_rotated=True) + + assert 'crs' not in da.attrs, sorted(da.attrs.keys()) + assert 'crs_wkt' not in da.attrs, sorted(da.attrs.keys()) + assert 'transform' not in da.attrs, sorted(da.attrs.keys()) + # VRT rotated reads must match the eager non-VRT contract: int64 + # pixel coords plus the no-georef marker, so the writer round-trip + # treats the array as a no-georef pixel grid rather than a + # user-authored integer-coord raster. + assert da.x.dtype == np.int64, da.x.dtype + assert da.y.dtype == np.int64, da.y.dtype + assert da.attrs.get(_NO_GEOREF_KEY) is True, sorted(da.attrs.keys()) + + +def test_vrt_chunked_rotated_read_drops_crs(tmp_path): + """Chunked VRT path drops ``crs`` / ``crs_wkt`` / ``transform`` on + rotated read under ``allow_rotated=True``.""" + src = tmp_path / "vrt_src_2122_chunks.tif" + _write_minimal_aligned_tiff(src) + vrt = tmp_path / "rotated_2122_chunks.vrt" + _write_rotated_vrt(vrt, src.name) + + da = open_geotiff(str(vrt), allow_rotated=True, chunks=2) + + assert 'crs' not in da.attrs, sorted(da.attrs.keys()) + assert 'crs_wkt' not in da.attrs, sorted(da.attrs.keys()) + assert 'transform' not in da.attrs, sorted(da.attrs.keys()) + assert da.x.dtype == np.int64, da.x.dtype + assert da.y.dtype == np.int64, da.y.dtype + assert da.attrs.get(_NO_GEOREF_KEY) is True, sorted(da.attrs.keys()) + + +def test_vrt_axis_aligned_still_emits_crs(tmp_path): + """Regression guard for the VRT path: non-rotated VRTs still emit crs.""" + src = tmp_path / "vrt_src_2122_ok.tif" + _write_minimal_aligned_tiff(src) + vrt = tmp_path / "aligned_2122.vrt" + wkt = ( + 'GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,' + '298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",' + '0.0174532925199433],AUTHORITY["EPSG","4326"]]' + ) + xml = ( + f'\n' + f' {wkt}\n' + f' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0\n' + f' \n' + f' \n' + f' {src.name}' + f'\n' + f' 1\n' + f' \n' + f' \n' + f' \n' + f' \n' + f'\n' + ) + with open(vrt, 'w') as f: + f.write(xml) + + da = open_geotiff(str(vrt)) + + assert da.attrs.get('crs') == 4326 + assert 'crs_wkt' in da.attrs + assert 'transform' in da.attrs