diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 6fe74d17..8aada140 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -140,7 +140,8 @@ ] -def _read_geo_info(source, *, overview_level: int | None = None): +def _read_geo_info(source, *, overview_level: int | None = None, + allow_rotated: bool = False): """Read only the geographic metadata and image dimensions from a GeoTIFF. Returns (geo_info, height, width, dtype, n_bands) without reading pixel @@ -154,6 +155,10 @@ def _read_geo_info(source, *, overview_level: int | None = None): Path or any object with ``read``/``seek``. overview_level : int or None Overview IFD index (0 = full resolution). + allow_rotated : bool, optional + Forwarded to the geotag parser. When True, a rotated + ``ModelTransformationTag`` reads as an ungeoreferenced pixel + grid instead of raising ``NotImplementedError`` (issue #2115). """ from ._dtypes import resolve_bits_per_sample, tiff_dtype_to_numpy from ._geotags import extract_geo_info_with_overview_inheritance @@ -177,7 +182,8 @@ def _read_geo_info(source, *, overview_level: int | None = None): _src = _CloudSource(source) try: _header, _ifd, geo_info, _ = _parse_cog_http_meta( - _src, overview_level=overview_level) + _src, overview_level=overview_level, + allow_rotated=allow_rotated) finally: _src.close() bps = resolve_bits_per_sample(_ifd.bits_per_sample) @@ -224,7 +230,8 @@ def _read_geo_info(source, *, overview_level: int | None = None): # Inherit georef from the level-0 IFD when the overview itself # has no geokeys (issue #1640). Pass-through for level 0. geo_info = extract_geo_info_with_overview_inheritance( - ifd, ifds, data, header.byte_order) + ifd, ifds, data, header.byte_order, + allow_rotated=allow_rotated) bps = resolve_bits_per_sample(ifd.bits_per_sample) file_dtype = tiff_dtype_to_numpy(bps, ifd.sample_format) _validate_predictor_sample_format(ifd.predictor, ifd.sample_format) @@ -348,6 +355,19 @@ def open_geotiff(source: str | BinaryIO, *, promotes to ``float64`` whenever the sentinel matches an actual pixel, and ``dtype=`` then raises ``ValueError`` on the float-to-int cast. + allow_rotated : bool, default False + Read-side opt-in for rotated / sheared ``ModelTransformationTag`` + files. By default the reader raises ``NotImplementedError`` + because the rest of xrspatial assumes an axis-aligned grid. + ``allow_rotated=True`` reads the pixel grid without the + geospatial assumption: the result has integer pixel coords on + ``x`` / ``y`` and ``attrs['crs']`` is dropped. The original + rotated 6-tuple is preserved on + ``geo_info.transform.rotated_affine`` for callers that want it + (issue #2115). The contract is read-only -- ``to_geotiff`` does + not currently emit ``rotated_affine``, so a read-then-write + round-trip writes an identity-affine output and silently drops + the rotation. Returns ------- @@ -555,6 +575,7 @@ def open_geotiff(source: str | BinaryIO, *, arr, geo_info = _read_to_array( source, window=window, overview_level=overview_level, band=band, + allow_rotated=allow_rotated, **kwargs, ) diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index 4ce41e42..6e143d41 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -202,7 +202,8 @@ def read_geotiff_dask(source: str, *, # above: ``_read_geo_info`` still lives in ``xrspatial.geotiff``. from .. import _read_geo_info geo_info, full_h, full_w, file_dtype, n_bands = _read_geo_info( - source, overview_level=overview_level) + source, overview_level=overview_level, + allow_rotated=allow_rotated) nodata = geo_info.nodata nodata_attr = nodata # original sentinel preserved for attrs['nodata'] # When the source is MinIsWhite (photometric == 0, samples_per_pixel == 1), @@ -424,7 +425,8 @@ def read_geotiff_dask(source: str, *, band_arg, target_dtype=target_dtype, http_meta_key=http_meta_key, - max_pixels=max_pixels), + max_pixels=max_pixels, + allow_rotated=allow_rotated), shape=block_shape, dtype=target_dtype, ) @@ -446,7 +448,7 @@ def read_geotiff_dask(source: str, *, def _delayed_read_window(source, r0, c0, r1, c1, overview_level, nodata, band, *, target_dtype=None, http_meta_key=None, - max_pixels=None): + max_pixels=None, allow_rotated=False): """Dask-delayed function to read a single window. *http_meta_key* is an optional ``Delayed[(TIFFHeader, IFD)]`` parsed @@ -502,7 +504,9 @@ def _read(http_meta): _r2a_kwargs['max_pixels'] = max_pixels arr, _ = _read_to_array(source, window=(r0, c0, r1, c1), overview_level=overview_level, - band=band, **_r2a_kwargs) + band=band, + allow_rotated=allow_rotated, + **_r2a_kwargs) if nodata is not None: # ``arr`` was just decoded by ``_fetch_decode_cog_http_tiles`` # or ``read_to_array``; both return freshly-allocated buffers diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index f1dbcfbb..78315b31 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -301,7 +301,8 @@ def read_geotiff_gpu(source: str, *, # Inherit georef from the level-0 IFD when the overview itself # has no geokeys (issue #1640); pass-through for level 0. geo_info = extract_geo_info_with_overview_inheritance( - ifd, ifds, data, header.byte_order) + ifd, ifds, data, header.byte_order, + allow_rotated=allow_rotated) # Capture the Orientation tag (274) once so the post-decode flip # below picks it up for both the stripped fallback and the tiled # GPU pipelines. CPU read_to_array applies the array remap + @@ -1031,6 +1032,7 @@ def _read_geotiff_gpu_chunked(source, *, dtype, chunks, overview_level, ifd = select_overview_ifd(ifds, overview_level) geo_info = extract_geo_info_with_overview_inheritance( ifd, ifds, raw, header.byte_order, + allow_rotated=allow_rotated, ) orientation = ifd.orientation has_sparse_tile = ( diff --git a/xrspatial/geotiff/_geotags.py b/xrspatial/geotiff/_geotags.py index 3ca8470a..0050da55 100644 --- a/xrspatial/geotiff/_geotags.py +++ b/xrspatial/geotiff/_geotags.py @@ -114,11 +114,18 @@ class GeoTransform: y = origin_y + row * pixel_height pixel_height is typically negative (y decreases downward). + + ``rotated_affine`` carries the original 6-tuple + ``(pixel_width, b, origin_x, d, pixel_height, origin_y)`` (rasterio + ``Affine`` ordering) for files opened with ``allow_rotated=True``. + It is ``None`` for axis-aligned reads. Storing it as a real field + means ``dataclasses.replace`` and similar helpers preserve it. """ origin_x: float = 0.0 origin_y: float = 0.0 pixel_width: float = 1.0 pixel_height: float = -1.0 + rotated_affine: tuple | None = None @dataclass @@ -588,16 +595,48 @@ def _validate_tiepoint_consistency(tiepoint: tuple, raise NotImplementedError(f"{primary}\n{cause}\n{hint}") -def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: +def _extract_transform(ifd: IFD, + allow_rotated: bool = False + ) -> tuple[GeoTransform, bool]: """Extract affine transform from ModelTransformation, or ModelTiepoint + ModelPixelScale tags. + Parameters + ---------- + ifd : IFD + Parsed IFD. + allow_rotated : bool + When True, a rotated / sheared / z-coupled ``ModelTransformationTag`` + does not raise. The function returns an *intentionally inert* + identity ``GeoTransform`` (``origin_x=origin_y=0``, + ``pixel_width=1``, ``pixel_height=-1``) with ``has_georef=False`` + so the reader treats the file as an ungeoreferenced pixel grid. + Consumers MUST NOT read ``transform.origin_x`` / + ``transform.pixel_width`` / etc. as a stand-in for the real + mapping in this case -- those fields are the default identity, + not a fallback. The rotated 6-tuple is attached to the returned + ``GeoTransform`` via ``transform.rotated_affine`` (rasterio + ``Affine`` ordering: ``(pixel_width, b, origin_x, d, + pixel_height, origin_y)``); read it directly when the rotated + mapping is needed. Default ``False`` -- existing behaviour, + raise ``NotImplementedError``. + + This contract is read-only. ``rotated_affine`` is not currently + emitted by the writer, so a read-with-``allow_rotated`` + followed by ``to_geotiff`` round-trip silently writes an + identity-affine output and drops the original rotation. If + round-trip preservation matters, the writer needs a separate + ``ModelTransformationTag`` emit path that consumes + ``rotated_affine`` (see issue #2115 follow-up). + Returns ------- (transform, has_georef) ``has_georef`` is True when at least one of the geo-transform tags - was present. When False, ``transform`` is the default identity - and callers should fall back to pixel coordinates. + was present *and* axis-aligned. When False, ``transform`` is the + default identity and callers should fall back to pixel coordinates. + For the ``allow_rotated=True`` opt-in path, ``has_georef`` is + False and the rotated 6-tuple lives on ``transform.rotated_affine``. """ # Try ModelTransformationTag (4x4 row-major matrix, 16 doubles). @@ -608,8 +647,10 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: # y = M[4]*col + M[5]*row + M[6]*z + M[7] # # GeoTransform only carries the axis-aligned case. For rotated, sheared, - # or z-coupled transforms we raise NotImplementedError instead of silently - # dropping the off-diagonal terms. + # or z-coupled transforms we raise NotImplementedError unless the caller + # opts out via ``allow_rotated`` (issue #2115). The opt-out drops the + # georef so downstream coord generation uses pixel indices and any + # spatial op that runs on the array sees no geo assumption to violate. transform_tag = ifd.get_value(TAG_MODEL_TRANSFORMATION) if transform_tag is not None: if isinstance(transform_tag, tuple) and len(transform_tag) >= 12: @@ -621,14 +662,24 @@ def _extract_transform(ifd: IFD) -> tuple[GeoTransform, bool]: rotation_terms = (m[1], m[4]) z_terms = (m[2], m[6]) if len(m) >= 8 else (0.0, 0.0) if any(abs(t) > tol for t in rotation_terms + z_terms): - raise NotImplementedError( - "ModelTransformationTag (34264) contains rotation, " - "skew, or z-coupling terms " - f"(M[1]={m[1]!r}, M[4]={m[4]!r}, " - f"M[2]={m[2] if len(m) > 2 else 0.0!r}, " - f"M[6]={m[6] if len(m) > 6 else 0.0!r}). " - "Only axis-aligned affine transforms are supported." - ) + if not allow_rotated: + raise NotImplementedError( + "ModelTransformationTag (34264) contains rotation, " + "skew, or z-coupling terms " + f"(M[1]={m[1]!r}, M[4]={m[4]!r}, " + f"M[2]={m[2] if len(m) > 2 else 0.0!r}, " + f"M[6]={m[6] if len(m) > 6 else 0.0!r}). " + "Only axis-aligned affine transforms are supported. " + "Pass ``allow_rotated=True`` to read the pixel grid " + "without the geospatial assumption (issue #2115)." + ) + # Opt-in: drop georef, stash the rotated matrix on the + # GeoTransform so the validator + attrs-roundtrip code + # can see it. ``rasterio.Affine`` order: (a, b, c, d, e, f) + # = (pixel_width, b, origin_x, d, pixel_height, origin_y). + return GeoTransform( + rotated_affine=(m[0], m[1], m[3], m[4], m[5], m[7]), + ), False return GeoTransform( origin_x=m[3], origin_y=m[7], @@ -757,7 +808,9 @@ def _parse_nodata_str(text: str | None) -> int | float | None: def extract_geo_info(ifd: IFD, data: bytes | memoryview, - byte_order: str) -> GeoInfo: + byte_order: str, + *, + allow_rotated: bool = False) -> GeoInfo: """Extract full geographic metadata from a parsed IFD. Parameters @@ -768,12 +821,16 @@ def extract_geo_info(ifd: IFD, data: bytes | memoryview, Full file data (needed for resolving GeoKey param offsets). byte_order : str '<' or '>'. + allow_rotated : bool, optional + Forwarded to :func:`_extract_transform`. When True, a rotated + ``ModelTransformationTag`` is read as an ungeoreferenced pixel + grid instead of raising ``NotImplementedError`` (issue #2115). Returns ------- GeoInfo """ - transform, has_georef = _extract_transform(ifd) + transform, has_georef = _extract_transform(ifd, allow_rotated=allow_rotated) geokeys = _parse_geokeys(ifd, data, byte_order) # Extract EPSG @@ -1017,6 +1074,8 @@ def extract_geo_info_with_overview_inheritance( ifds: list, data: bytes | memoryview, byte_order: str, + *, + allow_rotated: bool = False, ) -> GeoInfo: """Extract geo metadata, inheriting from level 0 when the IFD lacks it. @@ -1071,7 +1130,8 @@ def extract_geo_info_with_overview_inheritance( ------- GeoInfo """ - info = extract_geo_info(ifd, data, byte_order) + info = extract_geo_info(ifd, data, byte_order, + allow_rotated=allow_rotated) # Overview IFDs have NewSubfileType bit 0 set; mask IFDs (bit 2) and # page IFDs (bit 1) are filtered out by ``select_overview_ifd`` @@ -1095,7 +1155,8 @@ def extract_geo_info_with_overview_inheritance( if base_ifd is None: return info - base_info = extract_geo_info(base_ifd, data, byte_order) + base_info = extract_geo_info(base_ifd, data, byte_order, + allow_rotated=allow_rotated) # Inherit the per-IFD metadata that the COG writer emits only on the # level-0 IFD: GDAL_NODATA, GDAL_METADATA, x/y resolution, colormap, diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index b8fa54a5..b469412a 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -2275,6 +2275,8 @@ def _ifd_required_extent( def _parse_cog_http_meta( source: _HTTPSource, overview_level: int | None = None, + *, + allow_rotated: bool = False, ) -> tuple[TIFFHeader, IFD, GeoInfo, bytes]: """Fetch + parse the leading IFDs of an HTTP COG once. @@ -2341,7 +2343,8 @@ def _parse_cog_http_meta( # IFD so overview reads do not silently lose CRS / transform. # See issue #1640. geo_info = extract_geo_info_with_overview_inheritance( - ifd, ifds, header_bytes, header.byte_order) + ifd, ifds, header_bytes, header.byte_order, + allow_rotated=allow_rotated) return header, ifd, geo_info, header_bytes @@ -2349,6 +2352,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, band: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, window: tuple[int, int, int, int] | None = None, + *, + allow_rotated: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a COG via HTTP range requests. @@ -2386,7 +2391,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, # calls in the validation blocks below stay as-is. try: header, ifd, geo_info, header_bytes = _parse_cog_http_meta( - source, overview_level=overview_level) + source, overview_level=overview_level, + allow_rotated=allow_rotated) # Mirror the local-path orientation guard in ``read_to_array``: a # windowed read against a non-default Orientation tag (274) has @@ -3183,6 +3189,7 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, band: int | None = None, max_pixels: int = MAX_PIXELS_DEFAULT, max_cloud_bytes=_MAX_CLOUD_BYTES_SENTINEL, + allow_rotated: bool = False, ) -> tuple[np.ndarray, GeoInfo]: """Read a GeoTIFF/COG to a numpy array. @@ -3218,7 +3225,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, source = _coerce_path(source) if isinstance(source, str) and source.startswith(('http://', 'https://')): return _read_cog_http(source, overview_level=overview_level, band=band, - max_pixels=max_pixels, window=window) + max_pixels=max_pixels, window=window, + allow_rotated=allow_rotated) # Local file, cloud storage, or file-like buffer: read all bytes then parse if _is_file_like(source): @@ -3271,7 +3279,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None, # no-op: the helper short-circuits when the IFD is not a # NewSubfileType=overview entry. geo_info = extract_geo_info_with_overview_inheritance( - ifd, ifds, data, header.byte_order) + ifd, ifds, data, header.byte_order, + allow_rotated=allow_rotated) # Orientation tag (274): values 2-8 mean the stored pixel order # differs from display order. We need to remap the array post diff --git a/xrspatial/geotiff/tests/test_allow_rotated_geotiff_2115.py b/xrspatial/geotiff/tests/test_allow_rotated_geotiff_2115.py new file mode 100644 index 00000000..c07a2c0f --- /dev/null +++ b/xrspatial/geotiff/tests/test_allow_rotated_geotiff_2115.py @@ -0,0 +1,249 @@ +"""``allow_rotated=True`` works for rotated GeoTIFFs (issue #2115). + +Before the fix, ``open_geotiff(..., allow_rotated=True)`` raised +``NotImplementedError`` for a rotated ``ModelTransformationTag`` +because the geotag parser short-circuited before the validator's +opt-out could be consulted. The VRT path honoured the kwarg; the +GeoTIFF path did not. + +These tests pin the new behaviour: + +* Default (``allow_rotated=False``) still raises ``NotImplementedError`` + so an existing caller that does not know about the opt-out keeps the + safety net. +* ``allow_rotated=True`` drops the georef and reads the pixel grid + without raising. The pixel values match what the file actually holds. +* The rotated 6-tuple is preserved on ``geo_info.transform.rotated_affine`` + so callers / attrs-roundtrip code can see the matrix. +* Falls through to the same path on dask reads. +""" +from __future__ import annotations + +import struct + +import numpy as np +import pytest + +from xrspatial.geotiff import open_geotiff +from xrspatial.geotiff._geotags import ( + GeoTransform, + TAG_MODEL_TRANSFORMATION, + _extract_transform, +) +from xrspatial.geotiff._header import IFD, IFDEntry + + +def _make_ifd_with_rotated_transform(matrix_16: tuple) -> IFD: + """Build a synthetic IFD that carries only a rotated ModelTransformationTag. + + The other geo-related tags stay absent so the transform branch is the + only one exercised; this keeps the unit tests independent of the full + geo-key plumbing. + """ + ifd = IFD() + ifd.entries[TAG_MODEL_TRANSFORMATION] = IFDEntry( + tag=TAG_MODEL_TRANSFORMATION, + type_id=12, # DOUBLE + count=16, + value=tuple(matrix_16), + ) + return ifd + + +# A 30-degree rotation with pixel size 10, origin (100, 200). +# Row-major 4x4 in GeoTIFF order: +# x = M[0]*col + M[1]*row + M[2]*z + M[3] +# y = M[4]*col + M[5]*row + M[6]*z + M[7] +_COS30 = 0.8660254037844387 +_SIN30 = 0.5 +_ROTATED_M = ( + 10.0 * _COS30, -10.0 * _SIN30, 0.0, 100.0, # x row + 10.0 * _SIN30, 10.0 * _COS30, 0.0, 200.0, # y row + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, +) + + +def test_extract_transform_rejects_rotated_by_default(): + ifd = _make_ifd_with_rotated_transform(_ROTATED_M) + with pytest.raises(NotImplementedError, match="rotation"): + _extract_transform(ifd) + + +def test_extract_transform_allow_rotated_returns_no_georef(): + ifd = _make_ifd_with_rotated_transform(_ROTATED_M) + gt, has_georef = _extract_transform(ifd, allow_rotated=True) + assert isinstance(gt, GeoTransform) + assert has_georef is False + # The rotated 6-tuple should be preserved for downstream consumers. + expected = ( + _ROTATED_M[0], _ROTATED_M[1], _ROTATED_M[3], + _ROTATED_M[4], _ROTATED_M[5], _ROTATED_M[7], + ) + assert gt.rotated_affine is not None + assert gt.rotated_affine == pytest.approx(expected) + + +def test_extract_transform_allow_rotated_passes_through_axis_aligned(): + """Axis-aligned files should be unchanged by the opt-in path.""" + aligned = ( + 10.0, 0.0, 0.0, 100.0, + 0.0, -10.0, 0.0, 200.0, + 0.0, 0.0, 1.0, 0.0, + 0.0, 0.0, 0.0, 1.0, + ) + ifd = _make_ifd_with_rotated_transform(aligned) + gt, has_georef = _extract_transform(ifd, allow_rotated=True) + assert has_georef is True + assert gt.pixel_width == 10.0 + assert gt.pixel_height == -10.0 + assert gt.origin_x == 100.0 + assert gt.origin_y == 200.0 + assert gt.rotated_affine is None + + +def _write_rotated_tiff(path, arr: np.ndarray) -> None: + """Write a minimal little-endian TIFF with a rotated ModelTransformationTag. + + Single-band, single-strip, uncompressed; just enough to exercise the + reader's rotation path without depending on rasterio/GDAL. + """ + h, w = arr.shape + arr = np.ascontiguousarray(arr.astype('