diff --git a/CHANGELOG.md b/CHANGELOG.md index b1b5726bb..f3a5c2724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ #### Bug fixes and improvements - Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016) +- Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082) - Deprecate read-side emission of geographic-CRS GeoKey attrs (crs_name, geog_citation, datum_code, angular_units, semi_major_axis, inv_flattening); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of projected-CRS GeoKey attrs (linear_units, projection_code); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of vertical-CRS GeoKey attrs (vertical_crs, vertical_citation, vertical_units); the writer does not emit vertical GeoKeys so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 3cbb6c33e..012ef683a 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -10,6 +10,7 @@ """ from __future__ import annotations +import numbers import os import warnings from typing import TYPE_CHECKING @@ -95,10 +96,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray, (e.g. ``io.BytesIO``). When a file-like is passed, the encoded TIFF bytes are written to that object once assembly completes. ``cog=True`` and ``.vrt`` outputs require a string path. - crs : int, str, or None - EPSG code (int), WKT string, or PROJ string. If None and data - is a DataArray, tries to read from attrs ('crs' for EPSG, - 'crs_wkt' for WKT). + crs : int, numpy.integer, str, or None + EPSG code (int or numpy integer scalar), WKT string, or PROJ + string. If None and data is a DataArray, tries to read from + attrs ('crs' for EPSG, 'crs_wkt' for WKT). EPSG codes are strongly preferred for interop. The WKT-only path writes ``ProjectedCSType`` / ``GeographicType`` = 32767 @@ -541,10 +542,13 @@ def to_geotiff(data: xr.DataArray | np.ndarray, # so they keep the pre-#1988 NaN->sentinel rewrite. restore_sentinel = True - # Resolve crs argument: can be int (EPSG) or str (WKT/PROJ) + # Resolve crs argument: can be int (EPSG) or str (WKT/PROJ). + # ``numbers.Integral`` covers plain ``int`` and numpy integer scalars + # (``np.int32``, ``np.int64``); ``_validate_crs_arg`` already rejects + # bool. Coerce to plain ``int`` so downstream ``epsg`` is a real int. _validate_crs_arg(crs) - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) # try to extract EPSG from WKT/PROJ if epsg is None: @@ -882,12 +886,15 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, f"Tiles directory already contains files: {tiles_dir}") os.makedirs(tiles_dir, exist_ok=True) - # Resolve CRS + # Resolve CRS. ``numbers.Integral`` covers numpy integer scalars + # (``np.int32``, ``np.int64``) so ``crs=np.int64(4326)`` does not + # silently fall through to ``epsg=None``. Validator already + # rejects bool. _validate_crs_arg(crs) epsg = None wkt_fallback = None - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) if epsg is None: diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index e596e01b3..25d01349f 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -7,6 +7,7 @@ """ from __future__ import annotations +import numbers import warnings from typing import TYPE_CHECKING @@ -82,12 +83,13 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)`` rejects file-like destinations, and the explicit GPU writer mirrors that rule (issue #1652). - crs : int, str, or None - EPSG code or WKT string. EPSG codes are strongly preferred for - interop; the WKT-only path emits a user-defined CRS (32767) with - the WKT stored in ``GTCitationGeoKey``, which many non-libgeotiff - readers ignore. A ``UserWarning`` is emitted when the WKT-only - path is taken. See issue #1768. + crs : int, numpy.integer, str, or None + EPSG code (int or numpy integer scalar) or WKT string. EPSG + codes are strongly preferred for interop; the WKT-only path + emits a user-defined CRS (32767) with the WKT stored in + ``GTCitationGeoKey``, which many non-libgeotiff readers + ignore. A ``UserWarning`` is emitted when the WKT-only path + is taken. See issue #1768. nodata : float, int, or None NoData value. compression : str @@ -349,9 +351,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, y_res = None res_unit = None + # ``numbers.Integral`` covers numpy integer scalars (``np.int32``, + # ``np.int64``) so they hit the EPSG branch instead of falling + # through to ``epsg=None``. Validator already rejects bool. _validate_crs_arg(crs) - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) if epsg is None: diff --git a/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py new file mode 100644 index 000000000..2df940a84 --- /dev/null +++ b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py @@ -0,0 +1,137 @@ +"""Numpy integer CRS values must round-trip through the writers (#2082). + +``_validate_crs_arg`` accepts ``numbers.Integral`` (incl. ``np.int32`` / +``np.int64`` / ``np.uint16``), but the eager and GPU writers used to +gate EPSG assignment on ``isinstance(crs, int)``. Numpy integers fail +that check, so the value passed validation, fell through the writer +branch, and the file landed on disk with no EPSG and no ``crs_wkt`` +tag. Anything reading the file back got no CRS and silently dropped +into the no-georef code path. + +These tests pin the round-trip at all three writer call sites that +used to do the gating: ``to_geotiff`` (eager), ``to_geotiff`` with a +``.vrt`` path (the deprecated ``_write_vrt_tiled`` branch), and +``write_geotiff_gpu`` (skipped without cuda). + +There is an existing ``test_to_geotiff_accepts_numpy_int_epsg`` in +``test_crs_arg_validation_1971`` that only asserts ``buf.nbytes > 0``. +A file with no CRS still has bytes, so that test passed even with the +bug in place. The asserts below check ``geo_info.epsg`` (or the +file-level ``attrs['crs']``) instead, so a regression to the +silent-drop behaviour trips immediately. +""" +from __future__ import annotations + +import io + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + +pyproj = pytest.importorskip("pyproj") + + +def _square(dtype=np.float32): + return xr.DataArray( + np.zeros((4, 4), dtype=dtype), + coords={'y': np.arange(4.0), 'x': np.arange(4.0)}, + dims=('y', 'x'), + ) + + +@pytest.mark.parametrize( + "np_int", + [np.int32(4326), np.int64(4326), np.uint16(4326)], +) +def test_to_geotiff_numpy_int_crs_roundtrips(tmp_path, np_int): + path = str(tmp_path / f"tmp_2082_eager_{type(np_int).__name__}.tif") + to_geotiff(_square(), path, crs=np_int) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326 + + +@pytest.mark.parametrize( + "np_int", + [np.int32(3857), np.int64(3857)], +) +def test_to_geotiff_numpy_int_crs_roundtrips_bytesio(np_int): + # The BytesIO branch is a separate code path from the path-based + # writer for some metadata; pin both. + buf = io.BytesIO() + to_geotiff(_square(), buf, crs=np_int) + buf.seek(0) + out = open_geotiff(buf) + assert out.attrs.get('crs') == 3857 + + +def test_to_geotiff_vrt_tiled_numpy_int_crs_roundtrips(tmp_path): + # ``to_geotiff(da, '*.vrt')`` dispatches to ``_write_vrt_tiled``, + # which has its own crs resolution block (eager.py:871). Pin + # numpy-int handling there separately. The writer emits per-tile + # ``tile_NN_NN.tif`` files in a ``_tiles/`` directory next + # to the .vrt; read one of those rather than the .vrt so the + # check exercises the writer-side CRS resolution, not the + # VRT reader's CRS handling. + vrt_path = str(tmp_path / "tmp_2082_vrt_tiled.vrt") + to_geotiff(_square(), vrt_path, crs=np.int64(4326)) + tile_path = str(tmp_path / "tmp_2082_vrt_tiled_tiles" / "tile_00_00.tif") + out = open_geotiff(tile_path) + assert out.attrs.get('crs') == 4326 + + +def test_to_geotiff_python_int_crs_still_works(tmp_path): + # Sanity: the existing plain-int path keeps working. If the + # writer-side ``numbers.Integral`` widening broke this, every + # existing caller that passes ``crs=4326`` would regress. + path = str(tmp_path / "tmp_2082_plain_int.tif") + to_geotiff(_square(), path, crs=4326) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326 + + +def test_to_geotiff_numpy_int_crs_writes_real_int_to_attrs(tmp_path): + # The writer coerces with ``int(crs)`` before stamping the EPSG + # tag so downstream consumers don't see a numpy scalar in + # ``attrs['crs']`` if the reader ever propagates the raw value. + # Check that the round-tripped value isn't a numpy integer + # subclass: ``int(crs_val) == 4326`` catches the value, and the + # ``not isinstance(crs_val, np.integer)`` guard catches the type + # regression class (JSON / dict serialisation hazard) without + # tying the assertion to ``type(...) is int`` -- if the reader + # ever wraps EPSG in a Python int subclass for unrelated reasons + # the test still passes. + path = str(tmp_path / "tmp_2082_attrs_int_type.tif") + to_geotiff(_square(), path, crs=np.int64(4326)) + out = open_geotiff(path) + crs_val = out.attrs.get('crs') + assert int(crs_val) == 4326 + assert not isinstance(crs_val, np.integer) + + +# --- GPU path ----------------------------------------------------------- + +cupy = pytest.importorskip("cupy", reason="GPU writer needs cupy") + +try: + import cupy # noqa: F401 -- already imported via importorskip + _GPU_AVAILABLE = True +except ImportError: + _GPU_AVAILABLE = False + + +@pytest.mark.skipif(not _GPU_AVAILABLE, reason="cuda not available") +def test_write_geotiff_gpu_numpy_int_crs_roundtrips(tmp_path): + from xrspatial.geotiff import write_geotiff_gpu + + arr = cupy.zeros((4, 4), dtype=cupy.float32) + da = xr.DataArray( + arr, + coords={'y': np.arange(4.0), 'x': np.arange(4.0)}, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2082_gpu.tif") + write_geotiff_gpu(da, path, crs=np.int64(4326)) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326