From 7a1b0daebe036a5189f7bb1fc9c03c486717f727 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:33:03 -0700 Subject: [PATCH 1/2] geotiff: accept numpy integer CRS values at writers (#2082) _validate_crs_arg already accepts numbers.Integral (so np.int32 / np.int64 / np.uint16 pass), but the writers gated EPSG assignment on isinstance(crs, int). Numpy integers failed that check and the value silently fell through with no EPSG and no crs_wkt written; read-back returned a DataArray with no CRS at all. Widen the three writer-side checks (eager.py:540, eager.py:871, gpu.py:349) to numbers.Integral and coerce to int(crs). Validator already rejects bool so the writer branch is safe from the bool subclass slip. The VRT writer path goes through _resolve_crs_to_wkt which already coerces, so no change there. Add regression tests that round-trip np.int32 / np.int64 / np.uint16 through to_geotiff (path + BytesIO), the VRT-tiled branch, and write_geotiff_gpu (skipped without cuda). Tests check that attrs['crs'] resolves to the expected EPSG, not just that bytes got written -- the pre-existing test_to_geotiff_accepts_numpy_int_epsg only asserted nbytes > 0, which passed even with no CRS in the file. --- CHANGELOG.md | 1 + xrspatial/geotiff/_writers/eager.py | 19 ++- xrspatial/geotiff/_writers/gpu.py | 8 +- .../geotiff/tests/test_numpy_int_crs_2082.py | 132 ++++++++++++++++++ 4 files changed, 152 insertions(+), 8 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_numpy_int_crs_2082.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b1b5726bb..f3a5c2724 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ #### Bug fixes and improvements - Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016) +- Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082) - Deprecate read-side emission of geographic-CRS GeoKey attrs (crs_name, geog_citation, datum_code, angular_units, semi_major_axis, inv_flattening); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of projected-CRS GeoKey attrs (linear_units, projection_code); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) - Deprecate read-side emission of vertical-CRS GeoKey attrs (vertical_crs, vertical_citation, vertical_units); the writer does not emit vertical GeoKeys so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984) diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 3cbb6c33e..f15d22eda 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -10,6 +10,7 @@ """ from __future__ import annotations +import numbers import os import warnings from typing import TYPE_CHECKING @@ -541,10 +542,13 @@ def to_geotiff(data: xr.DataArray | np.ndarray, # so they keep the pre-#1988 NaN->sentinel rewrite. restore_sentinel = True - # Resolve crs argument: can be int (EPSG) or str (WKT/PROJ) + # Resolve crs argument: can be int (EPSG) or str (WKT/PROJ). + # ``numbers.Integral`` covers plain ``int`` and numpy integer scalars + # (``np.int32``, ``np.int64``); ``_validate_crs_arg`` already rejects + # bool. Coerce to plain ``int`` so downstream ``epsg`` is a real int. _validate_crs_arg(crs) - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) # try to extract EPSG from WKT/PROJ if epsg is None: @@ -882,12 +886,15 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None, f"Tiles directory already contains files: {tiles_dir}") os.makedirs(tiles_dir, exist_ok=True) - # Resolve CRS + # Resolve CRS. ``numbers.Integral`` covers numpy integer scalars + # (``np.int32``, ``np.int64``) so ``crs=np.int64(4326)`` does not + # silently fall through to ``epsg=None``. Validator already + # rejects bool. _validate_crs_arg(crs) epsg = None wkt_fallback = None - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) if epsg is None: diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index e596e01b3..687815f4a 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -7,6 +7,7 @@ """ from __future__ import annotations +import numbers import warnings from typing import TYPE_CHECKING @@ -349,9 +350,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, y_res = None res_unit = None + # ``numbers.Integral`` covers numpy integer scalars (``np.int32``, + # ``np.int64``) so they hit the EPSG branch instead of falling + # through to ``epsg=None``. Validator already rejects bool. _validate_crs_arg(crs) - if isinstance(crs, int): - epsg = crs + if isinstance(crs, numbers.Integral): + epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) if epsg is None: diff --git a/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py new file mode 100644 index 000000000..87aab1b2a --- /dev/null +++ b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py @@ -0,0 +1,132 @@ +"""Numpy integer CRS values must round-trip through the writers (#2082). + +``_validate_crs_arg`` accepts ``numbers.Integral`` (incl. ``np.int32`` / +``np.int64`` / ``np.uint16``), but the eager and GPU writers used to +gate EPSG assignment on ``isinstance(crs, int)``. Numpy integers fail +that check, so the value passed validation, fell through the writer +branch, and the file landed on disk with no EPSG and no ``crs_wkt`` +tag. Anything reading the file back got no CRS and silently dropped +into the no-georef code path. + +These tests pin the round-trip at all three writer call sites that +used to do the gating: ``to_geotiff`` (eager), ``to_geotiff`` with a +``.vrt`` path (the deprecated ``_write_vrt_tiled`` branch), and +``write_geotiff_gpu`` (skipped without cuda). + +There is an existing ``test_to_geotiff_accepts_numpy_int_epsg`` in +``test_crs_arg_validation_1971`` that only asserts ``buf.nbytes > 0``. +A file with no CRS still has bytes, so that test passed even with the +bug in place. The asserts below check ``geo_info.epsg`` (or the +file-level ``attrs['crs']``) instead, so a regression to the +silent-drop behaviour trips immediately. +""" +from __future__ import annotations + +import io + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import open_geotiff, to_geotiff + +pyproj = pytest.importorskip("pyproj") + + +def _square(dtype=np.float32): + return xr.DataArray( + np.zeros((4, 4), dtype=dtype), + coords={'y': np.arange(4.0), 'x': np.arange(4.0)}, + dims=('y', 'x'), + ) + + +@pytest.mark.parametrize( + "np_int", + [np.int32(4326), np.int64(4326), np.uint16(4326)], +) +def test_to_geotiff_numpy_int_crs_roundtrips(tmp_path, np_int): + path = str(tmp_path / f"tmp_2082_eager_{type(np_int).__name__}.tif") + to_geotiff(_square(), path, crs=np_int) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326 + + +@pytest.mark.parametrize( + "np_int", + [np.int32(3857), np.int64(3857)], +) +def test_to_geotiff_numpy_int_crs_roundtrips_bytesio(np_int): + # The BytesIO branch is a separate code path from the path-based + # writer for some metadata; pin both. + buf = io.BytesIO() + to_geotiff(_square(), buf, crs=np_int) + buf.seek(0) + out = open_geotiff(buf) + assert out.attrs.get('crs') == 3857 + + +def test_to_geotiff_vrt_tiled_numpy_int_crs_roundtrips(tmp_path): + # ``to_geotiff(da, '*.vrt')`` dispatches to ``_write_vrt_tiled``, + # which has its own crs resolution block (eager.py:871). Pin + # numpy-int handling there separately. The writer emits per-tile + # ``tile_NN_NN.tif`` files in a ``_tiles/`` directory next + # to the .vrt; read one of those rather than the .vrt so the + # check exercises the writer-side CRS resolution, not the + # VRT reader's CRS handling. + vrt_path = str(tmp_path / "tmp_2082_vrt_tiled.vrt") + to_geotiff(_square(), vrt_path, crs=np.int64(4326)) + tile_path = str(tmp_path / "tmp_2082_vrt_tiled_tiles" / "tile_00_00.tif") + out = open_geotiff(tile_path) + assert out.attrs.get('crs') == 4326 + + +def test_to_geotiff_python_int_crs_still_works(tmp_path): + # Sanity: the existing plain-int path keeps working. If the + # writer-side ``numbers.Integral`` widening broke this, every + # existing caller that passes ``crs=4326`` would regress. + path = str(tmp_path / "tmp_2082_plain_int.tif") + to_geotiff(_square(), path, crs=4326) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326 + + +def test_to_geotiff_numpy_int_crs_writes_real_int_to_attrs(tmp_path): + # Coerce on the writer side so downstream consumers don't see a + # numpy scalar in ``attrs['crs']``. Mostly a defensive check: if + # the EPSG branch ever stores ``crs`` raw, ``attrs['crs']`` ends + # up as ``np.int64(4326)`` instead of ``int(4326)``, which is a + # serialisation hazard for downstream JSON / dict consumers. + path = str(tmp_path / "tmp_2082_attrs_int_type.tif") + to_geotiff(_square(), path, crs=np.int64(4326)) + out = open_geotiff(path) + crs_val = out.attrs.get('crs') + assert crs_val == 4326 + assert type(crs_val) is int + + +# --- GPU path ----------------------------------------------------------- + +cupy = pytest.importorskip("cupy", reason="GPU writer needs cupy") + +try: + import cupy # noqa: F401 -- already imported via importorskip + _GPU_AVAILABLE = True +except ImportError: + _GPU_AVAILABLE = False + + +@pytest.mark.skipif(not _GPU_AVAILABLE, reason="cuda not available") +def test_write_geotiff_gpu_numpy_int_crs_roundtrips(tmp_path): + from xrspatial.geotiff import write_geotiff_gpu + + arr = cupy.zeros((4, 4), dtype=cupy.float32) + da = xr.DataArray( + arr, + coords={'y': np.arange(4.0), 'x': np.arange(4.0)}, + dims=('y', 'x'), + ) + path = str(tmp_path / "tmp_2082_gpu.tif") + write_geotiff_gpu(da, path, crs=np.int64(4326)) + out = open_geotiff(path) + assert out.attrs.get('crs') == 4326 From 4ec1a85a2942be6da37684d11eca66cd1544e005 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:36:47 -0700 Subject: [PATCH 2/2] Address review nits: docstrings + softer test assertion (#2082) --- xrspatial/geotiff/_writers/eager.py | 8 ++++---- xrspatial/geotiff/_writers/gpu.py | 13 +++++++------ .../geotiff/tests/test_numpy_int_crs_2082.py | 19 ++++++++++++------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index f15d22eda..012ef683a 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -96,10 +96,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray, (e.g. ``io.BytesIO``). When a file-like is passed, the encoded TIFF bytes are written to that object once assembly completes. ``cog=True`` and ``.vrt`` outputs require a string path. - crs : int, str, or None - EPSG code (int), WKT string, or PROJ string. If None and data - is a DataArray, tries to read from attrs ('crs' for EPSG, - 'crs_wkt' for WKT). + crs : int, numpy.integer, str, or None + EPSG code (int or numpy integer scalar), WKT string, or PROJ + string. If None and data is a DataArray, tries to read from + attrs ('crs' for EPSG, 'crs_wkt' for WKT). EPSG codes are strongly preferred for interop. The WKT-only path writes ``ProjectedCSType`` / ``GeographicType`` = 32767 diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 687815f4a..25d01349f 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -83,12 +83,13 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)`` rejects file-like destinations, and the explicit GPU writer mirrors that rule (issue #1652). - crs : int, str, or None - EPSG code or WKT string. EPSG codes are strongly preferred for - interop; the WKT-only path emits a user-defined CRS (32767) with - the WKT stored in ``GTCitationGeoKey``, which many non-libgeotiff - readers ignore. A ``UserWarning`` is emitted when the WKT-only - path is taken. See issue #1768. + crs : int, numpy.integer, str, or None + EPSG code (int or numpy integer scalar) or WKT string. EPSG + codes are strongly preferred for interop; the WKT-only path + emits a user-defined CRS (32767) with the WKT stored in + ``GTCitationGeoKey``, which many non-libgeotiff readers + ignore. A ``UserWarning`` is emitted when the WKT-only path + is taken. See issue #1768. nodata : float, int, or None NoData value. compression : str diff --git a/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py index 87aab1b2a..2df940a84 100644 --- a/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py +++ b/xrspatial/geotiff/tests/test_numpy_int_crs_2082.py @@ -92,17 +92,22 @@ def test_to_geotiff_python_int_crs_still_works(tmp_path): def test_to_geotiff_numpy_int_crs_writes_real_int_to_attrs(tmp_path): - # Coerce on the writer side so downstream consumers don't see a - # numpy scalar in ``attrs['crs']``. Mostly a defensive check: if - # the EPSG branch ever stores ``crs`` raw, ``attrs['crs']`` ends - # up as ``np.int64(4326)`` instead of ``int(4326)``, which is a - # serialisation hazard for downstream JSON / dict consumers. + # The writer coerces with ``int(crs)`` before stamping the EPSG + # tag so downstream consumers don't see a numpy scalar in + # ``attrs['crs']`` if the reader ever propagates the raw value. + # Check that the round-tripped value isn't a numpy integer + # subclass: ``int(crs_val) == 4326`` catches the value, and the + # ``not isinstance(crs_val, np.integer)`` guard catches the type + # regression class (JSON / dict serialisation hazard) without + # tying the assertion to ``type(...) is int`` -- if the reader + # ever wraps EPSG in a Python int subclass for unrelated reasons + # the test still passes. path = str(tmp_path / "tmp_2082_attrs_int_type.tif") to_geotiff(_square(), path, crs=np.int64(4326)) out = open_geotiff(path) crs_val = out.attrs.get('crs') - assert crs_val == 4326 - assert type(crs_val) is int + assert int(crs_val) == 4326 + assert not isinstance(crs_val, np.integer) # --- GPU path -----------------------------------------------------------