Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#### Bug fixes and improvements
- Remove read-side emission of the 13 deprecated GeoTIFF attrs (`crs_name`, `geog_citation`, `datum_code`, `angular_units`, `semi_major_axis`, `inv_flattening`, `linear_units`, `projection_code`, `vertical_crs`, `vertical_citation`, `vertical_units`, `colormap_rgba`, `cmap`) and bump `attrs['_xrspatial_geotiff_contract']` from 1 to 2. Downstream code that read these via `attrs[key]` now sees `KeyError`; migrate to `attrs.get(key)` or derive the value from `attrs['crs']` / `attrs['crs_wkt']` with pyproj. The `.xrs.plot()` accessor still surfaces palette colormaps by building a `ListedColormap` from the canonical `attrs['colormap']`. (#2016)
- Accept numpy integer scalars as the `crs=` argument to `to_geotiff` / `write_geotiff_gpu`. The validator already allowed `numbers.Integral`, but the writers gated EPSG assignment on `isinstance(crs, int)`, so `np.int32` / `np.int64` / `np.uint16` values passed validation then silently fell through with no EPSG written. (#2082)
- Deprecate read-side emission of geographic-CRS GeoKey attrs (crs_name, geog_citation, datum_code, angular_units, semi_major_axis, inv_flattening); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
- Deprecate read-side emission of projected-CRS GeoKey attrs (linear_units, projection_code); the writer cannot reconstruct them so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
- Deprecate read-side emission of vertical-CRS GeoKey attrs (vertical_crs, vertical_citation, vertical_units); the writer does not emit vertical GeoKeys so they do not round-trip. These attrs still emit for now but trigger a DeprecationWarning. Removal planned for a future release. (#1984)
Expand Down
27 changes: 17 additions & 10 deletions xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""
from __future__ import annotations

import numbers
import os
import warnings
from typing import TYPE_CHECKING
Expand Down Expand Up @@ -95,10 +96,10 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
(e.g. ``io.BytesIO``). When a file-like is passed, the encoded
TIFF bytes are written to that object once assembly completes.
``cog=True`` and ``.vrt`` outputs require a string path.
crs : int, str, or None
EPSG code (int), WKT string, or PROJ string. If None and data
is a DataArray, tries to read from attrs ('crs' for EPSG,
'crs_wkt' for WKT).
crs : int, numpy.integer, str, or None
EPSG code (int or numpy integer scalar), WKT string, or PROJ
string. If None and data is a DataArray, tries to read from
attrs ('crs' for EPSG, 'crs_wkt' for WKT).

EPSG codes are strongly preferred for interop. The WKT-only
path writes ``ProjectedCSType`` / ``GeographicType`` = 32767
Expand Down Expand Up @@ -541,10 +542,13 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
# so they keep the pre-#1988 NaN->sentinel rewrite.
restore_sentinel = True

# Resolve crs argument: can be int (EPSG) or str (WKT/PROJ)
# Resolve crs argument: can be int (EPSG) or str (WKT/PROJ).
# ``numbers.Integral`` covers plain ``int`` and numpy integer scalars
# (``np.int32``, ``np.int64``); ``_validate_crs_arg`` already rejects
# bool. Coerce to plain ``int`` so downstream ``epsg`` is a real int.
_validate_crs_arg(crs)
if isinstance(crs, int):
epsg = crs
if isinstance(crs, numbers.Integral):
epsg = int(crs)
elif isinstance(crs, str):
epsg = _wkt_to_epsg(crs) # try to extract EPSG from WKT/PROJ
if epsg is None:
Expand Down Expand Up @@ -882,12 +886,15 @@ def _write_vrt_tiled(data, vrt_path, *, crs=None, nodata=None,
f"Tiles directory already contains files: {tiles_dir}")
os.makedirs(tiles_dir, exist_ok=True)

# Resolve CRS
# Resolve CRS. ``numbers.Integral`` covers numpy integer scalars
# (``np.int32``, ``np.int64``) so ``crs=np.int64(4326)`` does not
# silently fall through to ``epsg=None``. Validator already
# rejects bool.
_validate_crs_arg(crs)
epsg = None
wkt_fallback = None
if isinstance(crs, int):
epsg = crs
if isinstance(crs, numbers.Integral):
epsg = int(crs)
elif isinstance(crs, str):
epsg = _wkt_to_epsg(crs)
if epsg is None:
Expand Down
21 changes: 13 additions & 8 deletions xrspatial/geotiff/_writers/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""
from __future__ import annotations

import numbers
import warnings
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -82,12 +83,13 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)``
rejects file-like destinations, and the explicit GPU writer
mirrors that rule (issue #1652).
crs : int, str, or None
EPSG code or WKT string. EPSG codes are strongly preferred for
interop; the WKT-only path emits a user-defined CRS (32767) with
the WKT stored in ``GTCitationGeoKey``, which many non-libgeotiff
readers ignore. A ``UserWarning`` is emitted when the WKT-only
path is taken. See issue #1768.
crs : int, numpy.integer, str, or None
EPSG code (int or numpy integer scalar) or WKT string. EPSG
codes are strongly preferred for interop; the WKT-only path
emits a user-defined CRS (32767) with the WKT stored in
``GTCitationGeoKey``, which many non-libgeotiff readers
ignore. A ``UserWarning`` is emitted when the WKT-only path
is taken. See issue #1768.
nodata : float, int, or None
NoData value.
compression : str
Expand Down Expand Up @@ -349,9 +351,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
y_res = None
res_unit = None

# ``numbers.Integral`` covers numpy integer scalars (``np.int32``,
# ``np.int64``) so they hit the EPSG branch instead of falling
# through to ``epsg=None``. Validator already rejects bool.
_validate_crs_arg(crs)
if isinstance(crs, int):
epsg = crs
if isinstance(crs, numbers.Integral):
epsg = int(crs)
elif isinstance(crs, str):
epsg = _wkt_to_epsg(crs)
if epsg is None:
Expand Down
137 changes: 137 additions & 0 deletions xrspatial/geotiff/tests/test_numpy_int_crs_2082.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Numpy integer CRS values must round-trip through the writers (#2082).

``_validate_crs_arg`` accepts ``numbers.Integral`` (incl. ``np.int32`` /
``np.int64`` / ``np.uint16``), but the eager and GPU writers used to
gate EPSG assignment on ``isinstance(crs, int)``. Numpy integers fail
that check, so the value passed validation, fell through the writer
branch, and the file landed on disk with no EPSG and no ``crs_wkt``
tag. Anything reading the file back got no CRS and silently dropped
into the no-georef code path.

These tests pin the round-trip at all three writer call sites that
used to do the gating: ``to_geotiff`` (eager), ``to_geotiff`` with a
``.vrt`` path (the deprecated ``_write_vrt_tiled`` branch), and
``write_geotiff_gpu`` (skipped without cuda).

There is an existing ``test_to_geotiff_accepts_numpy_int_epsg`` in
``test_crs_arg_validation_1971`` that only asserts ``buf.nbytes > 0``.
A file with no CRS still has bytes, so that test passed even with the
bug in place. The asserts below check ``geo_info.epsg`` (or the
file-level ``attrs['crs']``) instead, so a regression to the
silent-drop behaviour trips immediately.
"""
from __future__ import annotations

import io

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff

pyproj = pytest.importorskip("pyproj")


def _square(dtype=np.float32):
return xr.DataArray(
np.zeros((4, 4), dtype=dtype),
coords={'y': np.arange(4.0), 'x': np.arange(4.0)},
dims=('y', 'x'),
)


@pytest.mark.parametrize(
"np_int",
[np.int32(4326), np.int64(4326), np.uint16(4326)],
)
def test_to_geotiff_numpy_int_crs_roundtrips(tmp_path, np_int):
path = str(tmp_path / f"tmp_2082_eager_{type(np_int).__name__}.tif")
to_geotiff(_square(), path, crs=np_int)
out = open_geotiff(path)
assert out.attrs.get('crs') == 4326


@pytest.mark.parametrize(
"np_int",
[np.int32(3857), np.int64(3857)],
)
def test_to_geotiff_numpy_int_crs_roundtrips_bytesio(np_int):
# The BytesIO branch is a separate code path from the path-based
# writer for some metadata; pin both.
buf = io.BytesIO()
to_geotiff(_square(), buf, crs=np_int)
buf.seek(0)
out = open_geotiff(buf)
assert out.attrs.get('crs') == 3857


def test_to_geotiff_vrt_tiled_numpy_int_crs_roundtrips(tmp_path):
# ``to_geotiff(da, '*.vrt')`` dispatches to ``_write_vrt_tiled``,
# which has its own crs resolution block (eager.py:871). Pin
# numpy-int handling there separately. The writer emits per-tile
# ``tile_NN_NN.tif`` files in a ``<stem>_tiles/`` directory next
# to the .vrt; read one of those rather than the .vrt so the
# check exercises the writer-side CRS resolution, not the
# VRT reader's CRS handling.
vrt_path = str(tmp_path / "tmp_2082_vrt_tiled.vrt")
to_geotiff(_square(), vrt_path, crs=np.int64(4326))
tile_path = str(tmp_path / "tmp_2082_vrt_tiled_tiles" / "tile_00_00.tif")
out = open_geotiff(tile_path)
assert out.attrs.get('crs') == 4326


def test_to_geotiff_python_int_crs_still_works(tmp_path):
# Sanity: the existing plain-int path keeps working. If the
# writer-side ``numbers.Integral`` widening broke this, every
# existing caller that passes ``crs=4326`` would regress.
path = str(tmp_path / "tmp_2082_plain_int.tif")
to_geotiff(_square(), path, crs=4326)
out = open_geotiff(path)
assert out.attrs.get('crs') == 4326


def test_to_geotiff_numpy_int_crs_writes_real_int_to_attrs(tmp_path):
# The writer coerces with ``int(crs)`` before stamping the EPSG
# tag so downstream consumers don't see a numpy scalar in
# ``attrs['crs']`` if the reader ever propagates the raw value.
# Check that the round-tripped value isn't a numpy integer
# subclass: ``int(crs_val) == 4326`` catches the value, and the
# ``not isinstance(crs_val, np.integer)`` guard catches the type
# regression class (JSON / dict serialisation hazard) without
# tying the assertion to ``type(...) is int`` -- if the reader
# ever wraps EPSG in a Python int subclass for unrelated reasons
# the test still passes.
path = str(tmp_path / "tmp_2082_attrs_int_type.tif")
to_geotiff(_square(), path, crs=np.int64(4326))
out = open_geotiff(path)
crs_val = out.attrs.get('crs')
assert int(crs_val) == 4326
assert not isinstance(crs_val, np.integer)


# --- GPU path -----------------------------------------------------------

cupy = pytest.importorskip("cupy", reason="GPU writer needs cupy")

try:
import cupy # noqa: F401 -- already imported via importorskip
_GPU_AVAILABLE = True
except ImportError:
_GPU_AVAILABLE = False


@pytest.mark.skipif(not _GPU_AVAILABLE, reason="cuda not available")
def test_write_geotiff_gpu_numpy_int_crs_roundtrips(tmp_path):
from xrspatial.geotiff import write_geotiff_gpu

arr = cupy.zeros((4, 4), dtype=cupy.float32)
da = xr.DataArray(
arr,
coords={'y': np.arange(4.0), 'x': np.arange(4.0)},
dims=('y', 'x'),
)
path = str(tmp_path / "tmp_2082_gpu.tif")
write_geotiff_gpu(da, path, crs=np.int64(4326))
out = open_geotiff(path)
assert out.attrs.get('crs') == 4326
Loading