Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-metadata-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-12,1753,HIGH,2,"read_geotiff_gpu stripped-fallback windowed read on a non-georef TIFF emitted float64 [-0.5, -1.5, ...] coords via the default unit GeoTransform placeholder, while the eager numpy and dask paths emit int64 file-relative pixel coords. Regression of #1710 (fix missed the stripped GPU branch). The tiled-GPU helper _gpu_apply_window_band already gates on has_georef correctly; the stripped fallback only checked t is None. Fixed by routing both non-georef and t-is-None cases through the integer pixel-coord branch and using file-relative offsets to match every other backend. Verified 4-backend parity (numpy / cupy / dask+numpy / dask+cupy)."
geotiff,2026-05-13,1812,HIGH,3,"Re-audit 2026-05-13 (af588e03 worktree, branch deep-sweep-metadata-geotiff-2026-05-13-af588e03): 4-backend (numpy/cupy/dask+numpy/dask+cupy) parity verified for full reads, windowed reads, multi-band, miniswhite, nodata, extra_tags, gdal_metadata, x/y_resolution, image_description. All three writers (eager to_geotiff, dask streaming, write_geotiff_gpu) preserve rich tags through round-trip. VRT read attrs agree across open_geotiff / read_geotiff_dask / read_vrt (all 3 drop x/y_resolution -- VRT XML format limitation, LOW). NEW HIGH finding #1812: to_geotiff silently corrupted data when 3D dims[0] was not in _BAND_DIM_NAMES (e.g. dims=('time','y','x')); writer skipped moveaxis and treated leading axis as y -- round-trip produced shape (2,4,5) instead of (4,5,2) with swapped per-slice contents. Fix raises ValueError at all 3 writer entry points with an actionable message (transpose to (y,x,band) or rename to band/bands/channel). Existing LOW (int+nodata dtype dask always-promotes-to-float64 while eager/gpu keep int when sentinel absent) is documented and intentional per #1597."
reproject,2026-05-10,1572;1573,HIGH,1;3;4,geoid_height_raster dropped input attrs and used dims[-2:] for 3D inputs (#1572). reproject/merge ignored nodatavals (rasterio convention) when rioxarray absent (#1573). Fixed in same branch.
89 changes: 89 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,58 @@
# a GeoTransform from coords (see ``_coords_to_transform`` and issue #1643).
_BAND_DIM_NAMES = ('band', 'bands', 'channel')

# Spatial dim names recognised on 3D writer inputs. ``y``/``x`` are the
# canonical TIFF axes; aliases are accepted so a user who happens to use
# ``lat``/``lon`` or ``row``/``col`` is not bounced by the validator below.
_Y_DIM_NAMES = ('y', 'lat', 'latitude', 'row')
_X_DIM_NAMES = ('x', 'lon', 'longitude', 'col')


def _validate_3d_writer_dims(dims) -> None:
"""Reject ambiguous 3D writer inputs (issue #1812).

The writer interprets a 3D DataArray as either ``(band, y, x)`` or
``(y, x, band)``. ``data.dims[0] in _BAND_DIM_NAMES`` decides which
branch fires the ``moveaxis``. Anything else (e.g. ``('time', 'y', 'x')``)
used to fall through silently: the writer kept the leading axis as
the spatial ``y`` axis and the result was a TIFF with the leading
axis values laid out along ``y`` (silent data corruption -- on
read-back the array round-tripped with a swapped shape).

Refuse the ambiguous case at the entry point. The message tells the
caller exactly how to fix the input (rename to one of
``_BAND_DIM_NAMES`` or transpose to ``(y, x, band)``).
"""
if len(dims) != 3:
return
d0, d1, d2 = dims
band_layout = (d0 in _BAND_DIM_NAMES
and d1 in _Y_DIM_NAMES
and d2 in _X_DIM_NAMES)
yxb_layout = (d0 in _Y_DIM_NAMES
and d1 in _X_DIM_NAMES
and d2 in _BAND_DIM_NAMES)
if band_layout or yxb_layout:
return
# Bare (y, x, *) or (*, y, x) where the third dim is unnamed but
# spatial -- the writer's old behaviour treats the non-spatial axis
# as bands. Accept that only when the unknown dim is in the band
# position (last), which matches how raw numpy callers typically
# build a band-last array.
if d0 in _Y_DIM_NAMES and d1 in _X_DIM_NAMES:
return
raise ValueError(
f"3D writer input has ambiguous dims {dims!r}. Expected "
f"(band, y, x) or (y, x, band); accepted band-dim aliases are "
f"{_BAND_DIM_NAMES} and spatial aliases are y={_Y_DIM_NAMES} / "
f"x={_X_DIM_NAMES}. Rename the non-spatial dim to 'band' or "
f"transpose the array so spatial dims come first (e.g. "
f"``da.transpose('y', 'x', {dims[0]!r})``). The writer cannot "
f"infer which axis is the band axis from arbitrary dim names "
f"and would otherwise silently treat the leading axis as the "
f"spatial y axis (issue #1812)."
)


class GeoTIFFFallbackWarning(UserWarning):
"""Warning emitted when a geotiff helper falls back to a slower path.
Expand Down Expand Up @@ -1388,6 +1440,15 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
(``b != 0`` or ``d != 0`` in rasterio ``Affine`` ordering). The
on-disk GeoTIFF is axis-aligned; reproject onto an axis-aligned
grid first.
ValueError
If ``data`` is a 3D DataArray whose ``dims`` is not
``(band, y, x)`` or ``(y, x, band)`` (accepting the band-name
aliases ``bands`` / ``channel`` and spatial-name aliases
``lat`` / ``lon`` / ``latitude`` / ``longitude`` / ``row`` /
``col``). A leading non-band dim such as ``time`` is rejected
because the writer cannot infer the band axis from arbitrary
names and used to silently treat the leading axis as ``y``
(issue #1812).
"""
from ._reader import _coerce_path

Expand Down Expand Up @@ -1632,6 +1693,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
# destinations we materialise eagerly and assemble in-memory.
if hasattr(raw, 'dask') and not cog and not _path_is_file_like:
dask_arr = raw
# Reject ambiguous 3D layouts at the entry point so a leading
# non-band dim like ``('time', 'y', 'x')`` cannot silently
# round-trip as a TIFF whose ``y`` axis carries the time
# values (issue #1812).
if raw.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band)
if raw.ndim == 3 and data.dims[0] in _BAND_DIM_NAMES:
import dask.array as da
Expand Down Expand Up @@ -1682,6 +1749,12 @@ def to_geotiff(data: xr.DataArray | np.ndarray,
arr = arr.get() # Dask+CuPy -> numpy
else:
arr = np.asarray(raw)
# Reject ambiguous 3D layouts (issue #1812). The validator runs
# on ``data.dims`` (the original DataArray's dim names) rather
# than on ``arr`` so the error fires before the move-axis even
# for COG and file-like destinations that fall through here.
if arr.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band)
if arr.ndim == 3 and data.dims[0] in _BAND_DIM_NAMES:
arr = np.moveaxis(arr, 0, -1)
Expand Down Expand Up @@ -3522,6 +3595,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
GPU writer forwards this kwarg unchanged. Default ``'auto'``
writes MinIsBlack for any band count, so a 4-band raster is
not silently tagged as RGB+alpha (issue #1769).

Raises
------
ValueError
If ``data`` is a 3D DataArray whose ``dims`` is not
``(band, y, x)`` or ``(y, x, band)`` (accepting band-name
aliases ``bands`` / ``channel`` and spatial-name aliases
``lat`` / ``lon`` / ``row`` / ``col``). A leading non-band
dim such as ``time`` would otherwise silently round-trip with
the leading axis treated as ``y`` (issue #1812).
"""
if not tiled:
raise ValueError(
Expand Down Expand Up @@ -3603,6 +3686,12 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
else:
arr = cupy.asarray(np.asarray(arr)) # numpy -> GPU

# Reject ambiguous 3D layouts (issue #1812). Mirrors the gate
# in ``to_geotiff``: a leading non-band dim like ``time`` would
# otherwise round-trip with the leading axis silently treated
# as ``y``.
if arr.ndim == 3:
_validate_3d_writer_dims(data.dims)
# Handle band-first dimension order (band, y, x) -> (y, x, band).
# rioxarray and CF-style multi-band rasters land here; without
# this remap the writer treats arr.shape[2] as the band axis and
Expand Down
19 changes: 14 additions & 5 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
raise ValueError(
f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}")

# Reject crafted tile dims that would force huge per-tile allocations.
# A single tile's decoded bytes must also fit under the pixel budget.
_check_dimensions(tw, th, samples, max_pixels)
# Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the
# TIFF header against malformed values; it is not the caller's output
# budget. The output-window check below uses ``max_pixels`` and is
# what enforces the user's per-call memory cap. The source-read path
# under ``read_vrt`` (#1796) relies on that output check to honour a
# small caller ``max_pixels`` against a normal-tile source; see
# #1823.
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)

# Per-tile compressed-byte cap (issue #1664). Same env var as the
# HTTP path. mmap slicing is bounded by the file size, but the slice
Expand Down Expand Up @@ -2016,10 +2021,14 @@ def _fetch_decode_cog_http_tiles(
# A windowed HTTP read of a multi-billion-pixel COG only allocates
# the window, so capping the full image would reject legitimate
# tiled reads. The full-image cap still applies for whole-file
# reads (window is None). The single-tile budget always applies.
# reads (window is None). The per-tile dim check below guards the
# TIFF header against absurd ``TileWidth`` / ``TileLength`` values
# (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small
# ``max_pixels`` -- intended as an output-window budget -- does not
# reject normal 256x256 tiles. See #1823.
if window is None:
_check_dimensions(width, height, samples, max_pixels)
_check_dimensions(tw, th, samples, max_pixels)
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)

# Reject malformed TIFFs whose declared tile grid exceeds the supplied
# TileOffsets length. See issue #1219.
Expand Down
222 changes: 222 additions & 0 deletions xrspatial/geotiff/tests/test_to_geotiff_3d_dim_validation_1812.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
"""Regression tests for issue #1812.

``to_geotiff`` / ``write_geotiff_gpu`` used to silently mishandle 3D
DataArrays whose leading dim was not in
``_BAND_DIM_NAMES = ('band', 'bands', 'channel')``. The
``moveaxis(arr, 0, -1)`` that puts ``(band, y, x)`` into the on-disk
``(y, x, band)`` layout was skipped, the writer kept the leading axis
as the spatial ``y`` axis, and the result was a TIFF with silently
swapped axes -- on read-back, ``out[:, :, 0].sum() != arr[0].sum()``.

The fix raises ``ValueError`` at the entry point of all three writers
(eager, dask streaming, and GPU) when ``dims`` is not unambiguously
one of ``(band, y, x)`` or ``(y, x, band)``.
"""
from __future__ import annotations

import importlib.util

import dask.array as dsk
import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import open_geotiff, to_geotiff


def _gpu_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy

return bool(cupy.cuda.is_available())
except Exception:
return False


_HAS_GPU = _gpu_available()
_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")


# Inputs that must be accepted (round-trip cleanly).
_HAPPY_3D_INPUTS = [
pytest.param(("band", "y", "x"), (3, 4, 5), id="band-y-x"),
pytest.param(("bands", "y", "x"), (3, 4, 5), id="bands-y-x"),
pytest.param(("channel", "y", "x"), (3, 4, 5), id="channel-y-x"),
pytest.param(("y", "x", "band"), (4, 5, 3), id="y-x-band"),
pytest.param(("lat", "lon", "band"), (4, 5, 3), id="lat-lon-band"),
pytest.param(("row", "col", "channel"), (4, 5, 3), id="row-col-channel"),
pytest.param(("band", "lat", "lon"), (3, 4, 5), id="band-lat-lon-alias"),
]


def _make_da(dims, shape, dtype=np.uint8, backend="numpy"):
if backend == "numpy":
arr = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
elif backend == "dask":
arr_np = np.arange(int(np.prod(shape)), dtype=dtype).reshape(shape)
arr = dsk.from_array(arr_np, chunks=2)
elif backend == "cupy":
import cupy

arr = cupy.arange(int(np.prod(shape)),
dtype=cupy.dtype(dtype)).reshape(shape)
else:
raise ValueError(backend)
return xr.DataArray(arr, dims=dims, attrs={"crs": "EPSG:4326"})


def test_repro_silent_corruption_now_raises(tmp_path):
"""The original repro now raises a clear ValueError."""
arr = np.zeros((2, 4, 5), dtype=np.uint8)
arr[0] = 1
arr[1] = 2
da = xr.DataArray(arr, dims=("time", "y", "x"),
attrs={"crs": "EPSG:4326"})
out_path = tmp_path / "tmp_1812_time_y_x.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("z", "y", "x"), (2, 4, 5), id="z-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_eager_rejects_ambiguous_3d(tmp_path, dims, shape):
"""Eager numpy path raises ValueError on ambiguous 3D dim names."""
da = _make_da(dims, shape, backend="numpy")
out_path = tmp_path / f"tmp_1812_eager_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("z", "y", "x"), (2, 4, 5), id="z-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_dask_streaming_rejects_ambiguous_3d(tmp_path, dims, shape):
"""Dask-streaming branch raises ValueError on ambiguous 3D dim names."""
da = _make_da(dims, shape, backend="dask")
out_path = tmp_path / f"tmp_1812_dask_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
to_geotiff(da, str(out_path), crs=4326)


@_gpu_only
@pytest.mark.parametrize("dims, shape", [
pytest.param(("time", "y", "x"), (2, 4, 5), id="time-y-x"),
pytest.param(("foo", "bar", "baz"), (2, 4, 5), id="foo-bar-baz"),
])
def test_gpu_writer_rejects_ambiguous_3d(tmp_path, dims, shape):
"""GPU writer raises ValueError on ambiguous 3D dim names."""
from xrspatial.geotiff import write_geotiff_gpu

da = _make_da(dims, shape, backend="cupy")
out_path = tmp_path / f"tmp_1812_gpu_{'_'.join(dims)}.tif"
with pytest.raises(ValueError, match="ambiguous dims"):
write_geotiff_gpu(da, str(out_path), crs=4326)


@pytest.mark.parametrize("dims, shape", _HAPPY_3D_INPUTS)
def test_happy_3d_round_trip(tmp_path, dims, shape):
"""Accepted 3D dim layouts still round-trip cleanly (eager + dask).

Each slice along the band axis is filled with a distinct constant
so a silent axis swap would change the per-slice sums.
"""
# Build a per-slice-distinguishable array. ``arr_full[k]`` along the
# band axis is filled with ``k + 1``.
band_pos = next(i for i, d in enumerate(dims)
if d in ("band", "bands", "channel"))
n_bands = shape[band_pos]
spatial_shape = tuple(s for i, s in enumerate(shape) if i != band_pos)
arr_np = np.empty(shape, dtype=np.uint8)
for k in range(n_bands):
slicer = [slice(None)] * 3
slicer[band_pos] = k
arr_np[tuple(slicer)] = k + 1

# Eager round-trip
da_eager = xr.DataArray(arr_np, dims=dims,
attrs={"crs": "EPSG:4326"})
p_eager = tmp_path / f"tmp_1812_happy_eager_{'_'.join(dims)}.tif"
to_geotiff(da_eager, str(p_eager), crs=4326)
out_eager = open_geotiff(str(p_eager))
# On-disk layout is always (y, x, band). Compare per-band sums.
assert out_eager.shape == spatial_shape + (n_bands,)
for k in range(n_bands):
expected = (k + 1) * (spatial_shape[0] * spatial_shape[1])
assert int(out_eager.values[:, :, k].sum()) == expected, (
f"band {k} sum mismatch on eager round-trip of dims={dims}"
)

# Dask streaming round-trip
da_dask = xr.DataArray(dsk.from_array(arr_np, chunks=2), dims=dims,
attrs={"crs": "EPSG:4326"})
p_dask = tmp_path / f"tmp_1812_happy_dask_{'_'.join(dims)}.tif"
to_geotiff(da_dask, str(p_dask), crs=4326)
out_dask = open_geotiff(str(p_dask))
assert out_dask.shape == spatial_shape + (n_bands,)
for k in range(n_bands):
expected = (k + 1) * (spatial_shape[0] * spatial_shape[1])
assert int(out_dask.values[:, :, k].sum()) == expected, (
f"band {k} sum mismatch on dask round-trip of dims={dims}"
)


def test_2d_still_works(tmp_path):
"""2D inputs are unaffected by the new 3D validator."""
arr = np.arange(20, dtype=np.uint8).reshape(4, 5)
da = xr.DataArray(arr, dims=("y", "x"), attrs={"crs": "EPSG:4326"})
p = tmp_path / "tmp_1812_2d.tif"
to_geotiff(da, str(p), crs=4326)
out = open_geotiff(str(p))
assert out.shape == (4, 5)
assert np.array_equal(out.values, arr)


def test_error_message_actionable(tmp_path):
"""The ValueError message tells the caller how to fix the input."""
arr = np.zeros((2, 4, 5), dtype=np.uint8)
da = xr.DataArray(arr, dims=("time", "y", "x"),
attrs={"crs": "EPSG:4326"})
p = tmp_path / "tmp_1812_msg.tif"
with pytest.raises(ValueError) as excinfo:
to_geotiff(da, str(p), crs=4326)
msg = str(excinfo.value)
# Mentions the offending dim layout
assert "('time', 'y', 'x')" in msg
# Mentions the accepted alternatives
assert "(band, y, x)" in msg
assert "(y, x, band)" in msg
# Points the user at a concrete remediation
assert "transpose" in msg.lower() or "rename" in msg.lower()
# References the issue
assert "#1812" in msg


@_gpu_only
def test_gpu_writer_happy_path_still_works(tmp_path):
"""GPU writer's existing happy paths (band-first and band-last) survive."""
import cupy

from xrspatial.geotiff import write_geotiff_gpu

arr_bf = cupy.arange(3 * 4 * 5, dtype=cupy.uint8).reshape(3, 4, 5)
da_bf = xr.DataArray(arr_bf, dims=("band", "y", "x"),
attrs={"crs": "EPSG:4326"})
p_bf = tmp_path / "tmp_1812_gpu_bf.tif"
write_geotiff_gpu(da_bf, str(p_bf), crs=4326)
out_bf = open_geotiff(str(p_bf))
assert out_bf.shape == (4, 5, 3)

arr_bl = cupy.arange(4 * 5 * 3, dtype=cupy.uint8).reshape(4, 5, 3)
da_bl = xr.DataArray(arr_bl, dims=("y", "x", "band"),
attrs={"crs": "EPSG:4326"})
p_bl = tmp_path / "tmp_1812_gpu_bl.tif"
write_geotiff_gpu(da_bl, str(p_bl), crs=4326)
out_bl = open_geotiff(str(p_bl))
assert out_bl.shape == (4, 5, 3)
Loading
Loading