Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,10 +142,26 @@ def _validate_writer_spatial_shape(shape, dims=None,
``entry_point`` is the function name used in the error message so
direct callers of ``write`` / ``write_streaming`` / ``write_geotiff_gpu``
see the function they actually invoked.

Also rejects 3D inputs whose band/sample axis is zero (issue #2095).
Without the band check, a DataArray of shape ``(0, y, x)`` band-first
or ``(y, x, 0)`` band-last passed every spatial guard and reached
the IFD assembly with ``samples_per_pixel == 0``. The resulting TIFF
was readable as a 2D single-band raster, masking the upstream
collapse of the band axis.

Note that this validator runs before ``_validate_3d_writer_dims``
(#1812 / #1972) in ``to_geotiff``. For an ambiguous-dim input like
``(5, 5, 0)`` with dims ``('y', 'x', 'time')``, the band-last branch
sees ``bands == 0`` and the "no bands" error wins over the friendlier
ambiguous-dim message. Both errors name the right call to fix, so
the ordering is acceptable; reorder only if the ambiguous-dim
diagnostic becomes more important than the empty-axis one.
"""
if shape is None:
return
ndim = len(shape)
bands = None
if ndim == 2:
h, w = int(shape[0]), int(shape[1])
elif ndim == 3:
Expand All @@ -156,9 +172,11 @@ def _validate_writer_spatial_shape(shape, dims=None,
if dims is not None and len(dims) == 3:
band_first = dims[0] in _BAND_DIM_NAMES
if band_first:
bands = int(shape[0])
h, w = int(shape[1]), int(shape[2])
else:
h, w = int(shape[0]), int(shape[1])
bands = int(shape[2])
else:
# Other rank errors are handled by the existing ndim check; do
# not shadow that message.
Expand All @@ -171,6 +189,15 @@ def _validate_writer_spatial_shape(shape, dims=None,
f"clip or window that produced an empty selection; check "
f"the upstream operation before writing."
)
if bands is not None and bands <= 0:
raise ValueError(
f"{entry_point} cannot write a raster with no bands: got "
f"shape {tuple(int(s) for s in shape)} with {bands} bands. "
f"The band/sample dimension must be positive. A common "
f"cause is a selection or reduction that collapsed the "
f"band axis upstream; reduce to 2D before writing, or "
f"select at least one band (issue #2095)."
)


def _validate_dtype_cast(source_dtype, target_dtype):
Expand Down
141 changes: 141 additions & 0 deletions xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
"""Regression tests for issue #2095.

``to_geotiff`` validated the spatial axes of a 3D writer input but not
the band/sample axis. A DataArray of shape ``(0, y, x)`` band-first or
``(y, x, 0)`` band-last passed every guard and reached the IFD assembly
with ``samples_per_pixel == 0``. The resulting TIFF read back as a 2D
single-band raster, masking the upstream collapse of the band axis --
silent data fabrication.

The fix raises ``ValueError`` at the writer entry point on both layouts
and on every public writer surface (``to_geotiff``, ``write``,
``write_streaming``, and ``write_geotiff_gpu``). The message names the
offending axis so callers know what went empty.
"""
from __future__ import annotations

import importlib.util

import dask.array as dsk
import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import to_geotiff


def _cupy_available() -> bool:
    """Return True only when cupy imports cleanly and reports CUDA.

    The probe runs at import time (its result is stored in ``_HAS_GPU``
    and used by a ``skipif`` marker), so it must never raise: any failure
    while locating cupy, importing it, or querying the CUDA runtime is
    reported as "no GPU".
    """
    try:
        # ``find_spec`` can itself raise (for example ``ValueError`` when a
        # stub module whose ``__spec__`` is None sits in ``sys.modules``),
        # so the lookup lives inside the guard together with the import.
        if importlib.util.find_spec("cupy") is None:
            return False
        import cupy

        return bool(cupy.cuda.is_available())
    except Exception:
        # Best-effort capability probe: a broken install counts as absent.
        return False


_HAS_GPU = _cupy_available()


# (shape, dims) pairs whose band axis has length zero, one per supported
# layout: band axis first and band axis last.
_ZERO_BAND_LAYOUTS = [
    pytest.param((0, 5, 5), ("band", "y", "x"), id="band-first"),
    pytest.param((5, 5, 0), ("y", "x", "band"), id="band-last"),
]


@pytest.mark.parametrize("shape,dims", _ZERO_BAND_LAYOUTS)
def test_to_geotiff_rejects_zero_bands_numpy(tmp_path, shape, dims):
    """A numpy-backed DataArray with an empty band axis is refused."""
    shape_tag = "_".join(map(str, shape))
    target = tmp_path / f"tmp_2095_zerobands_{shape_tag}.tif"
    empty_bands = xr.DataArray(np.zeros(shape, dtype=np.uint8), dims=dims)

    with pytest.raises(ValueError) as caught:
        to_geotiff(empty_bands, str(target))

    text = str(caught.value)
    # The message has to mention the entry point that was called and say
    # that the band axis is empty.
    assert "to_geotiff" in text
    assert "no bands" in text.lower() or "0 bands" in text
    # The failure must not leave a file behind.
    assert not target.exists()


@pytest.mark.parametrize("shape,dims", _ZERO_BAND_LAYOUTS)
def test_to_geotiff_rejects_zero_bands_dask(tmp_path, shape, dims):
    """A dask-backed DataArray with an empty band axis is refused."""
    # Request a chunk size of 1 (rather than 0) on the zero-length band
    # axis and a single full-size chunk on every non-empty axis. We only
    # need the validator to fire before any compute happens.
    chunks = tuple(1 if s == 0 else s for s in shape)
    arr = dsk.zeros(shape, dtype=np.uint8, chunks=chunks)
    da = xr.DataArray(arr, dims=dims)
    out = tmp_path / f"tmp_2095_zerobands_dask_{'_'.join(map(str, shape))}.tif"
    with pytest.raises(ValueError) as excinfo:
        to_geotiff(da, str(out))
    msg = str(excinfo.value).lower()
    # Match the zero-band wording used by the sibling tests instead of the
    # bare substring "band", which an unrelated ValueError that merely
    # mentions bands would also satisfy.
    assert "no bands" in msg or "0 bands" in msg
    # Nothing should have been written.
    assert not out.exists()


def test_write_band_last_zero_bands_direct(tmp_path):
    """Calling ``write`` directly with a ``(y, x, 0)`` array fails closed.

    ``write`` is a public entry point of its own. Here it receives a raw
    numpy array (no DataArray wrapper, no dims) laid out band-last, with
    the trailing band axis empty.
    """
    from xrspatial.geotiff._writer import write

    target = tmp_path / "tmp_2095_write_zerobands.tif"
    with pytest.raises(ValueError) as caught:
        write(np.zeros((5, 5, 0), dtype=np.uint8), str(target))

    text = str(caught.value)
    # The message begins with "<entry_point> cannot write a raster with
    # no bands". Checking the prefix, not mere containment, is what tells
    # ``write`` apart from ``write_streaming`` and ``write_geotiff_gpu``:
    # the word "write" occurs later in every variant of the message, so
    # an ``in`` test would accept any of them.
    assert text.startswith("write cannot write")
    assert "0 bands" in text or "no bands" in text.lower()
    assert not target.exists()


def test_write_streaming_zero_bands_direct(tmp_path):
    """Calling ``write_streaming`` directly with ``(y, x, 0)`` fails closed.

    ``write_streaming`` is the dask-aware entry point and is handed a
    band-last chunked array here; the rejection has to happen before any
    tile-row math runs.
    """
    from xrspatial.geotiff._writer import write_streaming

    target = tmp_path / "tmp_2095_write_streaming_zerobands.tif"
    lazy_empty = dsk.zeros((5, 5, 0), dtype=np.uint8, chunks=(5, 5, 1))

    with pytest.raises(ValueError) as caught:
        write_streaming(lazy_empty, str(target))

    text = str(caught.value)
    # Prefix match pins the entry point named in the message.
    assert text.startswith("write_streaming cannot write")
    assert "0 bands" in text or "no bands" in text.lower()
    assert not target.exists()


@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
def test_write_geotiff_gpu_rejects_zero_bands(tmp_path):
    """The GPU writer refuses a band-first array whose band axis is empty.

    ``write_geotiff_gpu`` is a separate public entry point, so the
    zero-band guard has to fire there as well, without dispatching any
    GPU work.
    """
    import cupy as cp

    from xrspatial.geotiff._writers.gpu import write_geotiff_gpu

    target = tmp_path / "tmp_2095_zerobands_gpu.tif"
    device_values = cp.zeros((0, 5, 5), dtype=cp.uint8)
    empty_bands = xr.DataArray(device_values, dims=("band", "y", "x"))

    with pytest.raises(ValueError) as caught:
        write_geotiff_gpu(empty_bands, str(target))

    text = str(caught.value)
    # Prefix match pins the entry point named in the message.
    assert text.startswith("write_geotiff_gpu cannot write")
    assert "0 bands" in text or "no bands" in text.lower()
    assert not target.exists()
Loading