diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index a608c43a..e8095fd1 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -142,10 +142,26 @@ def _validate_writer_spatial_shape(shape, dims=None, ``entry_point`` is the function name used in the error message so direct callers of ``write`` / ``write_streaming`` / ``write_geotiff_gpu`` see the function they actually invoked. + + Also rejects 3D inputs whose band/sample axis is zero (issue #2095). + Without the band check, a DataArray of shape ``(0, y, x)`` band-first + or ``(y, x, 0)`` band-last passed every spatial guard and reached + the IFD assembly with ``samples_per_pixel == 0``. The resulting TIFF + was readable as a 2D single-band raster, masking the upstream + collapse of the band axis. + + Note that this validator runs before ``_validate_3d_writer_dims`` + (#1812 / #1972) in ``to_geotiff``. For an ambiguous-dim input like + ``(5, 5, 0)`` with dims ``('y', 'x', 'time')``, the band-last branch + sees ``bands == 0`` and the "no bands" error wins over the friendlier + ambiguous-dim message. Both errors name the right call to fix, so + the ordering is acceptable; reorder only if the ambiguous-dim + diagnostic becomes more important than the empty-axis one. """ if shape is None: return ndim = len(shape) + bands = None if ndim == 2: h, w = int(shape[0]), int(shape[1]) elif ndim == 3: @@ -156,9 +172,11 @@ def _validate_writer_spatial_shape(shape, dims=None, if dims is not None and len(dims) == 3: band_first = dims[0] in _BAND_DIM_NAMES if band_first: + bands = int(shape[0]) h, w = int(shape[1]), int(shape[2]) else: h, w = int(shape[0]), int(shape[1]) + bands = int(shape[2]) else: # Other rank errors are handled by the existing ndim check; do # not shadow that message. @@ -171,6 +189,15 @@ def _validate_writer_spatial_shape(shape, dims=None, f"clip or window that produced an empty selection; check " f"the upstream operation before writing." 
) + if bands is not None and bands <= 0: + raise ValueError( + f"{entry_point} cannot write a raster with no bands: got " + f"shape {tuple(int(s) for s in shape)} with {bands} bands. " + f"The band/sample dimension must be positive. A common " + f"cause is a selection or reduction that collapsed the " + f"band axis upstream; reduce to 2D before writing, or " + f"select at least one band (issue #2095)." + ) def _validate_dtype_cast(source_dtype, target_dtype): diff --git a/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py b/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py new file mode 100644 index 00000000..4a8e16d0 --- /dev/null +++ b/xrspatial/geotiff/tests/test_to_geotiff_zero_bands_2095.py @@ -0,0 +1,141 @@ +"""Regression tests for issue #2095. + +``to_geotiff`` validated the spatial axes of a 3D writer input but not +the band/sample axis. A DataArray of shape ``(0, y, x)`` band-first or +``(y, x, 0)`` band-last passed every guard and reached the IFD assembly +with ``samples_per_pixel == 0``. The resulting TIFF read back as a 2D +single-band raster, masking the upstream collapse of the band axis -- +silent data fabrication. + +The fix raises ``ValueError`` at the writer entry point on both layouts +and on every public writer surface (``to_geotiff``, ``write``, +``write_streaming``, and ``write_geotiff_gpu``). The message names the +offending axis so callers know what went empty. 
+""" +from __future__ import annotations + +import importlib.util + +import dask.array as dsk +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff + + +def _cupy_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _cupy_available() + + +_ZERO_BAND_LAYOUTS = [ + pytest.param( + (0, 5, 5), + ("band", "y", "x"), + id="band-first", + ), + pytest.param( + (5, 5, 0), + ("y", "x", "band"), + id="band-last", + ), +] + + +@pytest.mark.parametrize("shape,dims", _ZERO_BAND_LAYOUTS) +def test_to_geotiff_rejects_zero_bands_numpy(tmp_path, shape, dims): + da = xr.DataArray(np.zeros(shape, dtype=np.uint8), dims=dims) + out = tmp_path / f"tmp_2095_zerobands_{'_'.join(map(str, shape))}.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value) + assert "to_geotiff" in msg + assert "no bands" in msg.lower() or "0 bands" in msg + # Nothing should have been written. + assert not out.exists() + + +@pytest.mark.parametrize("shape,dims", _ZERO_BAND_LAYOUTS) +def test_to_geotiff_rejects_zero_bands_dask(tmp_path, shape, dims): + # Dask cannot construct an array with a zero-length chunk along a + # zero-length dim, so request a chunk size of 1 on the zero-length + # axis and one full-length chunk on every non-empty axis. We only + # need the validator to fire before any compute happens. + chunks = tuple(1 if s == 0 else s for s in shape) + arr = dsk.zeros(shape, dtype=np.uint8, chunks=chunks) + da = xr.DataArray(arr, dims=dims) + out = tmp_path / f"tmp_2095_zerobands_dask_{'_'.join(map(str, shape))}.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value).lower() + assert "band" in msg + assert not out.exists() + + +def test_write_band_last_zero_bands_direct(tmp_path): + """``write`` is a public entry point. 
Direct callers (no DataArray + wrapper, no dims) pass raw numpy arrays through the band-last + convention, so a ``(y, x, 0)`` array must fail closed here too.""" + from xrspatial.geotiff._writer import write + + arr = np.zeros((5, 5, 0), dtype=np.uint8) + out = tmp_path / "tmp_2095_write_zerobands.tif" + with pytest.raises(ValueError) as excinfo: + write(arr, str(out)) + msg = str(excinfo.value) + # The error template starts with ``"{entry_point} cannot write a + # raster with no bands"``. Anchor to that exact prefix so the + # assertion fails if the wrong entry point fires (every message + # also contains the substring "write" further on, so an `in` + # check would not distinguish ``write`` from ``write_streaming`` + # or ``write_geotiff_gpu``). + assert msg.startswith("write cannot write") + assert "0 bands" in msg or "no bands" in msg.lower() + assert not out.exists() + + +def test_write_streaming_zero_bands_direct(tmp_path): + """``write_streaming`` is the dask-aware entry point. Direct callers + pass band-last dask arrays, so a ``(y, x, 0)`` chunked array must + fail closed before any tile-row math runs.""" + from xrspatial.geotiff._writer import write_streaming + + arr = dsk.zeros((5, 5, 0), dtype=np.uint8, chunks=(5, 5, 1)) + out = tmp_path / "tmp_2095_write_streaming_zerobands.tif" + with pytest.raises(ValueError) as excinfo: + write_streaming(arr, str(out)) + msg = str(excinfo.value) + assert msg.startswith("write_streaming cannot write") + assert "0 bands" in msg or "no bands" in msg.lower() + assert not out.exists() + + +@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") +def test_write_geotiff_gpu_rejects_zero_bands(tmp_path): + """The GPU writer is a separate public entry point. 
The zero-band + guard must fire there too without dispatching any GPU work.""" + import cupy as cp + + from xrspatial.geotiff._writers.gpu import write_geotiff_gpu + + arr = xr.DataArray( + cp.zeros((0, 5, 5), dtype=cp.uint8), + dims=("band", "y", "x"), + ) + out = tmp_path / "tmp_2095_zerobands_gpu.tif" + with pytest.raises(ValueError) as excinfo: + write_geotiff_gpu(arr, str(out)) + msg = str(excinfo.value) + assert msg.startswith("write_geotiff_gpu cannot write") + assert "0 bands" in msg or "no bands" in msg.lower() + assert not out.exists()