diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 24b184c20..956b20e6a 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -122,6 +122,57 @@ def _validate_3d_writer_dims(dims) -> None: ) +def _validate_writer_spatial_shape(shape, dims=None, + entry_point: str = "to_geotiff") -> None: + """Reject empty spatial shapes at the writer entry point (issue #2075). + + Clip and window pipelines can produce empty rasters. The eager and + streaming writers used to accept those inputs, write a TIFF whose + IFD claimed shape ``(0, N)`` / ``(N, 0)``, and then the reader + rejected the file with a generic "Invalid image dimensions" message + that never named the writer as the source. + + Validate up front. ``shape`` is the array shape (2D ``(h, w)``, + band-first 3D ``(bands, h, w)``, or band-last 3D ``(h, w, bands)``). + ``dims`` is the DataArray ``dims`` tuple when available; it lets the + helper pick the right spatial pair when a 3D band-first input + arrives. Without ``dims`` the helper assumes band-last for 3D + (consistent with the writer's pre-moveaxis layout invariant), so + pass ``dims`` for DataArray inputs to avoid mis-naming the axis. + ``entry_point`` is the function name used in the error message so + direct callers of ``write`` / ``write_streaming`` / ``write_geotiff_gpu`` + see the function they actually invoked. + """ + if shape is None: + return + ndim = len(shape) + if ndim == 2: + h, w = int(shape[0]), int(shape[1]) + elif ndim == 3: + # Decide band-first vs band-last from ``dims`` when available. + # Both layouts are valid writer inputs; the spatial axes are + # whichever two are not the band axis. + band_first = False + if dims is not None and len(dims) == 3: + band_first = dims[0] in _BAND_DIM_NAMES + if band_first: + h, w = int(shape[1]), int(shape[2]) + else: + h, w = int(shape[0]), int(shape[1]) + else: + # Other rank errors are handled by the existing ndim check; do + # not shadow that message. + return + if h <= 0 or w <= 0: + raise ValueError( + f"{entry_point} cannot write an empty raster: got shape " + f"{tuple(int(s) for s in shape)} with height={h}, width={w}. " + f"Both spatial dims must be positive. A common cause is a " + f"clip or window that produced an empty selection; check " + f"the upstream operation before writing." + ) + + def _validate_dtype_cast(source_dtype, target_dtype): """Validate that casting source_dtype to target_dtype is allowed. diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index a2468bd29..e0b3517e4 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1567,6 +1567,17 @@ def write(data: np.ndarray, path: str, *, Per-pixel error budget for LERC compression. ``0.0`` (default) is lossless. Only valid with ``compression='lerc'``. """ + # Issue #2075: reject empty spatial shapes before any IFD layout + # math runs. ``to_geotiff`` already guards this for DataArray inputs, + # but ``write`` is also called directly by tests and by the GPU + # path, so guard here too. ``write`` always receives band-last + # arrays (eager moveaxis ran upstream), so the ndim-based pair + # picked by ``_validate_writer_spatial_shape`` without ``dims`` is + # correct. + from ._validation import _validate_writer_spatial_shape + _validate_writer_spatial_shape( + getattr(data, 'shape', None), entry_point="write") + comp_tag = _compression_tag(compression) pred_int = normalize_predictor(predictor, data.dtype, comp_tag) @@ -1895,6 +1906,15 @@ def write_streaming(dask_data, path: str, *, "Streaming dask write to cloud storage is not yet supported. " "Use .compute() first or write to a .vrt file.") + # Issue #2075: reject empty spatial shapes before tile/strip count + # math (``math.ceil(width / tw)`` etc. below at the layout block) + # silently produces zero entries. ``to_geotiff`` already validates + # this upstream, but direct callers of ``write_streaming`` go + # through here too. + from ._validation import _validate_writer_spatial_shape + _validate_writer_spatial_shape( + getattr(dask_data, 'shape', None), entry_point="write_streaming") + height, width = dask_data.shape[:2] samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 dtype = dask_data.dtype diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index c1934f641..d658be1ab 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -44,6 +44,7 @@ _validate_3d_writer_dims, _validate_nodata_arg, _validate_tile_size_arg, + _validate_writer_spatial_shape, validate_write_metadata, ) from .._writer import write @@ -271,6 +272,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray, _validate_nodata_arg(nodata) + # Issue #2075: reject zero-height / zero-width inputs before any + # dispatch decision. Clip / window pipelines naturally produce empty + # rasters and the writers used to accept them, produce a TIFF whose + # IFD claimed shape ``(0, N)`` / ``(N, 0)``, and surface a generic + # "Invalid image dimensions" only at read time. Fail closed at the + # entry point with a message that names the offending dim. + _shape = getattr(data, 'shape', None) + _dims = getattr(data, 'dims', None) + _validate_writer_spatial_shape(_shape, _dims) + # Issue #1987 ambiguous-metadata checks. The hook is a no-op # when no check is registered, so this call is safe even if every # check is later unregistered for a specific entry point. diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 4b60f1914..3ed7c038f 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -29,6 +29,7 @@ _validate_3d_writer_dims, _validate_nodata_arg, _validate_tile_size_arg, + _validate_writer_spatial_shape, validate_write_metadata, ) @@ -263,6 +264,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, _validate_tile_size_arg(tile_size) _validate_nodata_arg(nodata) + # Issue #2075: reject empty spatial shapes. ``write_geotiff_gpu`` is + # a public entry point and direct callers (with cupy.ndarray or raw + # numpy) do not flow through ``to_geotiff``'s guard, so check here + # before any GPU work starts. + _validate_writer_spatial_shape( + getattr(data, 'shape', None), + getattr(data, 'dims', None), + entry_point="write_geotiff_gpu", + ) + # Issue #1987 ambiguous-metadata checks; mirrors ``to_geotiff`` so the # GPU writer enforces the same crs/crs_wkt consistency rule. _attrs = getattr(data, 'attrs', None) or {} diff --git a/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py new file mode 100644 index 000000000..d53a3c66a --- /dev/null +++ b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py @@ -0,0 +1,102 @@ +"""Regression tests for issue #2075. + +``to_geotiff`` used to accept arrays with a zero-height or zero-width +spatial dim and write a TIFF whose IFD claimed shape ``(0, N)`` or +``(N, 0)``. The reader then rejected the file with the generic +"Invalid image dimensions" message that never named the writer as the +source. + +The fix raises ``ValueError`` at the write entry point. The failure +happens before any bytes hit disk, and the message names the offending +dimension so callers know which axis went empty (a clip / window +operation is the common cause). +""" +from __future__ import annotations + +import importlib.util + +import dask.array as dsk +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff + + +def _cupy_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _cupy_available() + + +_EMPTY_SHAPES = [ + pytest.param((0, 5), id="zero-height"), + pytest.param((5, 0), id="zero-width"), + pytest.param((0, 0), id="both-zero"), +] + + +@pytest.mark.parametrize("shape", _EMPTY_SHAPES) +def test_to_geotiff_rejects_empty_numpy(tmp_path, shape): + h, w = shape + da = xr.DataArray( + np.zeros(shape, dtype=np.float32), + dims=("y", "x"), + ) + out = tmp_path / f"tmp_2075_empty_{h}x{w}.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value) + # The message must name the writer that the user called so the + # traceback names the right entry point. + assert "to_geotiff" in msg + assert "empty" in msg.lower() + if h == 0: + assert "height=0" in msg + if w == 0: + assert "width=0" in msg + # Nothing should have been written. + assert not out.exists() + + +@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") +def test_write_geotiff_gpu_rejects_empty(tmp_path): + """``write_geotiff_gpu`` is a public entry point and does not go + through ``to_geotiff``; make sure the empty-shape guard fires there + too (the suggestion from PR #2078 review).""" + import cupy as cp + + from xrspatial.geotiff._writers.gpu import write_geotiff_gpu + + arr = cp.zeros((0, 5), dtype=cp.float32) + out = tmp_path / "tmp_2075_empty_gpu_0x5.tif" + with pytest.raises(ValueError) as excinfo: + write_geotiff_gpu(arr, str(out)) + msg = str(excinfo.value) + assert "write_geotiff_gpu" in msg + assert "height=0" in msg + assert not out.exists() + + +def test_to_geotiff_rejects_empty_dask(tmp_path): + # One dask variant is enough to exercise the streaming entry point. + shape = (0, 5) + da = xr.DataArray( + dsk.zeros(shape, dtype=np.float32, chunks=shape if 0 not in shape + else (1, 1)), + dims=("y", "x"), + ) + out = tmp_path / "tmp_2075_empty_dask_0x5.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value).lower() + assert "height" in msg or "empty" in msg or "(0, 5)" in msg + assert not out.exists()