From 0b4487dc21872328948102dcde4412e74d5ee787 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:08:05 -0700 Subject: [PATCH 1/2] geotiff: reject empty rasters in to_geotiff with a clear error (#2075) Writers accepted zero-height or zero-width arrays and produced TIFFs that failed to open. Validate positive spatial shape in both the eager and streaming entry points so the failure happens at write time with a specific message instead of a generic "Invalid image dimensions" at read time. --- xrspatial/geotiff/_validation.py | 49 ++++++++++++++ xrspatial/geotiff/_writer.py | 18 +++++ xrspatial/geotiff/_writers/eager.py | 11 +++ .../tests/test_to_geotiff_empty_shape_2075.py | 67 +++++++++++++++++++ 4 files changed, 145 insertions(+) create mode 100644 xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index 24b184c20..a6d26f651 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -122,6 +122,55 @@ def _validate_3d_writer_dims(dims) -> None: ) +def _validate_writer_spatial_shape(shape, dims=None) -> None: + """Reject empty spatial shapes at the writer entry point (issue #2075). + + Clip and window pipelines can produce empty rasters. The eager and + streaming writers used to accept those inputs, write a TIFF whose + IFD claimed shape ``(0, N)`` / ``(N, 0)``, and then the reader + rejected the file with a generic "Invalid image dimensions" message + that never named the writer as the source. + + Validate up front. ``shape`` is the array shape (2D ``(h, w)``, + band-first 3D ``(bands, h, w)``, or band-last 3D ``(h, w, bands)``). + ``dims`` is the DataArray ``dims`` tuple when available; it lets the + helper pick the right spatial pair when a 3D band-first input + arrives. Without ``dims`` the helper assumes band-last for 3D + (consistent with the writer's pre-moveaxis layout invariant), so + pass ``dims`` for DataArray inputs to avoid mis-naming the axis. + """ + if shape is None: + return + ndim = len(shape) + if ndim == 2: + h, w = int(shape[0]), int(shape[1]) + spatial_shape = (h, w) + elif ndim == 3: + # Decide band-first vs band-last from ``dims`` when available. + # Both layouts are valid writer inputs; the spatial axes are + # whichever two are not the band axis. + band_first = False + if dims is not None and len(dims) == 3: + band_first = dims[0] in _BAND_DIM_NAMES + if band_first: + h, w = int(shape[1]), int(shape[2]) + else: + h, w = int(shape[0]), int(shape[1]) + spatial_shape = (h, w) + else: + # Other rank errors are handled by the existing ndim check; do + # not shadow that message. + return + if h <= 0 or w <= 0: + raise ValueError( + f"to_geotiff cannot write an empty raster: got shape " + f"{tuple(int(s) for s in shape)} with height={h}, width={w}. " + f"Both spatial dims must be positive. A common cause is a " + f"clip or window that produced an empty selection; check " + f"the upstream operation before writing." + ) + + def _validate_dtype_cast(source_dtype, target_dtype): """Validate that casting source_dtype to target_dtype is allowed. diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index a2468bd29..384dd79d4 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1567,6 +1567,16 @@ def write(data: np.ndarray, path: str, *, Per-pixel error budget for LERC compression. ``0.0`` (default) is lossless. Only valid with ``compression='lerc'``. """ + # Issue #2075: reject empty spatial shapes before any IFD layout + # math runs. ``to_geotiff`` already guards this for DataArray inputs, + # but ``write`` is also called directly by tests and by the GPU + # path, so guard here too. ``write`` always receives band-last + # arrays (eager moveaxis ran upstream), so the ndim-based pair + # picked by ``_validate_writer_spatial_shape`` without ``dims`` is + # correct. + from ._validation import _validate_writer_spatial_shape + _validate_writer_spatial_shape(getattr(data, 'shape', None)) + comp_tag = _compression_tag(compression) pred_int = normalize_predictor(predictor, data.dtype, comp_tag) @@ -1895,6 +1905,14 @@ def write_streaming(dask_data, path: str, *, "Streaming dask write to cloud storage is not yet supported. " "Use .compute() first or write to a .vrt file.") + # Issue #2075: reject empty spatial shapes before tile/strip count + # math (``math.ceil(width / tw)`` etc. below at the layout block) + # silently produces zero entries. ``to_geotiff`` already validates + # this upstream, but direct callers of ``write_streaming`` go + # through here too. + from ._validation import _validate_writer_spatial_shape + _validate_writer_spatial_shape(getattr(dask_data, 'shape', None)) + height, width = dask_data.shape[:2] samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 dtype = dask_data.dtype diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index c1934f641..d658be1ab 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -44,6 +44,7 @@ _validate_3d_writer_dims, _validate_nodata_arg, _validate_tile_size_arg, + _validate_writer_spatial_shape, validate_write_metadata, ) from .._writer import write @@ -271,6 +272,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray, _validate_nodata_arg(nodata) + # Issue #2075: reject zero-height / zero-width inputs before any + # dispatch decision. Clip / window pipelines naturally produce empty + # rasters and the writers used to accept them, produce a TIFF whose + # IFD claimed shape ``(0, N)`` / ``(N, 0)``, and surface a generic + # "Invalid image dimensions" only at read time. Fail closed at the + # entry point with a message that names the offending dim. + _shape = getattr(data, 'shape', None) + _dims = getattr(data, 'dims', None) + _validate_writer_spatial_shape(_shape, _dims) + # Issue #1987 ambiguous-metadata checks. The hook is a no-op # when no check is registered, so this call is safe even if every # check is later unregistered for a specific entry point. diff --git a/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py new file mode 100644 index 000000000..2be3dfbda --- /dev/null +++ b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py @@ -0,0 +1,67 @@ +"""Regression tests for issue #2075. + +``to_geotiff`` used to accept arrays with a zero-height or zero-width +spatial dim and write a TIFF whose IFD claimed shape ``(0, N)`` or +``(N, 0)``. The reader then rejected the file with the generic +"Invalid image dimensions" message that never named the writer as the +source. + +The fix raises ``ValueError`` at the write entry point. The failure +happens before any bytes hit disk, and the message names the offending +dimension so callers know which axis went empty (a clip / window +operation is the common cause). +""" +from __future__ import annotations + +import dask.array as dsk +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import to_geotiff + + +_EMPTY_SHAPES = [ + pytest.param((0, 5), id="zero-height"), + pytest.param((5, 0), id="zero-width"), + pytest.param((0, 0), id="both-zero"), +] + + +@pytest.mark.parametrize("shape", _EMPTY_SHAPES) +def test_to_geotiff_rejects_empty_numpy(tmp_path, shape): + h, w = shape + da = xr.DataArray( + np.zeros(shape, dtype=np.float32), + dims=("y", "x"), + ) + out = tmp_path / f"tmp_2075_empty_{h}x{w}.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value) + # The message must name the writer (so callers see the source) and + # mention which axis is zero. Accept either height/width by name or + # the literal shape so we don't pin the exact wording. + assert "to_geotiff" in msg or "empty" in msg.lower() or "0" in msg + if h == 0: + assert "height" in msg.lower() or f"({h}, {w})" in msg + if w == 0: + assert "width" in msg.lower() or f"({h}, {w})" in msg + # Nothing should have been written. + assert not out.exists() + + +def test_to_geotiff_rejects_empty_dask(tmp_path): + # One dask variant is enough to exercise the streaming entry point. + shape = (0, 5) + da = xr.DataArray( + dsk.zeros(shape, dtype=np.float32, chunks=shape if 0 not in shape + else (1, 1)), + dims=("y", "x"), + ) + out = tmp_path / "tmp_2075_empty_dask_0x5.tif" + with pytest.raises(ValueError) as excinfo: + to_geotiff(da, str(out)) + msg = str(excinfo.value).lower() + assert "height" in msg or "empty" in msg or "(0, 5)" in msg + assert not out.exists() From cb256aad1da799ff95c0ba81dbf9fdfc7839ef77 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Mon, 18 May 2026 14:12:42 -0700 Subject: [PATCH 2/2] geotiff: extend empty-raster guard to write_geotiff_gpu (#2075) Follow-up to PR review: the GPU writer is a public entry point and direct callers (with cupy.ndarray or raw numpy) skip the to_geotiff guard. Add the same _validate_writer_spatial_shape call before any GPU work starts. Thread an entry_point kwarg through the helper so the error message names the function the caller actually invoked instead of always saying "to_geotiff". Drop an unused local in the helper. --- xrspatial/geotiff/_validation.py | 10 ++-- xrspatial/geotiff/_writer.py | 6 ++- xrspatial/geotiff/_writers/gpu.py | 11 +++++ .../tests/test_to_geotiff_empty_shape_2075.py | 47 ++++++++++++++++--- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/xrspatial/geotiff/_validation.py b/xrspatial/geotiff/_validation.py index a6d26f651..956b20e6a 100644 --- a/xrspatial/geotiff/_validation.py +++ b/xrspatial/geotiff/_validation.py @@ -122,7 +122,8 @@ def _validate_3d_writer_dims(dims) -> None: ) -def _validate_writer_spatial_shape(shape, dims=None) -> None: +def _validate_writer_spatial_shape(shape, dims=None, + entry_point: str = "to_geotiff") -> None: """Reject empty spatial shapes at the writer entry point (issue #2075). Clip and window pipelines can produce empty rasters. The eager and @@ -138,13 +139,15 @@ def _validate_writer_spatial_shape(shape, dims=None) -> None: arrives. Without ``dims`` the helper assumes band-last for 3D (consistent with the writer's pre-moveaxis layout invariant), so pass ``dims`` for DataArray inputs to avoid mis-naming the axis. + ``entry_point`` is the function name used in the error message so + direct callers of ``write`` / ``write_streaming`` / ``write_geotiff_gpu`` + see the function they actually invoked. """ if shape is None: return ndim = len(shape) if ndim == 2: h, w = int(shape[0]), int(shape[1]) - spatial_shape = (h, w) elif ndim == 3: # Decide band-first vs band-last from ``dims`` when available. # Both layouts are valid writer inputs; the spatial axes are @@ -156,14 +159,13 @@ def _validate_writer_spatial_shape(shape, dims=None) -> None: h, w = int(shape[1]), int(shape[2]) else: h, w = int(shape[0]), int(shape[1]) - spatial_shape = (h, w) else: # Other rank errors are handled by the existing ndim check; do # not shadow that message. return if h <= 0 or w <= 0: raise ValueError( - f"to_geotiff cannot write an empty raster: got shape " + f"{entry_point} cannot write an empty raster: got shape " f"{tuple(int(s) for s in shape)} with height={h}, width={w}. " f"Both spatial dims must be positive. A common cause is a " f"clip or window that produced an empty selection; check " diff --git a/xrspatial/geotiff/_writer.py b/xrspatial/geotiff/_writer.py index 384dd79d4..e0b3517e4 100644 --- a/xrspatial/geotiff/_writer.py +++ b/xrspatial/geotiff/_writer.py @@ -1575,7 +1575,8 @@ def write(data: np.ndarray, path: str, *, # picked by ``_validate_writer_spatial_shape`` without ``dims`` is # correct. from ._validation import _validate_writer_spatial_shape - _validate_writer_spatial_shape(getattr(data, 'shape', None)) + _validate_writer_spatial_shape( + getattr(data, 'shape', None), entry_point="write") comp_tag = _compression_tag(compression) pred_int = normalize_predictor(predictor, data.dtype, comp_tag) @@ -1911,7 +1912,8 @@ def write_streaming(dask_data, path: str, *, # this upstream, but direct callers of ``write_streaming`` go # through here too. from ._validation import _validate_writer_spatial_shape - _validate_writer_spatial_shape(getattr(dask_data, 'shape', None)) + _validate_writer_spatial_shape( + getattr(dask_data, 'shape', None), entry_point="write_streaming") height, width = dask_data.shape[:2] samples = dask_data.shape[2] if dask_data.ndim == 3 else 1 diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 4b60f1914..3ed7c038f 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -29,6 +29,7 @@ _validate_3d_writer_dims, _validate_nodata_arg, _validate_tile_size_arg, + _validate_writer_spatial_shape, validate_write_metadata, ) @@ -263,6 +264,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, _validate_tile_size_arg(tile_size) _validate_nodata_arg(nodata) + # Issue #2075: reject empty spatial shapes. ``write_geotiff_gpu`` is + # a public entry point and direct callers (with cupy.ndarray or raw + # numpy) do not flow through ``to_geotiff``'s guard, so check here + # before any GPU work starts. + _validate_writer_spatial_shape( + getattr(data, 'shape', None), + getattr(data, 'dims', None), + entry_point="write_geotiff_gpu", + ) + # Issue #1987 ambiguous-metadata checks; mirrors ``to_geotiff`` so the # GPU writer enforces the same crs/crs_wkt consistency rule. _attrs = getattr(data, 'attrs', None) or {} diff --git a/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py index 2be3dfbda..d53a3c66a 100644 --- a/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py +++ b/xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py @@ -13,6 +13,8 @@ """ from __future__ import annotations +import importlib.util + import dask.array as dsk import numpy as np import pytest @@ -21,6 +23,20 @@ from xrspatial.geotiff import to_geotiff +def _cupy_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _cupy_available() + + _EMPTY_SHAPES = [ pytest.param((0, 5), id="zero-height"), pytest.param((5, 0), id="zero-width"), @@ -39,18 +55,37 @@ def test_to_geotiff_rejects_empty_numpy(tmp_path, shape): with pytest.raises(ValueError) as excinfo: to_geotiff(da, str(out)) msg = str(excinfo.value) - # The message must name the writer (so callers see the source) and - # mention which axis is zero. Accept either height/width by name or - # the literal shape so we don't pin the exact wording. - assert "to_geotiff" in msg or "empty" in msg.lower() or "0" in msg + # The message must name the writer that the user called so the + # traceback names the right entry point. + assert "to_geotiff" in msg + assert "empty" in msg.lower() if h == 0: - assert "height" in msg.lower() or f"({h}, {w})" in msg + assert "height=0" in msg if w == 0: - assert "width" in msg.lower() or f"({h}, {w})" in msg + assert "width=0" in msg # Nothing should have been written. assert not out.exists() +@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") +def test_write_geotiff_gpu_rejects_empty(tmp_path): + """``write_geotiff_gpu`` is a public entry point and does not go + through ``to_geotiff``; make sure the empty-shape guard fires there + too (the suggestion from PR #2078 review).""" + import cupy as cp + + from xrspatial.geotiff._writers.gpu import write_geotiff_gpu + + arr = cp.zeros((0, 5), dtype=cp.float32) + out = tmp_path / "tmp_2075_empty_gpu_0x5.tif" + with pytest.raises(ValueError) as excinfo: + write_geotiff_gpu(arr, str(out)) + msg = str(excinfo.value) + assert "write_geotiff_gpu" in msg + assert "height=0" in msg + assert not out.exists() + + def test_to_geotiff_rejects_empty_dask(tmp_path): # One dask variant is enough to exercise the streaming entry point. shape = (0, 5)