Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,57 @@ def _validate_3d_writer_dims(dims) -> None:
)


def _validate_writer_spatial_shape(shape, dims=None,
entry_point: str = "to_geotiff") -> None:
"""Reject empty spatial shapes at the writer entry point (issue #2075).

Clip and window pipelines can produce empty rasters. The eager and
streaming writers used to accept those inputs, write a TIFF whose
IFD claimed shape ``(0, N)`` / ``(N, 0)``, and then the reader
rejected the file with a generic "Invalid image dimensions" message
that never named the writer as the source.

Validate up front. ``shape`` is the array shape (2D ``(h, w)``,
band-first 3D ``(bands, h, w)``, or band-last 3D ``(h, w, bands)``).
``dims`` is the DataArray ``dims`` tuple when available; it lets the
helper pick the right spatial pair when a 3D band-first input
arrives. Without ``dims`` the helper assumes band-last for 3D
(consistent with the writer's pre-moveaxis layout invariant), so
pass ``dims`` for DataArray inputs to avoid mis-naming the axis.
``entry_point`` is the function name used in the error message so
direct callers of ``write`` / ``write_streaming`` / ``write_geotiff_gpu``
see the function they actually invoked.
"""
if shape is None:
return
ndim = len(shape)
if ndim == 2:
h, w = int(shape[0]), int(shape[1])
elif ndim == 3:
# Decide band-first vs band-last from ``dims`` when available.
# Both layouts are valid writer inputs; the spatial axes are
# whichever two are not the band axis.
band_first = False
if dims is not None and len(dims) == 3:
band_first = dims[0] in _BAND_DIM_NAMES
if band_first:
h, w = int(shape[1]), int(shape[2])
else:
h, w = int(shape[0]), int(shape[1])
else:
# Other rank errors are handled by the existing ndim check; do
# not shadow that message.
return
if h <= 0 or w <= 0:
raise ValueError(
f"{entry_point} cannot write an empty raster: got shape "
f"{tuple(int(s) for s in shape)} with height={h}, width={w}. "
f"Both spatial dims must be positive. A common cause is a "
f"clip or window that produced an empty selection; check "
f"the upstream operation before writing."
)


def _validate_dtype_cast(source_dtype, target_dtype):
"""Validate that casting source_dtype to target_dtype is allowed.

Expand Down
20 changes: 20 additions & 0 deletions xrspatial/geotiff/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1567,6 +1567,17 @@ def write(data: np.ndarray, path: str, *,
Per-pixel error budget for LERC compression. ``0.0`` (default)
is lossless. Only valid with ``compression='lerc'``.
"""
# Issue #2075: reject empty spatial shapes before any IFD layout
# math runs. ``to_geotiff`` already guards this for DataArray inputs,
# but ``write`` is also called directly by tests and by the GPU
# path, so guard here too. ``write`` always receives band-last
# arrays (eager moveaxis ran upstream), so the ndim-based pair
# picked by ``_validate_writer_spatial_shape`` without ``dims`` is
# correct.
from ._validation import _validate_writer_spatial_shape
_validate_writer_spatial_shape(
getattr(data, 'shape', None), entry_point="write")

comp_tag = _compression_tag(compression)
pred_int = normalize_predictor(predictor, data.dtype, comp_tag)

Expand Down Expand Up @@ -1895,6 +1906,15 @@ def write_streaming(dask_data, path: str, *,
"Streaming dask write to cloud storage is not yet supported. "
"Use .compute() first or write to a .vrt file.")

# Issue #2075: reject empty spatial shapes before tile/strip count
# math (``math.ceil(width / tw)`` etc. below at the layout block)
# silently produces zero entries. ``to_geotiff`` already validates
# this upstream, but direct callers of ``write_streaming`` go
# through here too.
from ._validation import _validate_writer_spatial_shape
_validate_writer_spatial_shape(
getattr(dask_data, 'shape', None), entry_point="write_streaming")

height, width = dask_data.shape[:2]
samples = dask_data.shape[2] if dask_data.ndim == 3 else 1
dtype = dask_data.dtype
Expand Down
11 changes: 11 additions & 0 deletions xrspatial/geotiff/_writers/eager.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
_validate_3d_writer_dims,
_validate_nodata_arg,
_validate_tile_size_arg,
_validate_writer_spatial_shape,
validate_write_metadata,
)
from .._writer import write
Expand Down Expand Up @@ -271,6 +272,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray,

_validate_nodata_arg(nodata)

# Issue #2075: reject zero-height / zero-width inputs before any
# dispatch decision. Clip / window pipelines naturally produce empty
# rasters and the writers used to accept them, produce a TIFF whose
# IFD claimed shape ``(0, N)`` / ``(N, 0)``, and surface a generic
# "Invalid image dimensions" only at read time. Fail closed at the
# entry point with a message that names the offending dim.
_shape = getattr(data, 'shape', None)
_dims = getattr(data, 'dims', None)
_validate_writer_spatial_shape(_shape, _dims)

# Issue #1987 ambiguous-metadata checks. The hook is a no-op
# when no check is registered, so this call is safe even if every
# check is later unregistered for a specific entry point.
Expand Down
11 changes: 11 additions & 0 deletions xrspatial/geotiff/_writers/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
_validate_3d_writer_dims,
_validate_nodata_arg,
_validate_tile_size_arg,
_validate_writer_spatial_shape,
validate_write_metadata,
)

Expand Down Expand Up @@ -263,6 +264,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray,
_validate_tile_size_arg(tile_size)
_validate_nodata_arg(nodata)

# Issue #2075: reject empty spatial shapes. ``write_geotiff_gpu`` is
# a public entry point and direct callers (with cupy.ndarray or raw
# numpy) do not flow through ``to_geotiff``'s guard, so check here
# before any GPU work starts.
_validate_writer_spatial_shape(
getattr(data, 'shape', None),
getattr(data, 'dims', None),
entry_point="write_geotiff_gpu",
)

# Issue #1987 ambiguous-metadata checks; mirrors ``to_geotiff`` so the
# GPU writer enforces the same crs/crs_wkt consistency rule.
_attrs = getattr(data, 'attrs', None) or {}
Expand Down
102 changes: 102 additions & 0 deletions xrspatial/geotiff/tests/test_to_geotiff_empty_shape_2075.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
"""Regression tests for issue #2075.

``to_geotiff`` used to accept arrays with a zero-height or zero-width
spatial dim and write a TIFF whose IFD claimed shape ``(0, N)`` or
``(N, 0)``. The reader then rejected the file with the generic
"Invalid image dimensions" message that never named the writer as the
source.

The fix raises ``ValueError`` at the write entry point. The failure
happens before any bytes hit disk, and the message names the offending
dimension so callers know which axis went empty (a clip / window
operation is the common cause).
"""
from __future__ import annotations

import importlib.util

import dask.array as dsk
import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import to_geotiff


def _cupy_available() -> bool:
if importlib.util.find_spec("cupy") is None:
return False
try:
import cupy

return bool(cupy.cuda.is_available())
except Exception:
return False


# Probed once at import time; used by the skipif marker on the GPU test.
_HAS_GPU = _cupy_available()


# Every way a 2D raster can be empty, paired with the pytest id that is
# shown for the case in the test report.
_EMPTY_SHAPES = [
    pytest.param(empty_shape, id=case_id)
    for case_id, empty_shape in (
        ("zero-height", (0, 5)),
        ("zero-width", (5, 0)),
        ("both-zero", (0, 0)),
    )
]


@pytest.mark.parametrize("shape", _EMPTY_SHAPES)
def test_to_geotiff_rejects_empty_numpy(tmp_path, shape):
    """``to_geotiff`` rejects each empty 2D numpy-backed DataArray.

    The error must name ``to_geotiff``, mention that the raster is empty,
    call out every zero-length spatial axis, and leave no file behind.
    """
    rows, cols = shape
    target = tmp_path / f"tmp_2075_empty_{rows}x{cols}.tif"
    empty = xr.DataArray(np.zeros(shape, dtype=np.float32), dims=("y", "x"))

    with pytest.raises(ValueError) as excinfo:
        to_geotiff(empty, str(target))
    message = str(excinfo.value)

    # The entry point the user called must appear in the message so the
    # error points at the right function.
    assert "to_geotiff" in message
    assert "empty" in message.lower()
    # Only the axes that are actually empty are expected to be reported
    # with a zero extent.
    for extent, fragment in ((rows, "height=0"), (cols, "width=0")):
        if extent == 0:
            assert fragment in message
    # The failure must happen before anything is written.
    assert not target.exists()


@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
def test_write_geotiff_gpu_rejects_empty(tmp_path):
    """The GPU writer applies the empty-shape guard on its own.

    ``write_geotiff_gpu`` is a public entry point that does not route
    through ``to_geotiff``, so it needs its own coverage (suggested in
    the PR #2078 review).
    """
    # Imported lazily: both names are only usable when cupy is installed.
    import cupy as cp

    from xrspatial.geotiff._writers.gpu import write_geotiff_gpu

    target = tmp_path / "tmp_2075_empty_gpu_0x5.tif"
    empty_gpu = cp.zeros((0, 5), dtype=cp.float32)

    with pytest.raises(ValueError) as excinfo:
        write_geotiff_gpu(empty_gpu, str(target))
    message = str(excinfo.value)

    # The message names the GPU entry point and the empty axis.
    for fragment in ("write_geotiff_gpu", "height=0"):
        assert fragment in message
    assert not target.exists()


def test_to_geotiff_rejects_empty_dask(tmp_path):
    """``to_geotiff`` rejects an empty dask-backed DataArray.

    One dask case is enough to cover the dask-backed input path. The
    test pins the parts of the error message that the empty-shape
    validator emits for every entry point (the word "empty" and the
    zero-length axis), and checks that no file is created.
    """
    shape = (0, 5)
    # A fixed chunk spec is used: with a zero-height shape the previous
    # conditional expression could only ever select ``(1, 1)``.
    da = xr.DataArray(
        dsk.zeros(shape, dtype=np.float32, chunks=(1, 1)),
        dims=("y", "x"),
    )
    out = tmp_path / "tmp_2075_empty_dask_0x5.tif"
    with pytest.raises(ValueError) as excinfo:
        to_geotiff(da, str(out))
    msg = str(excinfo.value)
    # Require both fragments. An ``or`` over loose substrings would also
    # accept unrelated ValueErrors that merely mention "height".
    assert "empty" in msg.lower()
    assert "height=0" in msg
    # Nothing should have been written.
    assert not out.exists()
Loading