Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-api-consistency-state.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module,last_inspected,issue,severity_max,categories_found,notes
geotiff,2026-05-13,1775,MEDIUM,3,"Sweep v4 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1775 reader trio dtype kwarg lacks str|np.dtype|None annotation (Cat 3). Annotation added on open_geotiff, read_geotiff_dask, read_geotiff_gpu, and read_vrt; pinning tests extended in test_signature_annotations_1654.py. Cross-sibling return-type drift surfaced for follow-up but not fixed (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- behaviour change deferred. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1754) all confirmed fixed. cuda-validated."
geotiff,2026-05-13,1810,MEDIUM,5,"Sweep v5 (deep-sweep-api-consistency-geotiff-2026-05-13). 1 MEDIUM finding filed and fixed: #1810 open_geotiff dispatcher dropped missing_sources kwarg when routing to read_vrt (Cat 5, same class as #1561/#1605/#1685/#1795). Fix mirrors the on_gpu_failure pattern: sentinel default, forward to read_vrt for .vrt sources, reject for non-VRT sources. Regression test in test_open_geotiff_missing_sources_1810.py. Prior sweep findings (#1654 #1683 #1684 #1685 #1705 #1715 #1754 #1775) all confirmed fixed. Cross-sibling return-type drift (Cat 2): write_vrt returns str while to_geotiff and write_geotiff_gpu return None -- still deferred (LOW, callers do not substitute these writers). cuda-validated."
reproject,2026-05-10,1570,HIGH,2;5,"Filed cross-module attrs['vertical_crs'] type collision (string vs EPSG int) vs xrspatial.geotiff. Fixed in PR (TBD): reproject now writes EPSG int and preserves friendly token under vertical_datum. MEDIUM kwarg-order drift (transform_precision vs chunk_size) and missing type hints vs geotiff documented but not fixed (cosmetic, kwarg-only)."
38 changes: 36 additions & 2 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@
# None is itself a value a caller could supply alongside crs=. See
# issue #1715.
_CRS_WKT_DEPRECATED_SENTINEL = object()
# ``open_geotiff`` needs to tell "caller never set missing_sources" (default
# sentinel: skip forwarding so the read_vrt default applies, and reject the
# kwarg up front for non-VRT sources) from "caller set missing_sources=<value>"
# (forward verbatim to read_vrt). Mirrors the on_gpu_failure pattern. See
# issue #1810.
_MISSING_SOURCES_SENTINEL = object()

# Names of dims that ``to_geotiff`` / ``write_geotiff_gpu`` treat as the
# non-spatial band axis. Used both to remap ``(band, y, x)`` inputs to
Expand Down Expand Up @@ -692,6 +698,7 @@ def open_geotiff(source: str | BinaryIO, *,
gpu: bool = False,
max_pixels: int | None = None,
on_gpu_failure: str = _ON_GPU_FAILURE_SENTINEL,
missing_sources: str = _MISSING_SOURCES_SENTINEL,
) -> xr.DataArray:
"""Read a GeoTIFF, COG, or VRT file into an xarray.DataArray.

Expand Down Expand Up @@ -737,6 +744,14 @@ def open_geotiff(source: str | BinaryIO, *,
Passing this kwarg with ``gpu=False`` raises ``ValueError``
because the policy only applies to the GPU pipeline. See
``read_geotiff_gpu`` for the full description.
missing_sources : {'warn', 'raise'}, optional
Forwarded to ``read_vrt`` when the source is a ``.vrt`` file.
``'warn'`` preserves the historical behavior: emit
``GeoTIFFFallbackWarning``, record ``attrs['vrt_holes']``, and
return a partial mosaic. ``'raise'`` fails immediately. Passing
this kwarg with a non-VRT source raises ``ValueError`` because
the policy only applies to the VRT pipeline. See ``read_vrt``
for the full description.

Returns
-------
Expand Down Expand Up @@ -782,8 +797,24 @@ def open_geotiff(source: str | BinaryIO, *,
"Pass gpu=True to enable the GPU pipeline, or drop "
"on_gpu_failure to keep the default CPU path.")

# ``missing_sources`` is VRT-only. Reject it up front when the source
# is not a ``.vrt`` file so callers learn the policy is being ignored
# instead of getting a silent drop -- same pattern ``on_gpu_failure``
# uses above for the GPU-only kwarg, and the same class of dispatcher
# silently-drops-backend-kwarg bug #1561 / #1605 / #1685 / #1795 fixed
# for the other VRT/GPU kwargs. See issue #1810.
missing_sources_passed = (
missing_sources is not _MISSING_SOURCES_SENTINEL)
_is_vrt_source = (
isinstance(source, str) and source.lower().endswith('.vrt'))
if missing_sources_passed and not _is_vrt_source:
raise ValueError(
"missing_sources only applies to VRT sources. "
"Pass a .vrt path to enable the VRT pipeline, or drop "
"missing_sources to keep the default GeoTIFF path.")

# VRT files (string paths only -- VRT XML references other files on disk)
if isinstance(source, str) and source.lower().endswith('.vrt'):
if _is_vrt_source:
# ``read_vrt`` does not accept ``overview_level`` (the VRT XML
# references its own source files; overview selection would need
# to apply to each one). Silently dropping the kwarg was the same
Expand All @@ -810,9 +841,12 @@ def open_geotiff(source: str | BinaryIO, *,
"VRT reads do not go through the GPU decoder pipeline; "
"drop the kwarg or call read_geotiff_gpu directly on a "
".tif source.")
vrt_kwargs = {}
if missing_sources_passed:
vrt_kwargs['missing_sources'] = missing_sources
return read_vrt(source, dtype=dtype, window=window, band=band,
name=name, chunks=chunks, gpu=gpu,
max_pixels=max_pixels)
max_pixels=max_pixels, **vrt_kwargs)

# File-like buffers don't support the GPU or dask code paths because
# those re-open the source by path from worker tasks or device-side
Expand Down
19 changes: 14 additions & 5 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1560,9 +1560,14 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
raise ValueError(
f"Invalid tile dimensions: TileWidth={tw}, TileLength={th}")

# Reject crafted tile dims that would force huge per-tile allocations.
# A single tile's decoded bytes must also fit under the pixel budget.
_check_dimensions(tw, th, samples, max_pixels)
# Reject crafted tile dims (e.g. TileWidth = 2**31). This guards the
# TIFF header against malformed values; it is not the caller's output
# budget. The output-window check below uses ``max_pixels`` and is
# what enforces the user's per-call memory cap. The source-read path
# under ``read_vrt`` (#1796) relies on that output check to honour a
# small caller ``max_pixels`` against a normal-tile source; see
# #1823.
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)
Comment on lines +1569 to +1570

# Per-tile compressed-byte cap (issue #1664). Same env var as the
# HTTP path. mmap slicing is bounded by the file size, but the slice
Expand Down Expand Up @@ -2016,10 +2021,14 @@ def _fetch_decode_cog_http_tiles(
# A windowed HTTP read of a multi-billion-pixel COG only allocates
# the window, so capping the full image would reject legitimate
# tiled reads. The full-image cap still applies for whole-file
# reads (window is None). The single-tile budget always applies.
# reads (window is None). The per-tile dim check below guards the
# TIFF header against absurd ``TileWidth`` / ``TileLength`` values
# (e.g. 2**31) and uses ``MAX_PIXELS_DEFAULT`` so a caller's small
# ``max_pixels`` -- intended as an output-window budget -- does not
# reject normal 256x256 tiles. See #1823.
if window is None:
_check_dimensions(width, height, samples, max_pixels)
_check_dimensions(tw, th, samples, max_pixels)
_check_dimensions(tw, th, samples, MAX_PIXELS_DEFAULT)

# Reject malformed TIFFs whose declared tile grid exceeds the supplied
# TileOffsets length. See issue #1219.
Expand Down
146 changes: 146 additions & 0 deletions xrspatial/geotiff/tests/test_open_geotiff_missing_sources_1810.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""Regression test for #1810: ``open_geotiff`` did not accept
``missing_sources`` and did not forward it to ``read_vrt`` when the
source was a VRT.

The api-consistency sweep on 2026-05-13 flagged that ``read_vrt`` had
gained a ``missing_sources='warn'|'raise'`` policy kwarg (#1806) but
the documented dispatcher entry point ``open_geotiff`` did not expose
it. Callers wanting strict failure had to call ``read_vrt`` directly.
Same class of dispatcher-drops-backend-kwarg bug as #1561, #1605,
#1685, #1795.
"""
from __future__ import annotations

import inspect

import numpy as np
import pytest
import xarray as xr

from xrspatial.geotiff import (
GeoTIFFFallbackWarning,
open_geotiff,
to_geotiff,
write_vrt,
)


def _write_missing_source_vrt(path):
"""Write a VRT pointing at a non-existent source file.

Mirrors the helper in ``test_vrt_missing_sources_policy_1799.py`` so
the warn-vs-raise branches surface the same way through the
dispatcher as they do via the direct ``read_vrt`` call.
"""
path.write_text(
'<VRTDataset rasterXSize="2" rasterYSize="2">\n'
' <VRTRasterBand dataType="Byte" band="1">\n'
' <SimpleSource>\n'
' <SourceFilename relativeToVRT="1">missing.tif'
'</SourceFilename>\n'
' <SourceBand>1</SourceBand>\n'
' <SrcRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' <DstRect xOff="0" yOff="0" xSize="2" ySize="2"/>\n'
' </SimpleSource>\n'
' </VRTRasterBand>\n'
'</VRTDataset>\n'
)


def test_open_geotiff_accepts_missing_sources():
"""The signature exposes ``missing_sources`` so IDE autocomplete and
``inspect.signature`` see the kwarg without parsing the docstring.
"""
sig = inspect.signature(open_geotiff)
assert 'missing_sources' in sig.parameters


def test_open_geotiff_vrt_missing_sources_warn(tmp_path):
"""``missing_sources='warn'`` matches the historic behavior:
``GeoTIFFFallbackWarning`` is emitted and ``attrs['vrt_holes']`` is
populated. Going through the dispatcher must produce the same
result as calling ``read_vrt`` directly.
"""
vrt = tmp_path / "tmp_1810_missing_warn.vrt"
_write_missing_source_vrt(vrt)

with pytest.warns(GeoTIFFFallbackWarning, match="could not be read"):
da = open_geotiff(str(vrt), missing_sources='warn')

assert 'vrt_holes' in da.attrs
assert da.attrs['vrt_holes'][0]['source'].endswith('missing.tif')


def test_open_geotiff_vrt_missing_sources_raise(tmp_path):
"""``missing_sources='raise'`` propagates through the dispatcher and
fails fast instead of silently returning a partial mosaic.
"""
vrt = tmp_path / "tmp_1810_missing_raise.vrt"
_write_missing_source_vrt(vrt)

with pytest.raises((OSError, ValueError)):
open_geotiff(str(vrt), missing_sources='raise')


def test_open_geotiff_vrt_missing_sources_validates_policy(tmp_path):
"""The validator on ``read_vrt`` fires through the dispatcher path
so callers get the same parameter-named error from either entry
point.
"""
vrt = tmp_path / "tmp_1810_missing_bad_policy.vrt"
_write_missing_source_vrt(vrt)

with pytest.raises(ValueError, match="missing_sources"):
open_geotiff(str(vrt), missing_sources='ignore')


def test_open_geotiff_rejects_missing_sources_on_tif(tmp_path):
"""``missing_sources`` is VRT-only; passing it with a ``.tif`` source
raises rather than silently doing nothing. Mirrors the
``on_gpu_failure`` rejection pattern.
"""
arr = np.arange(16, dtype=np.uint16).reshape(4, 4)
da = xr.DataArray(
arr, dims=['y', 'x'],
coords={'y': np.arange(4, dtype=np.float64),
'x': np.arange(4, dtype=np.float64)},
attrs={'crs': 4326},
)
tif_path = tmp_path / "single_tile.tif"
to_geotiff(da, str(tif_path))

with pytest.raises(ValueError, match="missing_sources only applies"):
open_geotiff(str(tif_path), missing_sources='raise')


def test_open_geotiff_vrt_without_missing_sources_kwarg_still_works(tmp_path):
"""Omitting ``missing_sources`` is a no-op: behaviour matches the
pre-fix dispatcher (and the ``read_vrt`` default of ``'warn'``).
The existing two-tile VRT exercises the happy path so callers
upgrading from #1806 do not see a regression.
"""
arr_a = np.arange(16, dtype=np.uint16).reshape(4, 4)
da_a = xr.DataArray(
arr_a, dims=['y', 'x'],
coords={'y': np.array([0.5, 1.5, 2.5, 3.5]),
'x': np.array([0.5, 1.5, 2.5, 3.5])},
attrs={'crs': 4326},
)
tile_a = tmp_path / "tile_a.tif"
to_geotiff(da_a, str(tile_a))

arr_b = np.arange(16, 32, dtype=np.uint16).reshape(4, 4)
da_b = xr.DataArray(
arr_b, dims=['y', 'x'],
coords={'y': np.array([0.5, 1.5, 2.5, 3.5]),
'x': np.array([4.5, 5.5, 6.5, 7.5])},
attrs={'crs': 4326},
)
tile_b = tmp_path / "tile_b.tif"
to_geotiff(da_b, str(tile_b))

vrt_path = tmp_path / "mosaic_1810.vrt"
write_vrt(str(vrt_path), [str(tile_a), str(tile_b)])

da = open_geotiff(str(vrt_path))
assert da.shape == (4, 8)
113 changes: 113 additions & 0 deletions xrspatial/geotiff/tests/test_vrt_source_tile_check_1823.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
"""Regression tests for #1823.

PR #1803 forwarded the caller's ``max_pixels`` to ``read_to_array`` inside
the VRT source loop so that a tiny VRT output could not force a huge
source decode (#1796). The output-window check enforces that. A separate
Comment on lines +1 to +5
per-tile dimension check was incorrectly using the same ``max_pixels``
value, so a caller setting ``max_pixels`` as an output budget (e.g.
10,000) would also fail the per-tile sanity check on every normal source
whose default tile size is 256x256 (= 65,536 pixels).

The #1796 protection remains: the output-window check still catches a
tiny VRT output that asks for a large source window.
"""
from __future__ import annotations

import os
import tempfile

import numpy as np
import pytest

from xrspatial.geotiff import to_geotiff
from xrspatial.geotiff._reader import PixelSafetyLimitError
from xrspatial.geotiff._vrt import read_vrt


def _write_normal_tile_source(td: str) -> str:
"""10x10 uint8 source -- ``to_geotiff`` pads to a 256x256 tile."""
src = os.path.join(td, 'src.tif')
to_geotiff(np.zeros((10, 10), dtype=np.uint8), src, compression='none')
return src


def _write_vrt(td: str, *, dst_x_size: int, dst_y_size: int,
raster_x: int = 100, raster_y: int = 100,
src_x_size: int = 10, src_y_size: int = 10) -> str:
vrt = os.path.join(td, 'mosaic.vrt')
xml = (
f'<VRTDataset rasterXSize="{raster_x}" rasterYSize="{raster_y}">\n'
f' <VRTRasterBand dataType="Byte" band="1">\n'
f' <SimpleSource>\n'
f' <SourceFilename relativeToVRT="1">src.tif</SourceFilename>\n'
f' <SourceBand>1</SourceBand>\n'
f' <SrcRect xOff="0" yOff="0" '
f'xSize="{src_x_size}" ySize="{src_y_size}"/>\n'
f' <DstRect xOff="0" yOff="0" '
f'xSize="{dst_x_size}" ySize="{dst_y_size}"/>\n'
f' </SimpleSource>\n'
f' </VRTRasterBand>\n'
f'</VRTDataset>\n'
)
with open(vrt, 'w') as f:
f.write(xml)
return vrt


class TestPerTileCheckDoesNotUseCallerBudget:
"""Per-tile dim sanity must not reject normal 256x256 source tiles
when the caller's ``max_pixels`` is a small output-budget value."""

def test_normal_tile_source_with_small_max_pixels(self):
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
_write_normal_tile_source(td)
vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
arr, _ = read_vrt(vrt, max_pixels=10_000)
assert arr.shape == (100, 100)

def test_normal_tile_source_with_tiny_max_pixels(self):
"""An output budget below a single tile must still succeed when
the requested output window itself fits."""
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
_write_normal_tile_source(td)
# Output 5x5 = 25 pixels; max_pixels = 100 fits 25 with room.
vrt = _write_vrt(td, dst_x_size=5, dst_y_size=5,
raster_x=5, raster_y=5)
arr, _ = read_vrt(vrt, max_pixels=100)
assert arr.shape == (5, 5)


class TestOutputWindowCheckStillEnforced:
"""The output-window check at the source read still rejects an
over-budget read; the #1796 protection is preserved."""

def test_output_window_exceeds_max_pixels_still_rejected(self):
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
src = os.path.join(td, 'src.tif')
to_geotiff(np.arange(16, dtype=np.uint8).reshape(4, 4),
src, compression='none')
vrt = _write_vrt(td, dst_x_size=1, dst_y_size=1,
raster_x=1, raster_y=1,
src_x_size=4, src_y_size=4)
# SrcRect 4x4 = 16 pixels > max_pixels=1 → output check fires.
with pytest.raises(ValueError, match="exceed"):
read_vrt(vrt, max_pixels=1)


class TestPerTileCheckStillRejectsCraftedHeader:
"""A pathological ``TileWidth``/``TileLength`` must still fail at
the per-tile sanity check, which uses ``MAX_PIXELS_DEFAULT``."""

def test_per_tile_check_caps_at_default(self, monkeypatch):
"""Lower ``MAX_PIXELS_DEFAULT`` to verify the per-tile call site
is wired to it (rather than to the caller's budget)."""
from xrspatial.geotiff import _reader as reader_mod

monkeypatch.setattr(reader_mod, "MAX_PIXELS_DEFAULT", 100)
with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as td:
_write_normal_tile_source(td)
vrt = _write_vrt(td, dst_x_size=100, dst_y_size=100)
# 256x256 tile > patched MAX_PIXELS_DEFAULT=100 → per-tile
# check fires regardless of caller's max_pixels (1e9 here).
with pytest.raises(PixelSafetyLimitError, match="65,536"):
read_vrt(vrt, max_pixels=1_000_000_000)
Loading