Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ def open_geotiff(source: str | BinaryIO, *,
missing_sources: str = _MISSING_SOURCES_SENTINEL,
allow_rotated: bool = False,
allow_unparseable_crs: bool = False,
band_nodata: str | None = None,
mask_nodata: bool = True,
) -> xr.DataArray:
"""Read a GeoTIFF, COG, or VRT file into an xarray.DataArray.
Expand Down Expand Up @@ -325,6 +326,17 @@ def open_geotiff(source: str | BinaryIO, *,
return a partial mosaic. Passing this kwarg with a non-VRT
source raises ``ValueError`` because the policy only applies to
the VRT pipeline. See ``read_vrt`` for the full description.
band_nodata : {'first', None}, optional
VRT-only. Opt-out for the fail-closed check that rejects VRT
sources whose bands declare disagreeing per-band nodata
sentinels (issue #1987 PR 5). When ``None`` (the default), a VRT
that mosaics bands with different sentinels raises
``MixedBandMetadataError``; flattening to one value would let
one band's valid pixels collide with another band's sentinel.
Pass ``band_nodata='first'`` to keep the legacy behaviour of
using band 0's sentinel for the whole mosaic. Passing this
kwarg with a non-VRT source raises ``ValueError`` because the
policy only applies to the VRT pipeline.
mask_nodata : bool, default True
If True (the default), replace the nodata sentinel with ``NaN``;
integer rasters get promoted to ``float64`` first so NaN can be
Expand Down Expand Up @@ -409,6 +421,19 @@ def open_geotiff(source: str | BinaryIO, *,
"Pass a .vrt path to enable the VRT pipeline, or drop "
"missing_sources to keep the default GeoTIFF path.")

# ``band_nodata`` is the #1987 PR 5 opt-out for the mixed-band
# metadata fail-closed check. It only has meaning on the VRT pipeline
# (a plain GeoTIFF has one nodata sentinel per file, not per band),
# so reject the kwarg up front on non-VRT sources rather than letting
# it leak into ``read_vrt`` and confuse the caller about what the
# opt-out actually controls.
if band_nodata is not None and not _is_vrt_source:
raise ValueError(
"band_nodata only applies to VRT sources. "
"Pass a .vrt path to enable the VRT pipeline, or drop "
"band_nodata to keep the default GeoTIFF path. "
"See issue #1987.")

# ``max_cloud_bytes`` is the eager fsspec-read budget. Only
# ``_read_to_array`` on the eager non-VRT, non-GPU, non-dask branch
# consumes it; the GPU (``read_geotiff_gpu``), dask
Expand Down Expand Up @@ -473,6 +498,7 @@ def open_geotiff(source: str | BinaryIO, *,
max_pixels=max_pixels,
allow_rotated=allow_rotated,
allow_unparseable_crs=allow_unparseable_crs,
band_nodata=band_nodata,
mask_nodata=mask_nodata,
**vrt_kwargs)

Expand Down
19 changes: 19 additions & 0 deletions xrspatial/geotiff/_backends/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def read_geotiff_dask(source: str, *,
max_pixels: int | None = None,
allow_rotated: bool = False,
allow_unparseable_crs: bool = False,
band_nodata: str | None = None,
mask_nodata: bool = True) -> xr.DataArray:
"""Read a GeoTIFF as a dask-backed DataArray for out-of-core processing.

Expand Down Expand Up @@ -74,6 +75,11 @@ def read_geotiff_dask(source: str, *,
directly.
name : str or None
Name for the DataArray.
band_nodata : {'first', None}, optional
VRT-only opt-out for the fail-closed mixed-band-metadata check
(issue #1987 PR 5). Forwarded verbatim to ``read_vrt`` when the
source is a ``.vrt`` file. Passing it with a non-VRT GeoTIFF
source raises ``ValueError``.
mask_nodata : bool, default True
If True, replace the nodata sentinel with NaN per chunk (integer
rasters get promoted to ``float64``). If False, skip the
Expand Down Expand Up @@ -114,8 +120,21 @@ def read_geotiff_dask(source: str, *,
return read_vrt(
source, dtype=dtype, window=window, band=band, name=name,
chunks=chunks, max_pixels=max_pixels,
allow_rotated=allow_rotated,
allow_unparseable_crs=allow_unparseable_crs,
band_nodata=band_nodata,
mask_nodata=mask_nodata,
)
# ``band_nodata`` only has meaning for the VRT path (per-band sentinel
# ambiguity). Reject the kwarg up front on non-VRT GeoTIFF inputs so
# callers learn the opt-out is being dropped, matching the
# ``open_geotiff`` guard. See issue #1987 PR 5.
if band_nodata is not None:
raise ValueError(
"band_nodata only applies to VRT sources. "
"Pass a .vrt path to enable the VRT pipeline, or drop "
"band_nodata to keep the default GeoTIFF path. "
"See issue #1987.")

# P5: HTTP COG sources used to fire one IFD/header GET per chunk
# task. Parse metadata once here so every delayed task can reuse it.
Expand Down
27 changes: 27 additions & 0 deletions xrspatial/geotiff/_backends/vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,16 @@ def read_vrt(source: str, *,
sentinel (or zero on integer bands without a sentinel).
``XRSPATIAL_GEOTIFF_STRICT=1`` forces a raise across the whole
module regardless of this kwarg.
band_nodata : {'first', None}, optional
Opt-out for the fail-closed mixed-band-metadata check (issue
#1987 PR 5). ``None`` (the default) rejects a VRT whose bands
declare disagreeing per-band ``<NoDataValue>`` sentinels with
``MixedBandMetadataError``; flattening to one value would
otherwise let one band's valid pixels collide with another
band's sentinel after the flatten. Pass ``band_nodata='first'``
to keep the legacy flatten-to-band-0 semantics explicitly. Any
other value raises ``ValueError`` at the boundary so typos
surface up front instead of degrading silently into strict mode.
mask_nodata : bool, default True
If True, run the integer-sentinel-to-NaN promotion on the
assembled mosaic. If False, skip it and keep the source dtype
Expand Down Expand Up @@ -154,6 +164,23 @@ def read_vrt(source: str, *,
f"missing_sources must be 'warn' or 'raise', got "
f"{missing_sources!r}")

# ``band_nodata`` accepts only ``None`` (strict, the default) and
# ``'first'`` (legacy flatten-to-band-0 opt-out for the fail-closed
# mixed-band-metadata check, issue #1987 PR 5). Any other value would
# silently degrade to strict mode because the registered check treats
# anything other than ``'first'`` as "no opt-out", which means a typo
# like ``band_nodata='firs'`` looks like opt-out at the call site but
# raises ``MixedBandMetadataError`` at run time. Mirror the
# ``missing_sources`` value-validation pattern above so the typo
# surfaces at the boundary instead.
if band_nodata not in (None, 'first'):
raise ValueError(
f"band_nodata must be None or 'first', got {band_nodata!r}. "
f"Pass ``band_nodata='first'`` to opt back into the legacy "
f"flatten-to-first-band semantics for VRT sources with "
f"disagreeing per-band nodata sentinels, or drop the kwarg "
f"to keep the fail-closed default. See issue #1987.")

# Lazy chunked path (issue #1814). The eager call below materialises
# the full mosaic on host RAM and then wraps the array via
# ``.chunk()``, so chunks= gave no memory protection and gpu=True +
Expand Down
18 changes: 10 additions & 8 deletions xrspatial/geotiff/_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,11 +950,13 @@ def _same_nodata(a: float, b: float) -> bool:
return a == b


# NOT registered by default. The mixed-band check has a much larger
# migration cost than its sibling read-side checks (around 35 existing
# test sites would need to opt in via ``band_nodata='first'`` to keep
# their legacy assertions), so it lands as a follow-up PR that bundles
# the check activation with the test migration. The check itself and
# the ``band_nodata=`` VRT kwarg ship now so the follow-up is a
# one-line registration call plus the test sweep.
# register_read_metadata_check(_check_read_mixed_band_metadata)
# Registered as of issue #1987 PR 5. The check was staged in the
# preceding bundle (#2031) along with the ``band_nodata=`` VRT kwarg
# but not registered, because activation needed a coordinated migration
# of the VRT test sites that read fixtures with disagreeing per-band
# sentinels. The migration sweep ships alongside this registration.
# Callers that still want the legacy flatten-to-first-band behaviour
# pass ``band_nodata='first'`` to ``read_vrt`` / ``open_geotiff`` /
# ``read_geotiff_dask``; the explicit opt-in surfaces the per-band
# ambiguity at the call site instead of papering over it silently.
register_read_metadata_check(_check_read_mixed_band_metadata)
Loading
Loading