Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3919,14 +3919,33 @@ def _sentinel_for_dtype(nodata_val, dtype):
dtype, out-of-range, non-finite, or fractional). Mirrors the
gating PR #1583 added to other read paths via
``_int_nodata_in_range``.

A plain Python ``int`` ``nodata_val`` is handled without going
through ``float`` first, so 64-bit sentinels such as
``2**64 - 1`` (``UInt64`` max) and ``-2**63`` (``Int64`` min)
round-trip without the float64 rounding that pushes them just
past the dtype's representable range. ``_parse_band_nodata``
in ``_vrt.py`` parses integer-band ``<NoDataValue>`` directly
as ``int`` to feed this path. See issue #1783 follow-up.
"""
if nodata_val is None or dtype.kind not in ('u', 'i'):
return None
info = np.iinfo(dtype)
# Fast/exact path: ``nodata_val`` is already an integer. Avoids
# the float64 round-trip that loses precision near the int64 /
# uint64 extremes. ``bool`` is a subclass of ``int`` -- treat
# True/False as a 1/0 sentinel rather than rejecting outright,
# matching the existing ``int(float(...))`` behaviour.
if isinstance(nodata_val, (int, np.integer)) and not isinstance(
nodata_val, bool):
nodata_int = int(nodata_val)
if info.min <= nodata_int <= info.max:
return dtype.type(nodata_int)
return None
try:
nodata_f = float(nodata_val)
except (TypeError, ValueError):
return None
info = np.iinfo(dtype)
if not (np.isfinite(nodata_f) and nodata_f.is_integer()
and info.min <= nodata_f <= info.max):
return None
Expand Down
153 changes: 137 additions & 16 deletions xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
"""
from __future__ import annotations

import math
import os
import struct
import zlib
from dataclasses import dataclass, field
from typing import Union
from xml.sax.saxutils import escape as _xml_escape, quoteattr as _xml_quoteattr

import numpy as np
Expand Down Expand Up @@ -115,16 +117,26 @@ def _xml_attr(value) -> str:
return '""'
return _xml_quoteattr(str(value))

# Lazy imports to avoid circular dependency
# Mapping from GDAL VRT dataType names to NumPy dtypes.
#
# ``Int8`` is the GDAL 3.7+ signed-byte name. ``UInt64`` / ``Int64`` are
# the GDAL 3.5+ 64-bit integer names; both round-trip losslessly through
# numpy's native ``uint64`` / ``int64``. Complex types (``CInt16``,
# ``CInt32``, ``CFloat32``, ``CFloat64``) are deliberately absent: the
# reader has no complex-data code path, so an unknown ``dataType`` raises
# in :func:`parse_vrt` rather than silently dropping the imaginary
# component by falling back to ``Float32``. See issue #1783.
_DTYPE_MAP = {
'Byte': np.uint8,
'Int8': np.int8,
'UInt16': np.uint16,
'Int16': np.int16,
'UInt32': np.uint32,
'Int32': np.int32,
'UInt64': np.uint64,
'Int64': np.int64,
'Float32': np.float32,
'Float64': np.float64,
'Int8': np.int8,
}


Expand All @@ -144,7 +156,11 @@ class _Source:
band: int # 1-based
src_rect: _Rect
dst_rect: _Rect
nodata: float | None = None
# Parsed from ``<NODATA>`` on the source. ``int`` for integer-dtype
# bands so 64-bit sentinels survive without float64 rounding (see
# :func:`_parse_band_nodata`); ``float`` for floating-dtype bands
# and the legacy fallback. See issue #1783 follow-up.
nodata: Union[int, float, None] = None
# ComplexSource extras
scale: float | None = None
offset: float | None = None
Expand All @@ -165,7 +181,11 @@ class _VRTBand:
"""A single band in a VRT dataset."""
band_num: int # 1-based
dtype: np.dtype
nodata: float | None = None
# Parsed from ``<NoDataValue>``. ``int`` for integer-dtype bands so
# 64-bit sentinels (``2**64 - 1``, ``INT64_MIN``) survive without
# float64 rounding; ``float`` for floating-dtype bands. See
# :func:`_parse_band_nodata` and the issue #1783 follow-up.
nodata: Union[int, float, None] = None
sources: list[_Source] = field(default_factory=list)
color_interp: str | None = None

Expand Down Expand Up @@ -213,6 +233,67 @@ def _text(elem, tag, default=None):
return default


def _parse_band_nodata(text: str | None,
dtype: np.dtype) -> Union[int, float, None]:
"""Parse a VRT ``<NoDataValue>`` / ``<NODATA>`` string for a given band dtype.

Returns ``None`` when ``text`` is empty/None.

For floating dtypes the value is parsed as ``float`` (so ``nan`` /
``inf`` and scientific notation work as before).

For integer dtypes the value is parsed as ``int`` whenever possible so
that 64-bit sentinels (``2**64 - 1`` for ``UInt64``, ``INT64_MIN`` for
``Int64``) round-trip exactly rather than collapsing through a float64
that cannot represent them. GDAL is happy to write integer nodata in
scientific notation or with a trailing ``.0``; if a direct ``int()``
fails we fall back to ``int(float(text))`` but only accept that result
when the float is finite, integer-valued, and representable in the
target dtype. Out-of-range / non-integer values for an integer dtype
fall back to the parsed ``float`` (matching the eager-TIFF reader's
behaviour in :func:`_resolve_masked_fill`: the sentinel can never
match a real pixel, but ``attrs['nodata']`` still surfaces the raw
value for write round-trip). See issue #1783 follow-up.
"""
if text is None:
return None
text = text.strip()
if not text:
return None

if np.issubdtype(dtype, np.integer):
# Try the cheap path first: a plain integer literal (decimal,
# binary, hex with explicit base would be unusual but ``int()``
# without ``base`` rejects anything other than base-10 numeric
# strings, which is what GDAL writes).
try:
return int(text)
except ValueError:
pass
# Fall back: scientific notation or trailing ``.0``. Only accept
# when the float is exactly integer-valued and fits the dtype, so
# we never silently truncate a fractional value or wrap a
# sentinel that overflows the band's dtype.
try:
v = float(text)
except (TypeError, ValueError):
return None
if not math.isfinite(v) or not v.is_integer():
return v
info = np.iinfo(dtype)
nodata_int = int(v)
if info.min <= nodata_int <= info.max:
return nodata_int
return v

# Floating / other dtypes: parse as float (preserves NaN, Inf,
# scientific notation).
try:
return float(text)
except (TypeError, ValueError):
return None


def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
"""Parse a VRT XML string into a VRTDataset.

Expand Down Expand Up @@ -277,10 +358,35 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
bands = []
for band_elem in root.findall('VRTRasterBand'):
band_num = int(band_elem.get('band', 1))
dtype_name = band_elem.get('dataType', 'Float32')
dtype = np.dtype(_DTYPE_MAP.get(dtype_name, np.float32))
# Distinguish "attribute missing" (GDAL default: Float32) from
# "attribute present but unsupported". The previous
# ``_DTYPE_MAP.get(dtype_name, np.float32)`` collapsed both cases
# to ``Float32``, so a VRT declaring ``CFloat32`` silently lost
# its imaginary component and one declaring a typo (``Flaot32``)
# silently produced wrong values. See issue #1783.
dtype_name = band_elem.get('dataType')
if dtype_name is None:
dtype_name = 'Float32'
if dtype_name not in _DTYPE_MAP:
supported = ', '.join(sorted(_DTYPE_MAP))
raise ValueError(
f"VRTRasterBand band={band_num} declares "
f"dataType={dtype_name!r}, which is not supported by "
f"read_vrt. Supported dataType values: {supported}. "
f"Complex dataType values (CInt16, CInt32, CFloat32, "
f"CFloat64) are not supported because the reader has no "
f"complex-data code path; falling back to Float32 would "
f"silently drop the imaginary component."
)
dtype = np.dtype(_DTYPE_MAP[dtype_name])
# Parse <NoDataValue> with dtype-aware precision. For integer
# bands (including UInt64 / Int64) the sentinel is parsed as an
# ``int`` so values like ``2**64 - 1`` or ``INT64_MIN`` round-trip
# exactly through the masking pipeline; ``float(text)`` would
# collapse those to a representable float64 and break exact-
# equality masks downstream. See issue #1783 follow-up.
nodata_str = _text(band_elem, 'NoDataValue')
nodata = float(nodata_str) if nodata_str else None
nodata = _parse_band_nodata(nodata_str, dtype)
color_interp = _text(band_elem, 'ColorInterp')

sources = []
Expand Down Expand Up @@ -342,8 +448,12 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
src_rect = _parse_rect(src_rect_elem)
dst_rect = _parse_rect(dst_rect_elem)

# Per-source ``<NODATA>`` honours the band dtype too -- a
# uint64 source feeding a UInt64 band needs the full 64-bit
# sentinel preserved, not a float64-rounded approximation.
# See issue #1783 follow-up.
src_nodata_str = _text(src_elem, 'NODATA')
src_nodata = float(src_nodata_str) if src_nodata_str else None
src_nodata = _parse_band_nodata(src_nodata_str, dtype)

# ComplexSource extras
scale = None
Expand Down Expand Up @@ -820,15 +930,26 @@ def read_vrt(vrt_path: str, *, window=None,
# with GDAL_NODATA="-9999") stay no-op rather than
# tripping OverflowError on ``dtype.type(int(...))``.
# See issue #1616.
try:
nodata_f = float(src_nodata)
except (TypeError, ValueError):
nodata_f = None
if (nodata_f is not None
and np.isfinite(nodata_f)
and nodata_f.is_integer()):
# Accept an int sentinel directly so 64-bit values
# (``2**64 - 1`` for UInt64, ``INT64_MIN`` for Int64)
# don't get clobbered by ``float(...).is_integer()``
# losing precision near the edges. Fall back to the
# float parse for legacy float-typed nodata.
nodata_int: int | None = None
if (isinstance(src_nodata, (int, np.integer))
and not isinstance(src_nodata, bool)):
nodata_int = int(src_nodata)
else:
try:
nodata_f = float(src_nodata)
except (TypeError, ValueError):
nodata_f = None
if (nodata_f is not None
and np.isfinite(nodata_f)
and nodata_f.is_integer()):
nodata_int = int(nodata_f)
if nodata_int is not None:
info = np.iinfo(src_arr.dtype)
nodata_int = int(nodata_f)
if info.min <= nodata_int <= info.max:
sentinel = src_arr.dtype.type(nodata_int)
mask = src_arr == sentinel
Expand Down
Loading
Loading