xarray-contrib · brendancol · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py
@@ -3919,14 +3919,33 @@ def _sentinel_for_dtype(nodata_val, dtype):
         dtype, out-of-range, non-finite, or fractional). Mirrors the
         gating PR #1583 added to other read paths via
         ``_int_nodata_in_range``.
+
+        A plain Python ``int`` ``nodata_val`` is handled without going
+        through ``float`` first, so 64-bit sentinels such as
+        ``2**64 - 1`` (``UInt64`` max) and ``-2**63`` (``Int64`` min)
+        round-trip without the float64 rounding that pushes them just
+        past the dtype's representable range.  ``_parse_band_nodata``
+        in ``_vrt.py`` parses integer-band ``<NoDataValue>`` directly
+        as ``int`` to feed this path.  See issue #1783 follow-up.
         """
         if nodata_val is None or dtype.kind not in ('u', 'i'):
             return None
+        info = np.iinfo(dtype)
+        # Fast/exact path: ``nodata_val`` is already an integer.  Avoids
+        # the float64 round-trip that loses precision near the int64 /
+        # uint64 extremes.  ``bool`` is a subclass of ``int`` -- treat
+        # True/False as a 1/0 sentinel rather than rejecting outright,
+        # matching the existing ``int(float(...))`` behaviour.
+        if isinstance(nodata_val, (int, np.integer)) and not isinstance(
+                nodata_val, bool):
+            nodata_int = int(nodata_val)
+            if info.min <= nodata_int <= info.max:
+                return dtype.type(nodata_int)
+            return None
         try:
             nodata_f = float(nodata_val)
         except (TypeError, ValueError):
             return None
-        info = np.iinfo(dtype)
         if not (np.isfinite(nodata_f) and nodata_f.is_integer()
                 and info.min <= nodata_f <= info.max):
             return None

diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py
@@ -5,10 +5,12 @@
 """
 from __future__ import annotations
 
+import math
 import os
 import struct
 import zlib
 from dataclasses import dataclass, field
+from typing import Union
 from xml.sax.saxutils import escape as _xml_escape, quoteattr as _xml_quoteattr
 
 import numpy as np
@@ -115,16 +117,26 @@ def _xml_attr(value) -> str:
         return '""'
     return _xml_quoteattr(str(value))
 
-# Lazy imports to avoid circular dependency
+# Mapping from GDAL VRT dataType names to NumPy dtypes.
+#
+# ``Int8`` is the GDAL 3.7+ signed-byte name.  ``UInt64`` / ``Int64`` are
+# the GDAL 3.5+ 64-bit integer names; both round-trip losslessly through
+# numpy's native ``uint64`` / ``int64``.  Complex types (``CInt16``,
+# ``CInt32``, ``CFloat32``, ``CFloat64``) are deliberately absent: the
+# reader has no complex-data code path, so an unknown ``dataType`` raises
+# in :func:`parse_vrt` rather than silently dropping the imaginary
+# component by falling back to ``Float32``.  See issue #1783.
 _DTYPE_MAP = {
     'Byte': np.uint8,
+    'Int8': np.int8,
     'UInt16': np.uint16,
     'Int16': np.int16,
     'UInt32': np.uint32,
     'Int32': np.int32,
+    'UInt64': np.uint64,
+    'Int64': np.int64,
     'Float32': np.float32,
     'Float64': np.float64,
-    'Int8': np.int8,
 }
 
 
@@ -144,7 +156,11 @@ class _Source:
     band: int  # 1-based
     src_rect: _Rect
     dst_rect: _Rect
-    nodata: float | None = None
+    # Parsed from ``<NODATA>`` on the source.  ``int`` for integer-dtype
+    # bands so 64-bit sentinels survive without float64 rounding (see
+    # :func:`_parse_band_nodata`); ``float`` for floating-dtype bands
+    # and the legacy fallback.  See issue #1783 follow-up.
+    nodata: Union[int, float, None] = None
     # ComplexSource extras
     scale: float | None = None
     offset: float | None = None
@@ -165,7 +181,11 @@ class _VRTBand:
     """A single band in a VRT dataset."""
     band_num: int  # 1-based
     dtype: np.dtype
-    nodata: float | None = None
+    # Parsed from ``<NoDataValue>``.  ``int`` for integer-dtype bands so
+    # 64-bit sentinels (``2**64 - 1``, ``INT64_MIN``) survive without
+    # float64 rounding; ``float`` for floating-dtype bands.  See
+    # :func:`_parse_band_nodata` and the issue #1783 follow-up.
+    nodata: Union[int, float, None] = None
     sources: list[_Source] = field(default_factory=list)
     color_interp: str | None = None
 
@@ -213,6 +233,67 @@ def _text(elem, tag, default=None):
     return default
 
 
+def _parse_band_nodata(text: str | None,
+                       dtype: np.dtype) -> Union[int, float, None]:
+    """Parse a VRT ``<NoDataValue>`` / ``<NODATA>`` string for a given band dtype.
+
+    Returns ``None`` when ``text`` is empty/None.
+
+    For floating dtypes the value is parsed as ``float`` (so ``nan`` /
+    ``inf`` and scientific notation work as before).
+
+    For integer dtypes the value is parsed as ``int`` whenever possible so
+    that 64-bit sentinels (``2**64 - 1`` for ``UInt64``, ``INT64_MIN`` for
+    ``Int64``) round-trip exactly rather than collapsing through a float64
+    that cannot represent them.  GDAL is happy to write integer nodata in
+    scientific notation or with a trailing ``.0``; if a direct ``int()``
+    fails we fall back to ``int(float(text))`` but only accept that result
+    when the float is finite, integer-valued, and representable in the
+    target dtype.  Out-of-range / non-integer values for an integer dtype
+    fall back to the parsed ``float`` (matching the eager-TIFF reader's
+    behaviour in :func:`_resolve_masked_fill`: the sentinel can never
+    match a real pixel, but ``attrs['nodata']`` still surfaces the raw
+    value for write round-trip).  See issue #1783 follow-up.
+    """
+    if text is None:
+        return None
+    text = text.strip()
+    if not text:
+        return None
+
+    if np.issubdtype(dtype, np.integer):
+        # Try the cheap path first: a plain integer literal (decimal,
+        # binary, hex with explicit base would be unusual but ``int()``
+        # without ``base`` rejects anything other than base-10 numeric
+        # strings, which is what GDAL writes).
+        try:
+            return int(text)
+        except ValueError:
+            pass
+        # Fall back: scientific notation or trailing ``.0``.  Only accept
+        # when the float is exactly integer-valued and fits the dtype, so
+        # we never silently truncate a fractional value or wrap a
+        # sentinel that overflows the band's dtype.
+        try:
+            v = float(text)
+        except (TypeError, ValueError):
+            return None
+        if not math.isfinite(v) or not v.is_integer():
+            return v
+        info = np.iinfo(dtype)
+        nodata_int = int(v)
+        if info.min <= nodata_int <= info.max:
+            return nodata_int
+        return v
+
+    # Floating / other dtypes: parse as float (preserves NaN, Inf,
+    # scientific notation).
+    try:
+        return float(text)
+    except (TypeError, ValueError):
+        return None
+
+
 def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
     """Parse a VRT XML string into a VRTDataset.
 
@@ -277,10 +358,35 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
     bands = []
     for band_elem in root.findall('VRTRasterBand'):
         band_num = int(band_elem.get('band', 1))
-        dtype_name = band_elem.get('dataType', 'Float32')
-        dtype = np.dtype(_DTYPE_MAP.get(dtype_name, np.float32))
+        # Distinguish "attribute missing" (GDAL default: Float32) from
+        # "attribute present but unsupported".  The previous
+        # ``_DTYPE_MAP.get(dtype_name, np.float32)`` collapsed both cases
+        # to ``Float32``, so a VRT declaring ``CFloat32`` silently lost
+        # its imaginary component and one declaring a typo (``Flaot32``)
+        # silently produced wrong values.  See issue #1783.
+        dtype_name = band_elem.get('dataType')
+        if dtype_name is None:
+            dtype_name = 'Float32'
+        if dtype_name not in _DTYPE_MAP:
+            supported = ', '.join(sorted(_DTYPE_MAP))
+            raise ValueError(
+                f"VRTRasterBand band={band_num} declares "
+                f"dataType={dtype_name!r}, which is not supported by "
+                f"read_vrt. Supported dataType values: {supported}. "
+                f"Complex dataType values (CInt16, CInt32, CFloat32, "
+                f"CFloat64) are not supported because the reader has no "
+                f"complex-data code path; falling back to Float32 would "
+                f"silently drop the imaginary component."
+            )
+        dtype = np.dtype(_DTYPE_MAP[dtype_name])
+        # Parse <NoDataValue> with dtype-aware precision.  For integer
+        # bands (including UInt64 / Int64) the sentinel is parsed as an
+        # ``int`` so values like ``2**64 - 1`` or ``INT64_MIN`` round-trip
+        # exactly through the masking pipeline; ``float(text)`` would
+        # collapse those to a representable float64 and break exact-
+        # equality masks downstream.  See issue #1783 follow-up.
         nodata_str = _text(band_elem, 'NoDataValue')
-        nodata = float(nodata_str) if nodata_str else None
+        nodata = _parse_band_nodata(nodata_str, dtype)
         color_interp = _text(band_elem, 'ColorInterp')
 
         sources = []
@@ -342,8 +448,12 @@ def parse_vrt(xml_str: str, vrt_dir: str = '.') -> VRTDataset:
             src_rect = _parse_rect(src_rect_elem)
             dst_rect = _parse_rect(dst_rect_elem)
 
+            # Per-source ``<NODATA>`` honours the band dtype too -- a
+            # uint64 source feeding a UInt64 band needs the full 64-bit
+            # sentinel preserved, not a float64-rounded approximation.
+            # See issue #1783 follow-up.
             src_nodata_str = _text(src_elem, 'NODATA')
-            src_nodata = float(src_nodata_str) if src_nodata_str else None
+            src_nodata = _parse_band_nodata(src_nodata_str, dtype)
 
             # ComplexSource extras
             scale = None
@@ -820,15 +930,26 @@ def read_vrt(vrt_path: str, *, window=None,
                 # with GDAL_NODATA="-9999") stay no-op rather than
                 # tripping OverflowError on ``dtype.type(int(...))``.
                 # See issue #1616.
-                try:
-                    nodata_f = float(src_nodata)
-                except (TypeError, ValueError):
-                    nodata_f = None
-                if (nodata_f is not None
-                        and np.isfinite(nodata_f)
-                        and nodata_f.is_integer()):
+                # Accept an int sentinel directly so 64-bit values
+                # (``2**64 - 1`` for UInt64, ``INT64_MIN`` for Int64)
+                # don't get clobbered by ``float(...).is_integer()``
+                # losing precision near the edges.  Fall back to the
+                # float parse for legacy float-typed nodata.
+                nodata_int: int | None = None
+                if (isinstance(src_nodata, (int, np.integer))
+                        and not isinstance(src_nodata, bool)):
+                    nodata_int = int(src_nodata)
+                else:
+                    try:
+                        nodata_f = float(src_nodata)
+                    except (TypeError, ValueError):
+                        nodata_f = None
+                    if (nodata_f is not None
+                            and np.isfinite(nodata_f)
+                            and nodata_f.is_integer()):
+                        nodata_int = int(nodata_f)
+                if nodata_int is not None:
                     info = np.iinfo(src_arr.dtype)
-                    nodata_int = int(nodata_f)
                     if info.min <= nodata_int <= info.max:
                         sentinel = src_arr.dtype.type(nodata_int)
                         mask = src_arr == sentinel