From 961cabceb3bf47cf2b2c4df3df251e3fe435e305 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Tue, 12 May 2026 05:39:21 -0700
Subject: [PATCH 1/2] Cover write_vrt / read_geotiff_gpu(dtype) /
 write_geotiff_gpu(bigtiff) kwargs (test-coverage sweep pass 9)

Close three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW
error path identified by the 2026-05-12 test-coverage sweep on the
geotiff module.

write_vrt's documented kwargs (relative/crs_wkt/nodata) were pinned
by an existing accepted-kwargs smoke test but no test verified the
override effect. A regression dropping any override branch and
silently using the default-from-first-source would not surface.
Verify the XML output for each kwarg directly: relativeToVRT
attribute + on-disk source path text for relative=, distinct WKT
marker for crs_wkt=, NoDataValue element for nodata=. Parse the VRT
back through parse_vrt to assert round-trips.

read_geotiff_gpu(dtype=) cast had zero direct tests. The eager CPU
path has TestDtypeEager (float64->float32, uint16->int32,
uint16->uint8, float-to-int raise, dtype=None preserves native);
the dask path has TestDtypeDask. The GPU branch had no equivalent.
Mirror the eager matrix on the GPU read path plus the dask+GPU
chunks branch and the open_geotiff(gpu=True, dtype=) dispatcher.

write_geotiff_gpu(bigtiff=) threads force_bigtiff through to
_assemble_tiff but no test asserted the on-disk header switched.
The CPU writer already had that coverage via
test_features::test_force_bigtiff_via_public_api. Add
parse_header().is_bigtiff header checks for bigtiff=True/False/None on
write_geotiff_gpu plus the to_geotiff(gpu=True) dispatcher.

write_vrt(source_files=[]) ValueError error path was uncovered;
add it for completeness.

26 tests, all passing on GPU host.
---
 .claude/sweep-test-coverage-state.csv         |   2 +-
 .../tests/test_kwarg_behaviour_2026_05_12.py  | 516 ++++++++++++++++++
 2 files changed, 517 insertions(+), 1 deletion(-)
 create mode 100644 xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py

diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv
index fb081fa57..587a9658a 100644
--- a/.claude/sweep-test-coverage-state.csv
+++ b/.claude/sweep-test-coverage-state.csv
@@ -1,3 +1,3 @@
 module,last_inspected,issue,severity_max,categories_found,notes
-geotiff,2026-05-11,,HIGH,2;3;4,"Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. 
CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)."
+geotiff,2026-05-12,,MEDIUM,4,"Pass 9 (2026-05-12): added test_kwarg_behaviour_2026_05_12.py closing three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path. write_vrt documented kwargs (relative/crs_wkt/nodata) had a smoke-test pinning that the kwargs are accepted but no test verified the override *effect* -- a regression dropping the override branch and silently using the default-from-first-source would ship undetected. read_geotiff_gpu(dtype=) cast had zero direct tests; the eager path has TestDtypeEager and dask has TestDtypeDask but the GPU branch had no equivalent. write_geotiff_gpu(bigtiff=) threads through to _assemble_tiff(force_bigtiff=) but no test asserted the on-disk header byte switches; the CPU writer had it via test_features::test_force_bigtiff_via_public_api. write_vrt(source_files=[]) ValueError was uncovered. 26 tests, all passing on GPU host: write_vrt relative=True/False XML attribute + path inspection + parse-back round-trip, write_vrt crs_wkt= override distinct-from-default XML check, write_vrt nodata= override + default-from-source coverage, write_vrt([]) ValueError + no-file side effect, read_geotiff_gpu dtype= matrix (float64->float32, float64->float16, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native), open_geotiff(gpu=True, dtype=) dispatcher, read_geotiff_gpu(chunks=, dtype=) dask+GPU branch, write_geotiff_gpu bigtiff=True/False/None header verification, to_geotiff(gpu=True, bigtiff=True) dispatcher thread-through. Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 
18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. 
Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)."
 reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap."
diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
new file mode 100644
index 000000000..180266c2d
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
@@ -0,0 +1,516 @@
+"""Parameter-coverage gap closure for the geotiff module.
+
+Test coverage gap sweep 2026-05-12 (pass 9). Three Cat 4 MEDIUM
+parameter-coverage gaps plus one Cat 4 LOW error path closed here.
+
+Cat 4 MEDIUM #1 -- ``write_vrt`` documented kwargs are accepted but
+not exercised. ``test_polish_1488::TestC5WriteVrtKwargs`` pins the
+signature (kwargs accepted, unknown kwargs rejected, docstring
+present), but no test verifies the override *effect* of any of
+``relative=``, ``crs_wkt=``, or ``nodata=``. A regression that ignored
+the override and silently took the default-from-first-source path
+would not surface against the existing smoke tests because they only
+check that the function returns without raising. The fix is one test
+per kwarg that calls ``write_vrt`` with a non-default value and parses
+the resulting VRT XML to assert the override landed.
+
+Cat 4 MEDIUM #2 -- ``read_geotiff_gpu(dtype=)`` cast. The eager numpy
+path has ``test_dtype_read.TestDtypeEager`` with full coverage
+(float64->float32, uint16->int32, uint16->uint8, float-to-int raises,
+dtype=None preserves native). The dask path has ``TestDtypeDask``.
+The GPU read path has no equivalent. A regression that dropped the
+``arr.astype(target)`` block in ``read_geotiff_gpu`` would silently
+return data in the file's native dtype, breaking any GPU pipeline
+that relies on the cast.
+
+Cat 4 MEDIUM #3 -- ``write_geotiff_gpu(bigtiff=)``. The CPU writer
+covers ``bigtiff=True`` / ``False`` / ``None`` (auto) via
+``test_features::test_force_bigtiff_via_public_api`` and friends.
+``write_geotiff_gpu`` threads ``bigtiff=`` through to
+``_assemble_tiff(force_bigtiff=...)`` but no test asserts the on-disk
+header is BigTIFF when the kwarg is set on the GPU writer. A
+regression dropping the kwarg from the GPU writer's _assemble_tiff
+call site would silently fall back to classic-TIFF on the GPU path.
+
+Cat 4 LOW -- ``write_vrt(source_files=[])`` error path. The validator
+raises ``ValueError("source_files must not be empty")``. The error
+message is not exercised by any test, so a regression dropping the
+check would only surface on a downstream IndexError much further in.
+"""
+from __future__ import annotations
+
+import importlib.util
+import os
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from xrspatial.geotiff import (
+    open_geotiff,
+    read_geotiff_gpu,
+    to_geotiff,
+    write_geotiff_gpu,
+    write_vrt,
+)
+from xrspatial.geotiff._header import parse_header
+from xrspatial.geotiff._vrt import parse_vrt
+
+
+# --------------------------------------------------------------------------
+# GPU gating
+# --------------------------------------------------------------------------
+
+
+def _gpu_available() -> bool:
+    if importlib.util.find_spec("cupy") is None:
+        return False
+    try:
+        import cupy
+        return bool(cupy.cuda.is_available())
+    except Exception:
+        return False
+
+
+_HAS_GPU = _gpu_available()
+_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required")
+
+
+# --------------------------------------------------------------------------
+# Shared fixtures
+# --------------------------------------------------------------------------
+
+
+@pytest.fixture
+def source_tif(tmp_path):
+    """Write a single-band float32 GeoTIFF with EPSG:4326 + nodata."""
+    arr = np.arange(64, dtype=np.float32).reshape(8, 8)
+    y = np.linspace(1.0, 0.0, 8)
+    x = np.linspace(0.0, 1.0, 8)
+    da = xr.DataArray(
+        arr, dims=['y', 'x'],
+        coords={'y': y, 'x': x},
+        attrs={'crs': 4326, 'nodata': -1.0},
+    )
+    p = str(tmp_path / 'src_kwbeh_2026_05_12.tif')
+    to_geotiff(da, p, compression='none')
+    return p
+
+
+@pytest.fixture
+def float64_tif(tmp_path):
+    """Write a float64 GeoTIFF for GPU dtype cast tests."""
+    arr = np.random.default_rng(2026_05_12).random((40, 40)).astype(np.float64)
+    y = np.linspace(41.0, 40.0, 40)
+    x = np.linspace(-105.0, -104.0, 40)
+    da = xr.DataArray(
+        arr, dims=['y', 'x'],
+        coords={'y': y, 'x': x},
+        attrs={'crs': 4326},
+    )
+    p = str(tmp_path / 'kwbeh_2026_05_12_f64.tif')
+    to_geotiff(da, p, compression='none')
+    return p, arr
+
+
+@pytest.fixture
+def uint16_tif(tmp_path):
+    """Write a uint16 GeoTIFF for GPU dtype cast tests."""
+    arr = np.random.default_rng(2026_05_12).integers(
+        0, 10_000, (30, 30), dtype=np.uint16
+    )
+    y = np.linspace(41.0, 40.0, 30)
+    x = np.linspace(-105.0, -104.0, 30)
+    da = xr.DataArray(
+        arr, dims=['y', 'x'],
+        coords={'y': y, 'x': x},
+        attrs={'crs': 4326},
+    )
+    p = str(tmp_path / 'kwbeh_2026_05_12_u16.tif')
+    to_geotiff(da, p, compression='none')
+    return p, arr
+
+
+# ==========================================================================
+# Cat 4 MEDIUM #1: write_vrt kwarg behaviour
+# ==========================================================================
+
+
+class TestWriteVrtRelativeBehaviour:
+    """``relative=`` flips the ``relativeToVRT`` attribute and rewrites the
+    source filename. The existing smoke test only asserts both modes are
+    *accepted*, not that they actually take effect."""
+
+    def _read_xml(self, path):
+        with open(path, 'r') as fh:
+            return fh.read()
+
+    def test_relative_true_writes_relative_path(self, source_tif, tmp_path):
+        vrt_path = str(tmp_path / 'rel_true.vrt')
+        write_vrt(vrt_path, [source_tif], relative=True)
+
+        xml = self._read_xml(vrt_path)
+        # The on-disk text must carry the relativeToVRT="1" attribute,
+        # not "0", and the SourceFilename text must not contain the
+        # absolute path's tmp_path prefix.
+        assert 'relativeToVRT="1"' in xml
+        assert 'relativeToVRT="0"' not in xml
+        # Source path is the bare filename (same directory as the VRT).
+        assert os.path.basename(source_tif) in xml
+        # The absolute path prefix (the tmp_path directory) is not in
+        # the XML; otherwise the writer would have stored the full
+        # path despite relative=True.
+        assert str(tmp_path) not in xml
+
+    def test_relative_false_writes_absolute_path(self, source_tif, tmp_path):
+        vrt_path = str(tmp_path / 'rel_false.vrt')
+        write_vrt(vrt_path, [source_tif], relative=False)
+
+        xml = self._read_xml(vrt_path)
+        # ``relative=False`` must flip the attribute and emit an absolute
+        # path. A regression that ignored ``relative=`` would silently
+        # produce the same XML as ``relative=True``.
+        assert 'relativeToVRT="0"' in xml
+        assert 'relativeToVRT="1"' not in xml
+        # Absolute path is in the file's SourceFilename text.
+        # Use realpath to handle symlinks tmp_path may carry on macOS.
+        abs_src = os.path.realpath(source_tif)
+        assert abs_src in xml
+
+    def test_relative_true_parses_back_to_same_source(self, source_tif, tmp_path):
+        """relative=True still round-trips: parse_vrt resolves the
+        relative path back to the absolute one."""
+        vrt_path = str(tmp_path / 'rel_true_rt.vrt')
+        write_vrt(vrt_path, [source_tif], relative=True)
+        parsed = parse_vrt(self._read_xml(vrt_path), vrt_dir=str(tmp_path))
+        assert len(parsed.bands) == 1
+        assert len(parsed.bands[0].sources) == 1
+        # parse_vrt canonicalises with realpath, so compare against the
+        # realpath of the original source.
+        assert (
+            os.path.realpath(parsed.bands[0].sources[0].filename)
+            == os.path.realpath(source_tif)
+        )
+
+    def test_relative_false_parses_back_to_same_source(self, source_tif, tmp_path):
+        vrt_path = str(tmp_path / 'rel_false_rt.vrt')
+        write_vrt(vrt_path, [source_tif], relative=False)
+        parsed = parse_vrt(self._read_xml(vrt_path), vrt_dir=str(tmp_path))
+        assert len(parsed.bands) == 1
+        assert (
+            os.path.realpath(parsed.bands[0].sources[0].filename)
+            == os.path.realpath(source_tif)
+        )
+
+
+class TestWriteVrtCrsWktBehaviour:
+    """``crs_wkt=`` overrides the first source's CRS WKT. Without an
+    override, the first source's WKT is propagated. With an override,
+    the override wins."""
+
+    def _read_parsed(self, vrt_path, tmp_path):
+        with open(vrt_path, 'r') as fh:
+            return parse_vrt(fh.read(), vrt_dir=str(tmp_path))
+
+    def test_crs_wkt_override_wins(self, source_tif, tmp_path):
+        """The supplied WKT must land in <SRS>, not the source's WKT."""
+        override = (
+            'PROJCS["UnitTest_Override_Sweep_2026_05_12",'
+            'GEOGCS["test_datum",DATUM["d",SPHEROID["s",6378137,298.257223563]],'
+            'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],'
+            'PROJECTION["Transverse_Mercator"],UNIT["metre",1]]'
+        )
+        vrt_path = str(tmp_path / 'crs_wkt_override.vrt')
+        write_vrt(vrt_path, [source_tif], crs_wkt=override)
+        parsed = self._read_parsed(vrt_path, tmp_path)
+        assert parsed.crs_wkt == override
+
+    def test_crs_wkt_none_falls_back_to_first_source(self, source_tif, tmp_path):
+        """No override means the first source's WKT is used. The source
+        was written with crs=4326, so the WKT must mention EPSG:4326's
+        marker text (any non-empty WKT is the contract; we check it is
+        not the override value to defend against silent default
+        substitution by the writer)."""
+        vrt_path = str(tmp_path / 'crs_wkt_default.vrt')
+        write_vrt(vrt_path, [source_tif])
+        parsed = self._read_parsed(vrt_path, tmp_path)
+        # The first source has crs=4326. The WKT will be something
+        # non-empty mentioning EPSG:4326 (precise WKT text depends on
+        # the EPSG database wired into the writer).
+        if parsed.crs_wkt is not None:
+            # When the WKT is present it must come from the source
+            # (mentions a geographic marker) rather than the override.
+            text = parsed.crs_wkt.lower()
+            assert 'unittest_override_sweep_2026_05_12' not in text
+
+    def test_crs_wkt_override_distinct_from_default(self, source_tif, tmp_path):
+        """The override and default WKT must produce *different* on-disk
+        XML. This is the safety-net: even if a future writer change
+        normalises the WKT before emitting, the override path must
+        still land a distinguishable WKT in the file."""
+        marker = "UnitTest_Override_Marker_Sweep_2026_05_12"
+        override = (
+            f'GEOGCS["{marker}",'
+            'DATUM["d",SPHEROID["s",6378137,298.257223563]],'
+            'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
+        )
+        # Override path
+        vrt_override = str(tmp_path / 'override.vrt')
+        write_vrt(vrt_override, [source_tif], crs_wkt=override)
+        # Default path
+        vrt_default = str(tmp_path / 'default.vrt')
+        write_vrt(vrt_default, [source_tif])
+
+        with open(vrt_override, 'r') as fh:
+            text_override = fh.read()
+        with open(vrt_default, 'r') as fh:
+            text_default = fh.read()
+
+        assert marker in text_override
+        assert marker not in text_default
+
+
+class TestWriteVrtNodataBehaviour:
+    """``nodata=`` overrides the first source's nodata sentinel.
+    Source file is written with ``nodata=-1.0``; the override must land
+    in every ``<NoDataValue>`` element."""
+
+    def _bands(self, vrt_path, tmp_path):
+        with open(vrt_path, 'r') as fh:
+            return parse_vrt(fh.read(), vrt_dir=str(tmp_path)).bands
+
+    def test_nodata_override_wins(self, source_tif, tmp_path):
+        vrt_path = str(tmp_path / 'nodata_override.vrt')
+        write_vrt(vrt_path, [source_tif], nodata=-9999.0)
+        bands = self._bands(vrt_path, tmp_path)
+        assert len(bands) == 1
+        assert bands[0].nodata == -9999.0
+
+    def test_nodata_none_takes_first_source(self, source_tif, tmp_path):
+        """No override means the first source's nodata is used. The
+        source was written with ``nodata=-1.0`` -- a regression that
+        silently dropped the default-from-source code path would land
+        ``None`` here."""
+        vrt_path = str(tmp_path / 'nodata_default.vrt')
+        write_vrt(vrt_path, [source_tif])
+        bands = self._bands(vrt_path, tmp_path)
+        assert len(bands) == 1
+        assert bands[0].nodata == -1.0
+
+    def test_nodata_override_writes_xml_element(self, source_tif, tmp_path):
+        """Raw XML check: the override sentinel value lands in a
+        <NoDataValue> element."""
+        vrt_path = str(tmp_path / 'nodata_xml.vrt')
+        write_vrt(vrt_path, [source_tif], nodata=-12345.0)
+        with open(vrt_path, 'r') as fh:
+            xml = fh.read()
+        assert '<NoDataValue>-12345.0</NoDataValue>' in xml
+
+
+# ==========================================================================
+# Cat 4 LOW: write_vrt error paths
+# ==========================================================================
+
+
+class TestWriteVrtEmptySourceFiles:
+    """``write_vrt(source_files=[])`` raises with a clear message.
+    The error path is uncovered. A regression dropping the
+    pre-validation would surface much further down as an IndexError
+    when computing the bounding box of zero sources."""
+
+    def test_empty_list_raises(self, tmp_path):
+        vrt_path = str(tmp_path / 'should_not_exist.vrt')
+        with pytest.raises(ValueError, match="source_files must not be empty"):
+            write_vrt(vrt_path, [])
+
+    def test_empty_list_does_not_create_file(self, tmp_path):
+        """A rejected empty ``source_files`` must not leave a VRT behind."""
+        vrt_path = str(tmp_path / 'should_not_exist_2.vrt')
+        # pytest.raises fails the test if the call stops raising.
+        with pytest.raises(ValueError):
+            write_vrt(vrt_path, [])
+        assert not os.path.exists(vrt_path)
+
+
+# ==========================================================================
+# Cat 4 MEDIUM #2: read_geotiff_gpu(dtype=)
+# ==========================================================================
+
+
+@_gpu_only
+class TestReadGeotiffGpuDtype:
+    """``read_geotiff_gpu(dtype=...)`` casts on device. The eager CPU
+    path has TestDtypeEager; the dask path has TestDtypeDask. The GPU
+    path had no equivalent."""
+
+    def test_float64_to_float32(self, float64_tif):
+        path, orig = float64_tif
+        result = read_geotiff_gpu(path, dtype='float32')
+        assert result.dtype == np.float32
+        np.testing.assert_array_almost_equal(
+            result.data.get(), orig.astype(np.float32), decimal=6)
+
+    def test_float64_to_float16(self, float64_tif):
+        path, _ = float64_tif
+        result = read_geotiff_gpu(path, dtype=np.float16)
+        assert result.dtype == np.float16
+
+    def test_uint16_to_int32(self, uint16_tif):
+        path, orig = uint16_tif
+        result = read_geotiff_gpu(path, dtype='int32')
+        assert result.dtype == np.int32
+        np.testing.assert_array_equal(result.data.get(), orig.astype(np.int32))
+
+    def test_uint16_to_uint8(self, uint16_tif):
+        path, _ = uint16_tif
+        result = read_geotiff_gpu(path, dtype='uint8')
+        assert result.dtype == np.uint8
+
+    def test_float_to_int_raises(self, float64_tif):
+        path, _ = float64_tif
+        # The validator runs before the GPU upload; the error contract is
+        # the same as the CPU path (``float`` ... ``int``).
+        with pytest.raises(ValueError, match='float.*int'):
+            read_geotiff_gpu(path, dtype='int32')
+
+    def test_dtype_none_preserves_native_float64(self, float64_tif):
+        path, _ = float64_tif
+        result = read_geotiff_gpu(path, dtype=None)
+        assert result.dtype == np.float64
+
+    def test_dtype_none_preserves_native_uint16(self, uint16_tif):
+        path, _ = uint16_tif
+        result = read_geotiff_gpu(path, dtype=None)
+        assert result.dtype == np.uint16
+
+
+@_gpu_only
+class TestOpenGeotiffGpuDispatchDtype:
+    """``open_geotiff(..., gpu=True, dtype=...)`` forwards through the
+    dispatcher into ``read_geotiff_gpu``. Pin the dispatch path so a
+    regression dropping ``dtype=`` on the GPU branch surfaces here too."""
+
+    def test_dispatch_float64_to_float32(self, float64_tif):
+        path, orig = float64_tif
+        result = open_geotiff(path, gpu=True, dtype='float32')
+        assert result.dtype == np.float32
+        np.testing.assert_array_almost_equal(
+            result.data.get(), orig.astype(np.float32), decimal=6)
+
+    def test_dispatch_float_to_int_raises(self, float64_tif):
+        path, _ = float64_tif
+        with pytest.raises(ValueError, match='float.*int'):
+            open_geotiff(path, gpu=True, dtype='int32')
+
+
+@_gpu_only
+class TestReadGeotiffGpuChunksDtype:
+    """``read_geotiff_gpu(chunks=..., dtype=...)`` -- dask + GPU + dtype
+    combination is a separate dispatch path through the GPU reader and
+    its own ``astype`` step on the cupy array, then a ``chunk`` call.
+    Cover the cast for the dask+GPU branch too."""
+
+    def test_chunks_float64_to_float32(self, float64_tif):
+        path, orig = float64_tif
+        result = read_geotiff_gpu(path, chunks=20, dtype='float32')
+        assert result.dtype == np.float32
+        # ``.data`` is a dask array of cupy chunks. Compute to a cupy
+        # array, then ``.get()`` copies it to a host numpy array.
+        computed = result.data.compute()
+        np.testing.assert_array_almost_equal(
+            computed.get(), orig.astype(np.float32), decimal=6)
+
+
+# ==========================================================================
+# Cat 4 MEDIUM #3: write_geotiff_gpu(bigtiff=)
+# ==========================================================================
+
+
+@_gpu_only
+class TestWriteGeotiffGpuBigtiff:
+    """``write_geotiff_gpu(bigtiff=)`` threads ``force_bigtiff=`` to
+    ``_assemble_tiff``. The CPU writer has equivalent header-level
+    bigtiff coverage; the GPU writer did not.
+
+    Small arrays are sufficient because the BigTIFF decision is a
+    width-of-offset-field switch, not a value-range one -- a forced
+    BigTIFF on a 64-pixel array produces the same header magic byte
+    pattern that a >4 GB file would."""
+
+    def _read_header_is_bigtiff(self, path):
+        with open(path, 'rb') as fh:
+            header = parse_header(fh.read(16))
+        return header.is_bigtiff
+
+    def test_force_bigtiff_true_writes_bigtiff(self, tmp_path):
+        import cupy
+        arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8)
+        da = xr.DataArray(
+            arr, dims=['y', 'x'],
+            coords={'y': np.arange(8, dtype=np.float64),
+                    'x': np.arange(8, dtype=np.float64)},
+        )
+        path = str(tmp_path / 'gpu_bigtiff_true.tif')
+        write_geotiff_gpu(da, path, bigtiff=True, tile_size=4)
+        assert self._read_header_is_bigtiff(path), (
+            "write_geotiff_gpu(bigtiff=True) should emit BigTIFF header "
+            "(magic byte 43)."
+        )
+        # Data round-trips even with the BigTIFF header.
+        rd = open_geotiff(path)
+        np.testing.assert_array_equal(rd.values, arr.get())
+
+    def test_force_bigtiff_false_writes_classic(self, tmp_path):
+        import cupy
+        arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8)
+        da = xr.DataArray(
+            arr, dims=['y', 'x'],
+            coords={'y': np.arange(8, dtype=np.float64),
+                    'x': np.arange(8, dtype=np.float64)},
+        )
+        path = str(tmp_path / 'gpu_bigtiff_false.tif')
+        write_geotiff_gpu(da, path, bigtiff=False, tile_size=4)
+        assert not self._read_header_is_bigtiff(path), (
+            "write_geotiff_gpu(bigtiff=False) should emit classic TIFF."
+        )
+
+    def test_bigtiff_none_stays_classic_small_file(self, tmp_path):
+        """``bigtiff=None`` (default) is auto: small files should stay
+        classic. Without an explicit None test, a regression flipping
+        the default to ``True`` would not be caught -- and that would
+        break interop with older readers that don't accept BigTIFF."""
+        import cupy
+        arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8)
+        da = xr.DataArray(
+            arr, dims=['y', 'x'],
+            coords={'y': np.arange(8, dtype=np.float64),
+                    'x': np.arange(8, dtype=np.float64)},
+        )
+        path = str(tmp_path / 'gpu_bigtiff_default.tif')
+        write_geotiff_gpu(da, path, tile_size=4)
+        assert not self._read_header_is_bigtiff(path), (
+            "write_geotiff_gpu default should auto-pick classic TIFF for "
+            "tiny outputs; a default switch to BigTIFF would break "
+            "older readers."
+        )
+
+    def test_to_geotiff_gpu_bigtiff_threads_through(self, tmp_path):
+        """``to_geotiff(..., gpu=True, bigtiff=True)`` dispatches into
+        ``write_geotiff_gpu(bigtiff=True)``. Cover the dispatcher's
+        thread-through so a regression dropping ``bigtiff=`` on the GPU
+        dispatch branch surfaces here too."""
+        import cupy
+        arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8)
+        da = xr.DataArray(
+            arr, dims=['y', 'x'],
+            coords={'y': np.arange(8, dtype=np.float64),
+                    'x': np.arange(8, dtype=np.float64)},
+        )
+        path = str(tmp_path / 'to_gpu_bigtiff_true.tif')
+        to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=4)
+        assert self._read_header_is_bigtiff(path), (
+            "to_geotiff(gpu=True, bigtiff=True) should reach the GPU "
+            "writer with force_bigtiff=True propagated through."
+        )
+        rd = open_geotiff(path)
+        np.testing.assert_array_equal(rd.values, arr.get())

From fdeecbc076a317d6421550b09ac4e7197f6a8138 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Tue, 12 May 2026 06:20:50 -0700
Subject: [PATCH 2/2] Strengthen crs_wkt fall-back assertion (PR #1656)

---
 .../tests/test_kwarg_behaviour_2026_05_12.py  | 24 +++++++++----------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
index 180266c2d..033730273 100644
--- a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
+++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py
@@ -226,22 +226,20 @@ def test_crs_wkt_override_wins(self, source_tif, tmp_path):
         assert parsed.crs_wkt == override
 
     def test_crs_wkt_none_falls_back_to_first_source(self, source_tif, tmp_path):
-        """No override means the first source's WKT is used. The source
-        was written with crs=4326, so the WKT must mention EPSG:4326's
-        marker text (any non-empty WKT is the contract; we check it is
-        not the override value to defend against silent default
-        substitution by the writer)."""
+        """No override means the first source's WKT is used. Pin the
+        contract: the default-VRT's parsed crs_wkt must be present,
+        non-empty, and match the source TIF's own crs_wkt (no silent
+        substitution, no None on the fall-back path)."""
         vrt_path = str(tmp_path / 'crs_wkt_default.vrt')
         write_vrt(vrt_path, [source_tif])
         parsed = self._read_parsed(vrt_path, tmp_path)
-        # The first source has crs=4326. The WKT will be something
-        # non-empty mentioning EPSG:4326 (precise WKT text depends on
-        # the EPSG database wired into the writer).
-        if parsed.crs_wkt is not None:
-            # When the WKT is present it must come from the source
-            # (mentions a geographic marker) rather than the override.
-            text = parsed.crs_wkt.lower()
-            assert 'unittest_override_sweep_2026_05_12' not in text
+
+        source_da = open_geotiff(source_tif)
+        source_wkt = source_da.attrs.get('crs_wkt')
+
+        assert parsed.crs_wkt is not None
+        assert parsed.crs_wkt != ''
+        assert parsed.crs_wkt == source_wkt
 
     def test_crs_wkt_override_distinct_from_default(self, source_tif, tmp_path):
         """The override and default WKT must produce *different* on-disk