From 961cabceb3bf47cf2b2c4df3df251e3fe435e305 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 05:39:21 -0700 Subject: [PATCH 1/2] Cover write_vrt / read_geotiff_gpu(dtype) / write_geotiff_gpu(bigtiff) kwargs (test-coverage sweep pass 9) Close three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path identified by the 2026-05-12 test-coverage sweep on the geotiff module. write_vrt's documented kwargs (relative/crs_wkt/nodata) were pinned by an existing accepted-kwargs smoke test but no test verified the override effect. A regression dropping any override branch and silently using the default-from-first-source would not surface. Verify the XML output for each kwarg directly: relativeToVRT attribute + on-disk source path text for relative=, distinct WKT marker for crs_wkt=, NoDataValue element for nodata=. Parse the VRT back through parse_vrt to assert round-trips. read_geotiff_gpu(dtype=) cast had zero direct tests. The eager CPU path has TestDtypeEager (float64->float32, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native); the dask path has TestDtypeDask. The GPU branch had no equivalent. Mirror the eager matrix on the GPU read path plus the dask+GPU chunks branch and the open_geotiff(gpu=True, dtype=) dispatcher. write_geotiff_gpu(bigtiff=) threads force_bigtiff through to _assemble_tiff but no test asserted the on-disk header switched. The CPU writer had it via test_features::test_force_bigtiff_via_public_api. Add header parse_header().is_bigtiff checks for bigtiff=True/False/None on write_geotiff_gpu plus the to_geotiff(gpu=True) dispatcher. write_vrt(source_files=[]) ValueError error path was uncovered; add it for completeness. 26 tests, all passing on GPU host. 
--- .claude/sweep-test-coverage-state.csv | 2 +- .../tests/test_kwarg_behaviour_2026_05_12.py | 516 ++++++++++++++++++ 2 files changed, 517 insertions(+), 1 deletion(-) create mode 100644 xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index fb081fa57..587a9658a 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -1,3 +1,3 @@ module,last_inspected,issue,severity_max,categories_found,notes -geotiff,2026-05-11,,HIGH,2;3;4,"Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 
11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." +geotiff,2026-05-12,,MEDIUM,4,"Pass 9 (2026-05-12): added test_kwarg_behaviour_2026_05_12.py closing three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path. 
write_vrt documented kwargs (relative/crs_wkt/nodata) had a smoke-test pinning that the kwargs are accepted but no test verified the override *effect* -- a regression dropping the override branch and silently using the default-from-first-source would ship undetected. read_geotiff_gpu(dtype=) cast had zero direct tests; the eager path has TestDtypeEager and dask has TestDtypeDask but the GPU branch had no equivalent. write_geotiff_gpu(bigtiff=) threads through to _assemble_tiff(force_bigtiff=) but no test asserted the on-disk header byte switches; the CPU writer had it via test_features::test_force_bigtiff_via_public_api. write_vrt(source_files=[]) ValueError was uncovered. 26 tests, all passing on GPU host: write_vrt relative=True/False XML attribute + path inspection + parse-back round-trip, write_vrt crs_wkt= override distinct-from-default XML check, write_vrt nodata= override + default-from-source coverage, write_vrt([]) ValueError + no-file side effect, read_geotiff_gpu dtype= matrix (float64->float32, float64->float16, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native), open_geotiff(gpu=True, dtype=) dispatcher, read_geotiff_gpu(chunks=, dtype=) dask+GPU branch, write_geotiff_gpu bigtiff=True/False/None header verification, to_geotiff(gpu=True, bigtiff=True) dispatcher thread-through. Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 
18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. 
Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py new file mode 100644 index 000000000..180266c2d --- /dev/null +++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py @@ -0,0 +1,516 @@ +"""Parameter-coverage gap closure for the geotiff module. + +Test coverage gap sweep 2026-05-12 (pass 9). Three Cat 4 MEDIUM +parameter-coverage gaps plus one Cat 4 LOW error path closed here. + +Cat 4 MEDIUM #1 -- ``write_vrt`` documented kwargs are accepted but +not exercised. ``test_polish_1488::TestC5WriteVrtKwargs`` pins the +signature (kwargs accepted, unknown kwargs rejected, docstring +present), but no test verifies the override *effect* of any of +``relative=``, ``crs_wkt=``, or ``nodata=``. 
A regression that ignored +the override and silently took the default-from-first-source path +would not surface against the existing smoke tests because they only +check that the function returns without raising. The fix is one test +per kwarg that calls ``write_vrt`` with a non-default value and parses +the resulting VRT XML to assert the override landed. + +Cat 4 MEDIUM #2 -- ``read_geotiff_gpu(dtype=)`` cast. The eager numpy +path has ``test_dtype_read.TestDtypeEager`` with full coverage +(float64->float32, uint16->int32, uint16->uint8, float-to-int raises, +dtype=None preserves native). The dask path has ``TestDtypeDask``. +The GPU read path has no equivalent. A regression that dropped the +``arr.astype(target)`` block in ``read_geotiff_gpu`` would silently +return data in the file's native dtype, breaking any GPU pipeline +that relies on the cast. + +Cat 4 MEDIUM #3 -- ``write_geotiff_gpu(bigtiff=)``. The CPU writer +covers ``bigtiff=True`` / ``False`` / ``None`` (auto) via +``test_features::test_force_bigtiff_via_public_api`` and friends. +``write_geotiff_gpu`` threads ``bigtiff=`` through to +``_assemble_tiff(force_bigtiff=...)`` but no test asserts the on-disk +header is BigTIFF when the kwarg is set on the GPU writer. A +regression dropping the kwarg from the GPU writer's _assemble_tiff +call site would silently fall back to classic-TIFF on the GPU path. + +Cat 4 LOW -- ``write_vrt(source_files=[])`` error path. The validator +raises ``ValueError("source_files must not be empty")``. The error +message is not exercised by any test, so a regression dropping the +check would only surface on a downstream IndexError much further in. 
+""" +from __future__ import annotations + +import importlib.util +import os + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + open_geotiff, + read_geotiff_gpu, + to_geotiff, + write_geotiff_gpu, + write_vrt, +) +from xrspatial.geotiff._header import parse_header +from xrspatial.geotiff._vrt import parse_vrt + + +# -------------------------------------------------------------------------- +# GPU gating +# -------------------------------------------------------------------------- + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") + + +# -------------------------------------------------------------------------- +# Shared fixtures +# -------------------------------------------------------------------------- + + +@pytest.fixture +def source_tif(tmp_path): + """Write a single-band float32 GeoTIFF with EPSG:4326 + nodata.""" + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + y = np.linspace(1.0, 0.0, 8) + x = np.linspace(0.0, 1.0, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326, 'nodata': -1.0}, + ) + p = str(tmp_path / 'src_kwbeh_2026_05_12.tif') + to_geotiff(da, p, compression='none') + return p + + +@pytest.fixture +def float64_tif(tmp_path): + """Write a float64 GeoTIFF for GPU dtype cast tests.""" + arr = np.random.default_rng(2026_05_12).random((40, 40)).astype(np.float64) + y = np.linspace(41.0, 40.0, 40) + x = np.linspace(-105.0, -104.0, 40) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + ) + p = str(tmp_path / 'kwbeh_2026_05_12_f64.tif') + to_geotiff(da, p, compression='none') + return p, arr + + +@pytest.fixture +def uint16_tif(tmp_path): + """Write a uint16 GeoTIFF for 
GPU dtype cast tests.""" + arr = np.random.default_rng(2026_05_12).integers( + 0, 10_000, (30, 30), dtype=np.uint16 + ) + y = np.linspace(41.0, 40.0, 30) + x = np.linspace(-105.0, -104.0, 30) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': y, 'x': x}, + attrs={'crs': 4326}, + ) + p = str(tmp_path / 'kwbeh_2026_05_12_u16.tif') + to_geotiff(da, p, compression='none') + return p, arr + + +# ========================================================================== +# Cat 4 MEDIUM #1: write_vrt kwarg behaviour +# ========================================================================== + + +class TestWriteVrtRelativeBehaviour: + """``relative=`` flips the ``relativeToVRT`` attribute and rewrites the + source filename. The existing smoke test only asserts both modes are + *accepted*, not that they actually take effect.""" + + def _read_xml(self, path): + with open(path, 'r') as fh: + return fh.read() + + def test_relative_true_writes_relative_path(self, source_tif, tmp_path): + vrt_path = str(tmp_path / 'rel_true.vrt') + write_vrt(vrt_path, [source_tif], relative=True) + + xml = self._read_xml(vrt_path) + # The on-disk text must carry the relativeToVRT="1" attribute, + # not "0", and the SourceFilename text must not contain the + # absolute path's tmp_path prefix. + assert 'relativeToVRT="1"' in xml + assert 'relativeToVRT="0"' not in xml + # Source path is the bare filename (same directory as the VRT). + assert os.path.basename(source_tif) in xml + # The absolute path prefix (the tmp_path directory) is not in + # the XML; otherwise the writer would have stored the full + # path despite relative=True. + assert str(tmp_path) not in xml + + def test_relative_false_writes_absolute_path(self, source_tif, tmp_path): + vrt_path = str(tmp_path / 'rel_false.vrt') + write_vrt(vrt_path, [source_tif], relative=False) + + xml = self._read_xml(vrt_path) + # ``relative=False`` must flip the attribute and emit an absolute + # path. 
A regression that ignored ``relative=`` would silently + # produce the same XML as ``relative=True``. + assert 'relativeToVRT="0"' in xml + assert 'relativeToVRT="1"' not in xml + # Absolute path is in the file's SourceFilename text. + # Use realpath to handle symlinks tmp_path may carry on macOS. + abs_src = os.path.realpath(source_tif) + assert abs_src in xml + + def test_relative_true_parses_back_to_same_source(self, source_tif, tmp_path): + """relative=True still round-trips: parse_vrt resolves the + relative path back to the absolute one.""" + vrt_path = str(tmp_path / 'rel_true_rt.vrt') + write_vrt(vrt_path, [source_tif], relative=True) + parsed = parse_vrt(self._read_xml(vrt_path), vrt_dir=str(tmp_path)) + assert len(parsed.bands) == 1 + assert len(parsed.bands[0].sources) == 1 + # parse_vrt canonicalises with realpath, so compare against the + # realpath of the original source. + assert ( + os.path.realpath(parsed.bands[0].sources[0].filename) + == os.path.realpath(source_tif) + ) + + def test_relative_false_parses_back_to_same_source(self, source_tif, tmp_path): + vrt_path = str(tmp_path / 'rel_false_rt.vrt') + write_vrt(vrt_path, [source_tif], relative=False) + parsed = parse_vrt(self._read_xml(vrt_path), vrt_dir=str(tmp_path)) + assert len(parsed.bands) == 1 + assert ( + os.path.realpath(parsed.bands[0].sources[0].filename) + == os.path.realpath(source_tif) + ) + + +class TestWriteVrtCrsWktBehaviour: + """``crs_wkt=`` overrides the first source's CRS WKT. Without an + override, the first source's WKT is propagated. 
With an override, + the override wins.""" + + def _read_parsed(self, vrt_path, tmp_path): + with open(vrt_path, 'r') as fh: + return parse_vrt(fh.read(), vrt_dir=str(tmp_path)) + + def test_crs_wkt_override_wins(self, source_tif, tmp_path): + """The supplied WKT must land in ``SRS``, not the source's WKT.""" + override = ( + 'PROJCS["UnitTest_Override_Sweep_2026_05_12",' + 'GEOGCS["test_datum",DATUM["d",SPHEROID["s",6378137,298.257223563]],' + 'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],' + 'PROJECTION["Transverse_Mercator"],UNIT["metre",1]]' + ) + vrt_path = str(tmp_path / 'crs_wkt_override.vrt') + write_vrt(vrt_path, [source_tif], crs_wkt=override) + parsed = self._read_parsed(vrt_path, tmp_path) + assert parsed.crs_wkt == override + + def test_crs_wkt_none_falls_back_to_first_source(self, source_tif, tmp_path): + """No override means the first source's WKT is used. The source + was written with crs=4326, so the WKT must mention EPSG:4326's + marker text (any non-empty WKT is the contract; we check it is + not the override value to defend against silent default + substitution by the writer).""" + vrt_path = str(tmp_path / 'crs_wkt_default.vrt') + write_vrt(vrt_path, [source_tif]) + parsed = self._read_parsed(vrt_path, tmp_path) + # The first source has crs=4326. The WKT will be something + # non-empty mentioning EPSG:4326 (precise WKT text depends on + # the EPSG database wired into the writer). + if parsed.crs_wkt is not None: + # When the WKT is present it must come from the source + # (mentions a geographic marker) rather than the override. + text = parsed.crs_wkt.lower() + assert 'unittest_override_sweep_2026_05_12' not in text + + def test_crs_wkt_override_distinct_from_default(self, source_tif, tmp_path): + """The override and default WKT must produce *different* on-disk + XML. 
This is the safety-net: even if a future writer change + normalises the WKT before emitting, the override path must + still land a distinguishable WKT in the file.""" + marker = "UnitTest_Override_Marker_Sweep_2026_05_12" + override = ( + f'GEOGCS["{marker}",' + 'DATUM["d",SPHEROID["s",6378137,298.257223563]],' + 'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]' + ) + # Override path + vrt_override = str(tmp_path / 'override.vrt') + write_vrt(vrt_override, [source_tif], crs_wkt=override) + # Default path + vrt_default = str(tmp_path / 'default.vrt') + write_vrt(vrt_default, [source_tif]) + + with open(vrt_override, 'r') as fh: + text_override = fh.read() + with open(vrt_default, 'r') as fh: + text_default = fh.read() + + assert marker in text_override + assert marker not in text_default + + +class TestWriteVrtNodataBehaviour: + """``nodata=`` overrides the first source's nodata sentinel. + Source file is written with ``nodata=-1.0``; the override must land + in every ``NoDataValue`` element.""" + + def _bands(self, vrt_path, tmp_path): + with open(vrt_path, 'r') as fh: + return parse_vrt(fh.read(), vrt_dir=str(tmp_path)).bands + + def test_nodata_override_wins(self, source_tif, tmp_path): + vrt_path = str(tmp_path / 'nodata_override.vrt') + write_vrt(vrt_path, [source_tif], nodata=-9999.0) + bands = self._bands(vrt_path, tmp_path) + assert len(bands) == 1 + assert bands[0].nodata == -9999.0 + + def test_nodata_none_takes_first_source(self, source_tif, tmp_path): + """No override means the first source's nodata is used. 
The + source was written with ``nodata=-1.0`` -- a regression that + silently dropped the default-from-source code path would land + ``None`` here.""" + vrt_path = str(tmp_path / 'nodata_default.vrt') + write_vrt(vrt_path, [source_tif]) + bands = self._bands(vrt_path, tmp_path) + assert len(bands) == 1 + assert bands[0].nodata == -1.0 + + def test_nodata_override_writes_xml_element(self, source_tif, tmp_path): + """Raw XML check: the override sentinel value lands in a + ``NoDataValue`` element.""" + vrt_path = str(tmp_path / 'nodata_xml.vrt') + write_vrt(vrt_path, [source_tif], nodata=-12345.0) + with open(vrt_path, 'r') as fh: + xml = fh.read() + assert '-12345.0' in xml + + +# ========================================================================== +# Cat 4 LOW: write_vrt error paths +# ========================================================================== + + +class TestWriteVrtEmptySourceFiles: + """``write_vrt(source_files=[])`` raises with a clear message. + The error path is uncovered. A regression dropping the + pre-validation would surface much further down as an IndexError + when computing the bounding box of zero sources.""" + + def test_empty_list_raises(self, tmp_path): + vrt_path = str(tmp_path / 'should_not_exist.vrt') + with pytest.raises(ValueError, match="source_files must not be empty"): + write_vrt(vrt_path, []) + + def test_empty_list_does_not_create_file(self, tmp_path): + vrt_path = str(tmp_path / 'should_not_exist_2.vrt') + try: + write_vrt(vrt_path, []) + except ValueError: + pass + assert not os.path.exists(vrt_path) + + +# ========================================================================== +# Cat 4 MEDIUM #2: read_geotiff_gpu(dtype=) +# ========================================================================== + + +@_gpu_only +class TestReadGeotiffGpuDtype: + """``read_geotiff_gpu(dtype=...)`` casts on device. The eager CPU + path has TestDtypeEager; the dask path has TestDtypeDask. 
The GPU + path had no equivalent.""" + + def test_float64_to_float32(self, float64_tif): + path, orig = float64_tif + result = read_geotiff_gpu(path, dtype='float32') + assert result.dtype == np.float32 + np.testing.assert_array_almost_equal( + result.data.get(), orig.astype(np.float32), decimal=6) + + def test_float64_to_float16(self, float64_tif): + path, _ = float64_tif + result = read_geotiff_gpu(path, dtype=np.float16) + assert result.dtype == np.float16 + + def test_uint16_to_int32(self, uint16_tif): + path, orig = uint16_tif + result = read_geotiff_gpu(path, dtype='int32') + assert result.dtype == np.int32 + np.testing.assert_array_equal(result.data.get(), orig.astype(np.int32)) + + def test_uint16_to_uint8(self, uint16_tif): + path, _ = uint16_tif + result = read_geotiff_gpu(path, dtype='uint8') + assert result.dtype == np.uint8 + + def test_float_to_int_raises(self, float64_tif): + path, _ = float64_tif + # The validator runs before the GPU upload; the error contract is + # the same as the CPU path (``float`` ... ``int``). + with pytest.raises(ValueError, match='float.*int'): + read_geotiff_gpu(path, dtype='int32') + + def test_dtype_none_preserves_native_float64(self, float64_tif): + path, _ = float64_tif + result = read_geotiff_gpu(path, dtype=None) + assert result.dtype == np.float64 + + def test_dtype_none_preserves_native_uint16(self, uint16_tif): + path, _ = uint16_tif + result = read_geotiff_gpu(path, dtype=None) + assert result.dtype == np.uint16 + + +@_gpu_only +class TestOpenGeotiffGpuDispatchDtype: + """``open_geotiff(..., gpu=True, dtype=...)`` forwards through the + dispatcher into ``read_geotiff_gpu``. 
Pin the dispatch path so a + regression dropping ``dtype=`` on the GPU branch surfaces here too.""" + + def test_dispatch_float64_to_float32(self, float64_tif): + path, orig = float64_tif + result = open_geotiff(path, gpu=True, dtype='float32') + assert result.dtype == np.float32 + np.testing.assert_array_almost_equal( + result.data.get(), orig.astype(np.float32), decimal=6) + + def test_dispatch_float_to_int_raises(self, float64_tif): + path, _ = float64_tif + with pytest.raises(ValueError, match='float.*int'): + open_geotiff(path, gpu=True, dtype='int32') + + +@_gpu_only +class TestReadGeotiffGpuChunksDtype: + """``read_geotiff_gpu(chunks=..., dtype=...)`` -- dask + GPU + dtype + combination is a separate dispatch path through the GPU reader and + its own ``astype`` step on the cupy array, then a ``chunk`` call. + Cover the cast for the dask+GPU branch too.""" + + def test_chunks_float64_to_float32(self, float64_tif): + path, orig = float64_tif + result = read_geotiff_gpu(path, chunks=20, dtype='float32') + assert result.dtype == np.float32 + # ``.data`` is a dask array of cupy chunks. Compute, then + # ``.get()`` the resulting cupy array to copy it to the host. + computed = result.data.compute() + np.testing.assert_array_almost_equal( + computed.get(), orig.astype(np.float32), decimal=6) + + +# ========================================================================== +# Cat 4 MEDIUM #3: write_geotiff_gpu(bigtiff=) +# ========================================================================== + + +@_gpu_only +class TestWriteGeotiffGpuBigtiff: + """``write_geotiff_gpu(bigtiff=)`` threads ``force_bigtiff=`` to + ``_assemble_tiff``. The CPU writer has equivalent header-level + bigtiff coverage; the GPU writer did not. 
+ + Small arrays are sufficient because the BigTIFF decision is a + width-of-offset-field switch, not a value-range one -- a forced + BigTIFF on a 64-pixel array produces the same header magic byte + pattern that a >4 GB file would.""" + + def _read_header_is_bigtiff(self, path): + with open(path, 'rb') as fh: + header = parse_header(fh.read(16)) + return header.is_bigtiff + + def test_force_bigtiff_true_writes_bigtiff(self, tmp_path): + import cupy + arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8, dtype=np.float64), + 'x': np.arange(8, dtype=np.float64)}, + ) + path = str(tmp_path / 'gpu_bigtiff_true.tif') + write_geotiff_gpu(da, path, bigtiff=True, tile_size=4) + assert self._read_header_is_bigtiff(path), ( + "write_geotiff_gpu(bigtiff=True) should emit BigTIFF header " + "(magic byte 43)." + ) + # Data round-trips even with the BigTIFF header. + rd = open_geotiff(path) + np.testing.assert_array_equal(rd.values, arr.get()) + + def test_force_bigtiff_false_writes_classic(self, tmp_path): + import cupy + arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8, dtype=np.float64), + 'x': np.arange(8, dtype=np.float64)}, + ) + path = str(tmp_path / 'gpu_bigtiff_false.tif') + write_geotiff_gpu(da, path, bigtiff=False, tile_size=4) + assert not self._read_header_is_bigtiff(path), ( + "write_geotiff_gpu(bigtiff=False) should emit classic TIFF." + ) + + def test_bigtiff_none_stays_classic_small_file(self, tmp_path): + """``bigtiff=None`` (default) is auto: small files should stay + classic. 
Without an explicit None test, a regression flipping + the default to ``True`` would not be caught -- and that would + break interop with older readers that don't accept BigTIFF.""" + import cupy + arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8, dtype=np.float64), + 'x': np.arange(8, dtype=np.float64)}, + ) + path = str(tmp_path / 'gpu_bigtiff_default.tif') + write_geotiff_gpu(da, path, tile_size=4) + assert not self._read_header_is_bigtiff(path), ( + "write_geotiff_gpu default should auto-pick classic TIFF for " + "tiny outputs; a default switch to BigTIFF would break " + "older readers." + ) + + def test_to_geotiff_gpu_bigtiff_threads_through(self, tmp_path): + """``to_geotiff(..., gpu=True, bigtiff=True)`` dispatches into + ``write_geotiff_gpu(bigtiff=True)``. Cover the dispatcher's + thread-through so a regression dropping ``bigtiff=`` on the GPU + dispatch branch surfaces here too.""" + import cupy + arr = cupy.arange(64, dtype=cupy.float32).reshape(8, 8) + da = xr.DataArray( + arr, dims=['y', 'x'], + coords={'y': np.arange(8, dtype=np.float64), + 'x': np.arange(8, dtype=np.float64)}, + ) + path = str(tmp_path / 'to_gpu_bigtiff_true.tif') + to_geotiff(da, path, gpu=True, bigtiff=True, tile_size=4) + assert self._read_header_is_bigtiff(path), ( + "to_geotiff(gpu=True, bigtiff=True) should reach the GPU " + "writer with force_bigtiff=True propagated through." 
+ ) + rd = open_geotiff(path) + np.testing.assert_array_equal(rd.values, arr.get()) From fdeecbc076a317d6421550b09ac4e7197f6a8138 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 12 May 2026 06:20:50 -0700 Subject: [PATCH 2/2] Strengthen crs_wkt fall-back assertion (PR #1656) --- .../tests/test_kwarg_behaviour_2026_05_12.py | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py index 180266c2d..033730273 100644 --- a/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py +++ b/xrspatial/geotiff/tests/test_kwarg_behaviour_2026_05_12.py @@ -226,22 +226,20 @@ def test_crs_wkt_override_wins(self, source_tif, tmp_path): assert parsed.crs_wkt == override def test_crs_wkt_none_falls_back_to_first_source(self, source_tif, tmp_path): - """No override means the first source's WKT is used. The source - was written with crs=4326, so the WKT must mention EPSG:4326's - marker text (any non-empty WKT is the contract; we check it is - not the override value to defend against silent default - substitution by the writer).""" + """No override means the first source's WKT is used. Pin the + contract: the default-VRT's parsed crs_wkt must be present, + non-empty, and match the source TIF's own crs_wkt (no silent + substitution, no None on the fall-back path).""" vrt_path = str(tmp_path / 'crs_wkt_default.vrt') write_vrt(vrt_path, [source_tif]) parsed = self._read_parsed(vrt_path, tmp_path) - # The first source has crs=4326. The WKT will be something - # non-empty mentioning EPSG:4326 (precise WKT text depends on - # the EPSG database wired into the writer). - if parsed.crs_wkt is not None: - # When the WKT is present it must come from the source - # (mentions a geographic marker) rather than the override. 
- text = parsed.crs_wkt.lower() - assert 'unittest_override_sweep_2026_05_12' not in text + + source_da = open_geotiff(source_tif) + source_wkt = source_da.attrs.get('crs_wkt') + + assert parsed.crs_wkt is not None + assert parsed.crs_wkt != '' + assert parsed.crs_wkt == source_wkt def test_crs_wkt_override_distinct_from_default(self, source_tif, tmp_path): """The override and default WKT must produce *different* on-disk