From e9a3ed36a1fcfbb064050266c7e949baf5105658 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 14 May 2026 09:47:44 -0700 Subject: [PATCH] geotiff: reject write_geotiff_gpu jpeg by default, gate behind opt-in flag (#1845) The CPU writer to_geotiff already raised ValueError for compression='jpeg' because the encoder skips the TIFF JPEGTables tag (347) and the resulting files are unreadable by libtiff, GDAL, and rasterio. write_geotiff_gpu sat in the same module and silently accepted the same kwarg, producing the same broken format. Two public entry points in the same module disagreed about the same codec. Reject compression='jpeg' on write_geotiff_gpu by default with the same wording to_geotiff uses. Add allow_internal_only_jpeg=False to keep the experimental internal-reader-only path reachable for callers who want it; when the flag is set the writer proceeds and emits a GeoTIFFFallbackWarning so the round-trip caveat is loud at call time. Updates the four existing GPU JPEG tests to pass the opt-in flag and adds a new test module covering the rejection (CPU-only, no CUDA needed) and the opt-in warning behaviour. --- xrspatial/geotiff/__init__.py | 64 ++++++-- ...st_gpu_jpeg_interop_reject_issue_D_1845.py | 151 ++++++++++++++++++ ...gpu_writer_compression_modes_2026_05_11.py | 29 +++- 3 files changed, 228 insertions(+), 16 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index c9a30c5f0..f350ddbb4 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -3326,7 +3326,8 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, bigtiff: bool | None = None, max_z_error: float = 0.0, streaming_buffer_bytes: int = 256 * 1024 * 1024, - photometric: str | int = 'auto') -> None: + photometric: str | int = 'auto', + allow_internal_only_jpeg: bool = False) -> None: """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression. Tiles are extracted and compressed on the GPU via nvCOMP, then @@ -3371,17 +3372,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, - ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch compression on the GPU -- the fastest paths and the reason to use this entry point. - - ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow - otherwise. Note that ``to_geotiff`` rejects - ``compression='jpeg'`` at runtime because its CPU encoder - omits the required TIFF JPEGTables tag (347); this GPU entry - point instead emits self-contained JFIF tiles. The two - writers therefore disagree about JPEG-in-TIFF interop. Files - produced here decode through this library's own reader but - may not round-trip through GDAL, rasterio, or libtiff - readers that require the JPEGTables tag. Treat the JPEG path - as experimental and internal-reader-only until the - JPEGTables fix lands. + - ``'jpeg'``: rejected by default for parity with + ``to_geotiff``. Both writers emit self-contained JFIF tiles + and skip the required TIFF JPEGTables tag (347), so the + resulting files are unreadable by libtiff, GDAL, and + rasterio. Pass ``allow_internal_only_jpeg=True`` to opt in + to the experimental encode path; the writer then routes to + nvJPEG when libnvjpeg is loadable and falls back to Pillow + otherwise, and emits a ``GeoTIFFFallbackWarning`` so the + caller knows the output will not round-trip through external + readers. See issue #1845. - ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when available, glymur CPU encode otherwise. The two paths are not byte-for-byte identical (different libraries, different @@ -3444,6 +3444,15 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, GPU writer forwards this kwarg unchanged. Default ``'auto'`` writes MinIsBlack for any band count, so a 4-band raster is not silently tagged as RGB+alpha (issue #1769). + allow_internal_only_jpeg : bool + Opt in to the experimental ``compression='jpeg'`` encode path + (default ``False``). The encoder emits self-contained JFIF + tiles without the TIFF JPEGTables tag (347); the file decodes + through this library's reader but not through libtiff, GDAL, + or rasterio. With the flag set, the write proceeds and a + ``GeoTIFFFallbackWarning`` is emitted at call time. Without + the flag, ``compression='jpeg'`` raises ``ValueError`` for + parity with ``to_geotiff``. See issue #1845. Raises ------ @@ -3461,6 +3470,37 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") + # JPEG-in-TIFF parity with to_geotiff (issue #1845). The GPU encode + # path writes self-contained JFIF tiles without the TIFF JPEGTables + # tag (347), matching the broken CPU encoder. ``to_geotiff`` refuses + # the codec outright; this writer offered no rejection at all, so + # callers could produce GeoTIFFs that decoded through xrspatial but + # broke in libtiff/GDAL/rasterio. Reject by default with the same + # wording so both entry points agree, and surface an opt-in flag for + # users who explicitly want the internal-only path. + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and not allow_internal_only_jpeg): + raise ValueError( + "compression='jpeg' is not supported: the encoder writes " + "self-contained JFIF streams without the required " + "JPEGTables tag (347), so other readers (libtiff, GDAL, " + "rasterio) reject the file. Use 'deflate', 'zstd', or " + "'lzw' instead. Pass allow_internal_only_jpeg=True to opt " + "in to the experimental internal-reader-only path (issue " + "#1845).") + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and allow_internal_only_jpeg): + warnings.warn( + "write_geotiff_gpu(compression='jpeg', " + "allow_internal_only_jpeg=True) writes JFIF tiles without " + "the TIFF JPEGTables tag (347); the file decodes through " + "xrspatial but may fail in libtiff, GDAL, or rasterio. " + "See issue #1845.", + GeoTIFFFallbackWarning, + stacklevel=2, + ) # MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer. # The GPU writer assembles tile bytes directly on device; threading # the pixel + nodata-sentinel transform through that pipeline is out diff --git a/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py b/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py new file mode 100644 index 000000000..2f88b05b8 --- /dev/null +++ b/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py @@ -0,0 +1,151 @@ +"""Issue #1845: ``write_geotiff_gpu`` must reject ``compression='jpeg'`` +by default. + +Background +---------- +``to_geotiff`` raises a ``ValueError`` for ``compression='jpeg'`` because +the encoder writes self-contained JFIF tiles without the TIFF JPEGTables +tag (347); the resulting files are unreadable by libtiff, GDAL, and +rasterio. The GPU writer sat in the same module and silently accepted +the same kwarg, producing the same broken format. The fix introduces an +``allow_internal_only_jpeg`` opt-in: callers who want the experimental +internal-reader-only path must ask for it explicitly, and they get a +``GeoTIFFFallbackWarning`` reminding them the file will not round-trip +through external readers. + +These tests pin the rejection (CPU-only, no CUDA required) and the +opt-in warning behaviour. The internal-only encode path itself is +covered by the updated tests in +``test_gpu_writer_compression_modes_2026_05_11.py`` which run only when +CUDA is present. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import GeoTIFFFallbackWarning, write_geotiff_gpu + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() + + +def _make_rgb_uint8_da() -> xr.DataArray: + """64x64x3 uint8 RGB raster suitable for the JPEG encode path.""" + rng = np.random.RandomState(0) + arr = rng.randint(0, 256, size=(64, 64, 3), dtype=np.uint8) + return xr.DataArray( + arr, + dims=("y", "x", "band"), + coords={ + "y": np.arange(64, dtype=np.float64), + "x": np.arange(64, dtype=np.float64), + "band": np.array([1, 2, 3], dtype=np.int32), + }, + ) + + +def test_write_geotiff_gpu_rejects_jpeg_without_opt_in(tmp_path): + """``compression='jpeg'`` without the opt-in raises ``ValueError``. + + Mirrors the ``to_geotiff`` rejection so both writers in the same + module agree about JPEG-in-TIFF interop. The check runs before any + GPU work, so the test does not need CUDA. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_issue_D_1845.tif") + + with pytest.raises(ValueError, match="JPEGTables"): + write_geotiff_gpu(da, path, compression='jpeg') + + +def test_write_geotiff_gpu_rejects_jpeg_message_mentions_alternatives(tmp_path): + """The rejection error mentions the same alternative codecs that + ``to_geotiff`` recommends, so callers landing on either entry point + learn what to switch to.""" + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_msg_issue_D_1845.tif") + + with pytest.raises(ValueError) as exc: + write_geotiff_gpu(da, path, compression='jpeg') + + # The shared message wording from to_geotiff. If the two writers + # drift apart, callers reading the error get inconsistent advice. + msg = str(exc.value) + assert "deflate" in msg + assert "zstd" in msg + + +def test_write_geotiff_gpu_rejects_jpeg_case_insensitive(tmp_path): + """Upper-case ``compression='JPEG'`` is rejected too. + + ``to_geotiff`` lower-cases the compression string before comparing, + so the GPU writer must follow the same rule -- otherwise a caller + who types ``'JPEG'`` slips past the gate.""" + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_upper_issue_D_1845.tif") + + with pytest.raises(ValueError, match="JPEGTables"): + write_geotiff_gpu(da, path, compression='JPEG') + + +@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") +def test_write_geotiff_gpu_jpeg_opt_in_emits_warning(tmp_path): + """Setting ``allow_internal_only_jpeg=True`` proceeds with the JPEG + encode and emits ``GeoTIFFFallbackWarning``. + + The warning is the only signal the caller gets that their file may + not round-trip through GDAL or rasterio, so a missing warning here + would let the footgun back in. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "opt_in_issue_D_1845.tif") + + with pytest.warns(GeoTIFFFallbackWarning, match="JPEGTables"): + write_geotiff_gpu( + da, path, + compression='jpeg', + allow_internal_only_jpeg=True, + ) + + # File was actually written, not just warned-about. + import os + assert os.path.exists(path) + assert os.path.getsize(path) > 0 + + +def test_write_geotiff_gpu_non_jpeg_unaffected_by_flag(tmp_path): + """Setting ``allow_internal_only_jpeg=True`` on a non-JPEG codec is a + no-op (no warning, no error). + + The flag is JPEG-specific; other codecs must not pay any cost for + it being present in the signature.""" + pytest.importorskip("cupy") + if not _HAS_GPU: + pytest.skip("cupy + CUDA required") + + da = _make_rgb_uint8_da() + path = str(tmp_path / "non_jpeg_flag_issue_D_1845.tif") + + import warnings as _warnings + with _warnings.catch_warnings(): + _warnings.simplefilter("error", GeoTIFFFallbackWarning) + # Should not raise (no JPEG-related warning fires). + write_geotiff_gpu( + da, path, + compression='zstd', + allow_internal_only_jpeg=True, + ) diff --git a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py index ca6dd4cd6..081d39d9b 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py @@ -228,7 +228,13 @@ def test_write_geotiff_gpu_jpeg_rgb_roundtrip(tmp_path): da, arr = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_rgb.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: the JPEG encode path is opt-in. The writer also + # emits a GeoTIFFFallbackWarning, which is the documented contract. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) out = open_geotiff(path) assert out.shape == arr.shape @@ -251,7 +257,12 @@ def test_write_geotiff_gpu_jpeg_uint8_single_band_roundtrip(tmp_path): da, arr = _make_mono_uint8_da() path = str(tmp_path / "jpeg_mono.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) out = open_geotiff(path) assert out.shape == arr.shape @@ -278,7 +289,12 @@ def test_write_geotiff_gpu_jpeg_uses_nvjpeg_when_available(tmp_path, da, _ = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_nvjpeg_spy.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) assert spy.calls >= 1, ( "libnvjpeg is loadable but _nvjpeg_batch_encode was never called; " @@ -317,7 +333,12 @@ def test_write_geotiff_gpu_jpeg_compression_tag(tmp_path): da, _ = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_tag.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) assert _read_compression_tag(path) == _COMPRESSION_TAGS['jpeg']