diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index c9a30c5f0..f350ddbb4 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -3326,7 +3326,8 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, bigtiff: bool | None = None, max_z_error: float = 0.0, streaming_buffer_bytes: int = 256 * 1024 * 1024, - photometric: str | int = 'auto') -> None: + photometric: str | int = 'auto', + allow_internal_only_jpeg: bool = False) -> None: """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression. Tiles are extracted and compressed on the GPU via nvCOMP, then @@ -3371,17 +3372,16 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, - ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch compression on the GPU -- the fastest paths and the reason to use this entry point. - - ``'jpeg'``: nvJPEG when libnvjpeg is loadable, Pillow - otherwise. Note that ``to_geotiff`` rejects - ``compression='jpeg'`` at runtime because its CPU encoder - omits the required TIFF JPEGTables tag (347); this GPU entry - point instead emits self-contained JFIF tiles. The two - writers therefore disagree about JPEG-in-TIFF interop. Files - produced here decode through this library's own reader but - may not round-trip through GDAL, rasterio, or libtiff - readers that require the JPEGTables tag. Treat the JPEG path - as experimental and internal-reader-only until the - JPEGTables fix lands. + - ``'jpeg'``: rejected by default for parity with + ``to_geotiff``. Both writers emit self-contained JFIF tiles + and skip the required TIFF JPEGTables tag (347), so the + resulting files are unreadable by libtiff, GDAL, and + rasterio. Pass ``allow_internal_only_jpeg=True`` to opt in + to the experimental encode path; the writer then routes to + nvJPEG when libnvjpeg is loadable and falls back to Pillow + otherwise, and emits a ``GeoTIFFFallbackWarning`` so the + caller knows the output will not round-trip through external + readers. See issue #1845. - ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when available, glymur CPU encode otherwise. The two paths are not byte-for-byte identical (different libraries, different @@ -3444,6 +3444,15 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, GPU writer forwards this kwarg unchanged. Default ``'auto'`` writes MinIsBlack for any band count, so a 4-band raster is not silently tagged as RGB+alpha (issue #1769). + allow_internal_only_jpeg : bool + Opt in to the experimental ``compression='jpeg'`` encode path + (default ``False``). The encoder emits self-contained JFIF + tiles without the TIFF JPEGTables tag (347); the file decodes + through this library's reader but not through libtiff, GDAL, + or rasterio. With the flag set, the write proceeds and a + ``GeoTIFFFallbackWarning`` is emitted at call time. Without + the flag, ``compression='jpeg'`` raises ``ValueError`` for + parity with ``to_geotiff``. See issue #1845. Raises ------ @@ -3461,6 +3470,37 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") + # JPEG-in-TIFF parity with to_geotiff (issue #1845). The GPU encode + # path writes self-contained JFIF tiles without the TIFF JPEGTables + # tag (347), matching the broken CPU encoder. ``to_geotiff`` refuses + # the codec outright; this writer offered no rejection at all, so + # callers could produce GeoTIFFs that decoded through xrspatial but + # broke in libtiff/GDAL/rasterio. Reject by default with the same + # wording so both entry points agree, and surface an opt-in flag for + # users who explicitly want the internal-only path. + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and not allow_internal_only_jpeg): + raise ValueError( + "compression='jpeg' is not supported: the encoder writes " + "self-contained JFIF streams without the required " + "JPEGTables tag (347), so other readers (libtiff, GDAL, " + "rasterio) reject the file. Use 'deflate', 'zstd', or " + "'lzw' instead. Pass allow_internal_only_jpeg=True to opt " + "in to the experimental internal-reader-only path (issue " + "#1845).") + if (isinstance(compression, str) + and compression.lower() == 'jpeg' + and allow_internal_only_jpeg): + warnings.warn( + "write_geotiff_gpu(compression='jpeg', " + "allow_internal_only_jpeg=True) writes JFIF tiles without " + "the TIFF JPEGTables tag (347); the file decodes through " + "xrspatial but may fail in libtiff, GDAL, or rasterio. " + "See issue #1845.", + GeoTIFFFallbackWarning, + stacklevel=2, + ) # MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer. # The GPU writer assembles tile bytes directly on device; threading # the pixel + nodata-sentinel transform through that pipeline is out diff --git a/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py b/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py new file mode 100644 index 000000000..2f88b05b8 --- /dev/null +++ b/xrspatial/geotiff/tests/test_gpu_jpeg_interop_reject_issue_D_1845.py @@ -0,0 +1,151 @@ +"""Issue #1845: ``write_geotiff_gpu`` must reject ``compression='jpeg'`` +by default. + +Background +---------- +``to_geotiff`` raises a ``ValueError`` for ``compression='jpeg'`` because +the encoder writes self-contained JFIF tiles without the TIFF JPEGTables +tag (347); the resulting files are unreadable by libtiff, GDAL, and +rasterio. The GPU writer sat in the same module and silently accepted +the same kwarg, producing the same broken format. The fix introduces an +``allow_internal_only_jpeg`` opt-in: callers who want the experimental +internal-reader-only path must ask for it explicitly, and they get a +``GeoTIFFFallbackWarning`` reminding them the file will not round-trip +through external readers. + +These tests pin the rejection (CPU-only, no CUDA required) and the +opt-in warning behaviour. The internal-only encode path itself is +covered by the updated tests in +``test_gpu_writer_compression_modes_2026_05_11.py`` which run only when +CUDA is present. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import GeoTIFFFallbackWarning, write_geotiff_gpu + + +def _gpu_available() -> bool: + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() + + +def _make_rgb_uint8_da() -> xr.DataArray: + """64x64x3 uint8 RGB raster suitable for the JPEG encode path.""" + rng = np.random.RandomState(0) + arr = rng.randint(0, 256, size=(64, 64, 3), dtype=np.uint8) + return xr.DataArray( + arr, + dims=("y", "x", "band"), + coords={ + "y": np.arange(64, dtype=np.float64), + "x": np.arange(64, dtype=np.float64), + "band": np.array([1, 2, 3], dtype=np.int32), + }, + ) + + +def test_write_geotiff_gpu_rejects_jpeg_without_opt_in(tmp_path): + """``compression='jpeg'`` without the opt-in raises ``ValueError``. + + Mirrors the ``to_geotiff`` rejection so both writers in the same + module agree about JPEG-in-TIFF interop. The check runs before any + GPU work, so the test does not need CUDA. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_issue_D_1845.tif") + + with pytest.raises(ValueError, match="JPEGTables"): + write_geotiff_gpu(da, path, compression='jpeg') + + +def test_write_geotiff_gpu_rejects_jpeg_message_mentions_alternatives(tmp_path): + """The rejection error mentions the same alternative codecs that + ``to_geotiff`` recommends, so callers landing on either entry point + learn what to switch to.""" + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_msg_issue_D_1845.tif") + + with pytest.raises(ValueError) as exc: + write_geotiff_gpu(da, path, compression='jpeg') + + # The shared message wording from to_geotiff. If the two writers + # drift apart, callers reading the error get inconsistent advice. + msg = str(exc.value) + assert "deflate" in msg + assert "zstd" in msg + + +def test_write_geotiff_gpu_rejects_jpeg_case_insensitive(tmp_path): + """Upper-case ``compression='JPEG'`` is rejected too. + + ``to_geotiff`` lower-cases the compression string before comparing, + so the GPU writer must follow the same rule -- otherwise a caller + who types ``'JPEG'`` slips past the gate.""" + da = _make_rgb_uint8_da() + path = str(tmp_path / "rejected_upper_issue_D_1845.tif") + + with pytest.raises(ValueError, match="JPEGTables"): + write_geotiff_gpu(da, path, compression='JPEG') + + +@pytest.mark.skipif(not _HAS_GPU, reason="cupy + CUDA required") +def test_write_geotiff_gpu_jpeg_opt_in_emits_warning(tmp_path): + """Setting ``allow_internal_only_jpeg=True`` proceeds with the JPEG + encode and emits ``GeoTIFFFallbackWarning``. + + The warning is the only signal the caller gets that their file may + not round-trip through GDAL or rasterio, so a missing warning here + would let the footgun back in. + """ + da = _make_rgb_uint8_da() + path = str(tmp_path / "opt_in_issue_D_1845.tif") + + with pytest.warns(GeoTIFFFallbackWarning, match="JPEGTables"): + write_geotiff_gpu( + da, path, + compression='jpeg', + allow_internal_only_jpeg=True, + ) + + # File was actually written, not just warned-about. + import os + assert os.path.exists(path) + assert os.path.getsize(path) > 0 + + +def test_write_geotiff_gpu_non_jpeg_unaffected_by_flag(tmp_path): + """Setting ``allow_internal_only_jpeg=True`` on a non-JPEG codec is a + no-op (no warning, no error). + + The flag is JPEG-specific; other codecs must not pay any cost for + it being present in the signature.""" + pytest.importorskip("cupy") + if not _HAS_GPU: + pytest.skip("cupy + CUDA required") + + da = _make_rgb_uint8_da() + path = str(tmp_path / "non_jpeg_flag_issue_D_1845.tif") + + import warnings as _warnings + with _warnings.catch_warnings(): + _warnings.simplefilter("error", GeoTIFFFallbackWarning) + # Should not raise (no JPEG-related warning fires). + write_geotiff_gpu( + da, path, + compression='zstd', + allow_internal_only_jpeg=True, + ) diff --git a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py index ca6dd4cd6..081d39d9b 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_compression_modes_2026_05_11.py @@ -228,7 +228,13 @@ def test_write_geotiff_gpu_jpeg_rgb_roundtrip(tmp_path): da, arr = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_rgb.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: the JPEG encode path is opt-in. The writer also + # emits a GeoTIFFFallbackWarning, which is the documented contract. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) out = open_geotiff(path) assert out.shape == arr.shape @@ -251,7 +257,12 @@ def test_write_geotiff_gpu_jpeg_uint8_single_band_roundtrip(tmp_path): da, arr = _make_mono_uint8_da() path = str(tmp_path / "jpeg_mono.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) out = open_geotiff(path) assert out.shape == arr.shape @@ -278,7 +289,12 @@ def test_write_geotiff_gpu_jpeg_uses_nvjpeg_when_available(tmp_path, da, _ = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_nvjpeg_spy.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) assert spy.calls >= 1, ( "libnvjpeg is loadable but _nvjpeg_batch_encode was never called; " @@ -317,7 +333,12 @@ def test_write_geotiff_gpu_jpeg_compression_tag(tmp_path): da, _ = _make_rgb_uint8_da() path = str(tmp_path / "jpeg_tag.tif") - write_geotiff_gpu(da, path, compression='jpeg') + # Issue #1845: opt-in flag required; warning fires. + with pytest.warns(Warning): + write_geotiff_gpu( + da, path, compression='jpeg', + allow_internal_only_jpeg=True, + ) assert _read_compression_tag(path) == _COMPRESSION_TAGS['jpeg']