diff --git a/examples/user_guide/39_GeoTIFF_IO.ipynb b/examples/user_guide/39_GeoTIFF_IO.ipynb index 5038dbdbb..7e6746bc2 100644 --- a/examples/user_guide/39_GeoTIFF_IO.ipynb +++ b/examples/user_guide/39_GeoTIFF_IO.ipynb @@ -261,7 +261,7 @@ "}\n", "\n", ".xr-group-name::before {\n", - " content: \"📁\";\n", + " content: \"\ud83d\udcc1\";\n", " padding-right: 0.3em;\n", "}\n", "\n", @@ -324,7 +324,7 @@ "\n", ".xr-section-summary-in + label:before {\n", " display: inline-block;\n", - " content: \"►\";\n", + " content: \"\u25ba\";\n", " font-size: 11px;\n", " width: 15px;\n", " text-align: center;\n", @@ -335,7 +335,7 @@ "}\n", "\n", ".xr-section-summary-in:checked + label:before {\n", - " content: \"▼\";\n", + " content: \"\u25bc\";\n", "}\n", "\n", ".xr-section-summary-in:checked + label > span {\n", @@ -970,6 +970,49 @@ "The VRT is a few hundred bytes of XML. `open_geotiff` assembles the tiles when you read it." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Supported features by tier\n", + "\n", + "`xrspatial.geotiff.SUPPORTED_FEATURES` is the source of truth for which features sit in the stable core, which are advanced but supported, which are experimental, and which require an opt-in flag because they do not round-trip through external readers. The table below is built from that constant so the documentation cannot drift from the code (issue #2137).\n", + "\n", + "Tiers:\n", + "\n", + "- **stable** -- the path a new user should be on. Local file in, local file out, lossless codec, axis-aligned grid.\n", + "- **advanced** -- works and is tested, but the caller should know the failure mode (cloud cost, partial VRT mosaics, rotated transforms drop on write, BigTIFF promotion, etc.).\n", + "- **experimental** -- no claim about cross-backend numerical parity or external interop. 
Tier 3 codecs (`lerc`, `jpeg2000` / `j2k`, `lz4`) require `allow_experimental_codecs=True` on `to_geotiff` and `write_geotiff_gpu`; the GPU read/write paths use `gpu=True` as their explicit opt-in.\n", + "- **internal_only** -- the strictest tier. `compression='jpeg'` writes self-contained JFIF tiles without the TIFF JPEGTables tag, so the output decodes through xrspatial but not libtiff / GDAL / rasterio. Requires the dedicated `allow_internal_only_jpeg=True` flag (issue #1845); `allow_experimental_codecs` does not cover it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from xrspatial.geotiff import SUPPORTED_FEATURES\n", + "\n", + "# Render SUPPORTED_FEATURES as a markdown table grouped by tier.\n", + "# This cell renders from the live constant so the table tracks any\n", + "# future additions to the feature inventory.\n", + "from collections import defaultdict\n", + "from IPython.display import Markdown\n", + "\n", + "_TIER_ORDER = ['stable', 'advanced', 'experimental', 'internal_only']\n", + "_by_tier = defaultdict(list)\n", + "for name, tier in SUPPORTED_FEATURES.items():\n", + " _by_tier[tier].append(name)\n", + "\n", + "lines = ['| Feature | Tier |', '| --- | --- |']\n", + "for tier in _TIER_ORDER:\n", + " for name in sorted(_by_tier[tier]):\n", + " lines.append(f'| `{name}` | {tier} |')\n", + "\n", + "Markdown('\\n'.join(lines))" + ] + }, { "cell_type": "code", "execution_count": 8, diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 484064c4f..082f311aa 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -128,6 +128,7 @@ 'MixedBandMetadataError', 'NonUniformCoordsError', 'RotatedTransformError', + 'SUPPORTED_FEATURES', 'UnparseableCRSError', 'UnsafeURLError', 'open_geotiff', @@ -140,6 +141,44 @@ ] +# ``SUPPORTED_FEATURES`` and its derived ``_EXPERIMENTAL_CODECS`` set +# live in ``_attrs.py`` so the writers can import them at 
module scope +# without a circular dependency (this ``__init__`` already imports the +# writers, so the writers cannot import from ``..`` at module scope). +# Only ``SUPPORTED_FEATURES`` is re-exported below, so the public API is +# ``xrspatial.geotiff.SUPPORTED_FEATURES``. +# +# Tier semantics +# -------------- +# - ``"stable"`` -- the path a new user should be on. Local file in, +# local file out, lossless codec, axis-aligned grid. Covered by the +# cross-backend parity matrix. +# - ``"advanced"`` -- works and is tested, but the caller should know +# what they are signing up for (cloud cost, partial VRT mosaics, +# rotated transforms dropping on write, BigTIFF promotion, etc.). No +# kwarg gate; the docstring carries an ``Advanced:`` marker. +# - ``"experimental"`` -- works in our tests, no claim about external +# interop or numerical parity across backends. Tier 3 codecs +# (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``) require +# ``allow_experimental_codecs=True`` on the writers; the GPU paths +# use ``gpu=True`` as the explicit opt-in. +# - ``"internal_only"`` -- the strictest tier. Already gated behind +# its own dedicated flag because the output does not round-trip +# through libtiff / GDAL / rasterio. ``codec.jpeg`` requires +# ``allow_internal_only_jpeg=True`` (issue #1845); +# ``allow_experimental_codecs`` does NOT cover it. +# +# Tests in ``xrspatial/geotiff/tests/test_supported_features_tiers_2137.py`` +# walk the mapping and assert that every Tier 3 codec rejects without +# the opt-in flag and every Tier 4 codec rejects without its own +# dedicated flag. The user-guide notebook +# (``examples/user_guide/39_GeoTIFF_IO.ipynb``) renders the same +# mapping as a table so the documentation cannot drift from the code. +# +# See issue #2137. +from ._attrs import SUPPORTED_FEATURES # noqa: E402 + + def _read_geo_info(source, *, overview_level: int | None = None, allow_rotated: bool = False): """Read only the geographic metadata and image dimensions from a GeoTIFF. 
@@ -287,6 +326,15 @@ def open_geotiff(source: str | BinaryIO, *, ) -> xr.DataArray: """Read a GeoTIFF, COG, or VRT file into an xarray.DataArray. + Tier: Stable for local-file reads on axis-aligned grids with an + EPSG CRS in ``attrs['crs']``. Cloud / fsspec URIs, HTTP range + reads, ``.vrt`` mosaics, external ``.tif.ovr`` sidecars, + ``allow_rotated=True``, and ``allow_unparseable_crs=True`` are + Advanced (work, but each carries a specific failure mode named on + the parameter doc). ``gpu=True`` is Experimental. See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map (issue #2137). + Automatically dispatches to the best backend: - ``gpu=True``: GPU-accelerated read via nvCOMP (returns CuPy) - ``chunks=N``: Dask lazy read via windowed chunks @@ -319,12 +367,18 @@ def open_geotiff(source: str | BinaryIO, *, chunks : int, tuple, or None Chunk size for Dask lazy reading. gpu : bool - Use GPU-accelerated decompression (requires cupy + nvCOMP). + Experimental: requires cupy + nvCOMP for the codec the file + carries; the reader falls back to CPU when the optional + libraries are unavailable unless ``on_gpu_failure='strict'`` is + also set. Use GPU-accelerated decompression. max_pixels : int or None Maximum allowed pixel count (width * height * samples). None uses the default (~1 billion). Raise to read legitimately large files. max_cloud_bytes : int or None, optional + Advanced: fsspec cloud reads can run up cost on large objects; + the budget defends against accidental large downloads but the + eager path still pulls the full object once the budget allows. Byte ceiling for eager reads from fsspec sources (``s3://``, ``gs://``, ``az://``, ``abfs://``, ``memory://``, ...). The compressed object size is checked against this budget before @@ -345,6 +399,10 @@ def open_geotiff(source: str | BinaryIO, *, because the policy only applies to the GPU pipeline. See ``read_geotiff_gpu`` for the full description. 
missing_sources : {'raise', 'warn'}, optional + Advanced: VRT mosaics can return partial output under + ``missing_sources='warn'`` when a backing source is unreadable; + the ``attrs['vrt_holes']`` entry records which sources were + skipped so downstream code can detect the partial mosaic. Forwarded to ``read_vrt`` when the source is a ``.vrt`` file. When the caller does not pass this kwarg, the public ``read_vrt`` default applies (``'raise'`` since #1860). @@ -377,6 +435,9 @@ def open_geotiff(source: str | BinaryIO, *, pixel, and ``dtype=`` then raises ``ValueError`` on the float-to-int cast. allow_rotated : bool, default False + Advanced: read-only opt-in; ``to_geotiff`` does not currently + emit ``rotated_affine`` so a read-then-write round-trip writes + an identity-affine output and silently drops the rotation. Read-side opt-in for rotated / sheared ``ModelTransformationTag`` files. By default the reader raises ``NotImplementedError`` because the rest of xrspatial assumes an axis-aligned grid. diff --git a/xrspatial/geotiff/_attrs.py b/xrspatial/geotiff/_attrs.py index f92422321..2b0fe4966 100644 --- a/xrspatial/geotiff/_attrs.py +++ b/xrspatial/geotiff/_attrs.py @@ -160,6 +160,61 @@ ) +# Tiered feature inventory for the public geotiff surface (issue #2137). +# Defined in ``_attrs.py`` (not the package ``__init__.py``) so the writers +# can import it at module scope without a circular dependency: the package +# ``__init__`` already imports the writers. The package re-exports +# ``SUPPORTED_FEATURES`` so the public API stays +# ``xrspatial.geotiff.SUPPORTED_FEATURES``. +# +# See ``xrspatial/geotiff/__init__.py`` for the per-tier semantics; the +# inline comments here track the codec/reader/writer split used by the +# user-guide notebook table. +SUPPORTED_FEATURES = { + # Codecs. Tier 1 lossless integer + float byte-for-byte round-trip. 
+ 'codec.none': 'stable', + 'codec.deflate': 'stable', + 'codec.lzw': 'stable', + 'codec.packbits': 'stable', + 'codec.zstd': 'stable', + # Tier 3 codecs: require ``allow_experimental_codecs=True``. + 'codec.lerc': 'experimental', + 'codec.jpeg2000': 'experimental', + 'codec.j2k': 'experimental', + 'codec.lz4': 'experimental', + # Tier 4 codec: requires the dedicated ``allow_internal_only_jpeg`` + # opt-in (issue #1845). Not covered by ``allow_experimental_codecs``. + 'codec.jpeg': 'internal_only', + # Read paths. + 'reader.local_file': 'stable', + 'reader.fsspec': 'advanced', + 'reader.http': 'advanced', + 'reader.vrt': 'advanced', + 'reader.sidecar_ovr': 'advanced', + 'reader.allow_rotated': 'advanced', + 'reader.allow_unparseable_crs': 'advanced', + 'reader.gpu': 'experimental', + # Write paths. + 'writer.local_file': 'stable', + 'writer.cog': 'advanced', + 'writer.overviews': 'advanced', + 'writer.bigtiff': 'advanced', + 'writer.gpu': 'experimental', + 'writer.gdal_metadata_xml': 'experimental', + 'writer.extra_tags': 'experimental', +} + + +# Tier 3 codec names (lower-cased) gated behind +# ``allow_experimental_codecs`` on the writers. Derived from +# ``SUPPORTED_FEATURES`` so the gate cannot drift from the docs. +_EXPERIMENTAL_CODECS = frozenset( + name.split('.', 1)[1].lower() + for name, tier in SUPPORTED_FEATURES.items() + if name.startswith('codec.') and tier == 'experimental' +) + + # TIFF type ids needed when synthesizing extra_tags entries from attrs. _TIFF_BYTE = 1 _TIFF_ASCII = 2 diff --git a/xrspatial/geotiff/_backends/dask.py b/xrspatial/geotiff/_backends/dask.py index ccb52f96c..3f57061be 100644 --- a/xrspatial/geotiff/_backends/dask.py +++ b/xrspatial/geotiff/_backends/dask.py @@ -45,6 +45,13 @@ def read_geotiff_dask(source: str, *, mask_nodata: bool = True) -> xr.DataArray: """Read a GeoTIFF as a dask-backed DataArray for out-of-core processing. + Tier: Stable for local-file reads on axis-aligned grids with the + Tier 1 codec set. 
``allow_rotated`` / ``allow_unparseable_crs`` + are Advanced (read-only opt-ins; round-trip semantics are listed + on the parameter docs). See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map + (issue #2137). + Each chunk is loaded lazily via windowed reads. Parameters diff --git a/xrspatial/geotiff/_backends/gpu.py b/xrspatial/geotiff/_backends/gpu.py index fb5201ac1..955af327d 100644 --- a/xrspatial/geotiff/_backends/gpu.py +++ b/xrspatial/geotiff/_backends/gpu.py @@ -91,6 +91,12 @@ def read_geotiff_gpu(source: str, *, ) -> xr.DataArray: """Read a GeoTIFF with GPU-accelerated decompression via Numba CUDA. + Tier: Experimental (issue #2137). Requires cupy + numba CUDA plus + optional nvCOMP / nvJPEG / nvJPEG2K libraries for codec-specific + acceleration; cross-backend numerical parity with the CPU reader + is tested for the Tier 1 codec set only. See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + Decompresses all tiles in parallel on the GPU and returns a CuPy-backed DataArray that stays on device memory. No CPU->GPU transfer needed for downstream xrspatial GPU operations. diff --git a/xrspatial/geotiff/_backends/vrt.py b/xrspatial/geotiff/_backends/vrt.py index 894b98643..a68e2a8de 100644 --- a/xrspatial/geotiff/_backends/vrt.py +++ b/xrspatial/geotiff/_backends/vrt.py @@ -45,6 +45,13 @@ def read_vrt(source: str, *, mask_nodata: bool = True) -> xr.DataArray: """Read a GDAL Virtual Raster Table (.vrt) into an xarray.DataArray. + Tier: Advanced (issue #2137). VRT mosaics work and are tested, but + the caller should know the failure modes: cross-source nodata can + disagree (gated by ``band_nodata``), backing files can be missing + (gated by ``missing_sources``), and per-band metadata mismatch + raises a typed error rather than silently flattening. See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. + The VRT's source GeoTIFFs are read via windowed reads and assembled into a single array. 
diff --git a/xrspatial/geotiff/_writers/eager.py b/xrspatial/geotiff/_writers/eager.py index 012ef683a..69d153415 100644 --- a/xrspatial/geotiff/_writers/eager.py +++ b/xrspatial/geotiff/_writers/eager.py @@ -22,6 +22,7 @@ from typing import BinaryIO from .._attrs import ( + _EXPERIMENTAL_CODECS, _LEVEL_RANGES, _VALID_COMPRESSIONS, _extract_rich_tags, @@ -71,9 +72,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray, max_z_error: float = 0.0, photometric: str | int = 'auto', allow_internal_only_jpeg: bool = False, + allow_experimental_codecs: bool = False, allow_unparseable_crs: bool = False) -> str | BinaryIO: """Write data as a GeoTIFF or Cloud Optimized GeoTIFF. + Tier: Stable for local-file output with ``compression`` in + ``{'none', 'deflate', 'lzw', 'packbits', 'zstd'}`` on an axis-aligned + grid. ``cog=True`` / overviews / BigTIFF are Advanced (work, but the + caller should know the failure modes). GPU output, GDAL XML metadata + pass-through, and ``extra_tags`` are Experimental. ``compression`` in + ``{'lerc', 'jpeg2000', 'j2k', 'lz4'}`` is Experimental and requires + ``allow_experimental_codecs=True``. ``compression='jpeg'`` is + Internal-only and requires the dedicated ``allow_internal_only_jpeg`` + flag. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full + tier map (issue #2137). + Dask-backed DataArrays are written in streaming mode: one tile-row at a time, without materialising the full array into RAM. Peak memory is roughly ``tile_size * width * bytes_per_sample``. COG @@ -114,14 +127,28 @@ def to_geotiff(data: xr.DataArray | np.ndarray, Codec name. One of ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``. - ``'jpeg'`` is rejected on write by default because the encoder - omits the JPEGTables tag and produced files do not round-trip - through libtiff / GDAL / rasterio. 
Pass - ``allow_internal_only_jpeg=True`` to opt in to the experimental - internal-reader-only path (see that parameter for details), or - use ``'deflate'``, ``'zstd'``, or ``'lzw'`` instead. ``'lerc'`` + + Stable codecs (Tier 1, lossless, byte-for-byte round-trip): + ``'none'``, ``'deflate'``, ``'lzw'``, ``'packbits'``, + ``'zstd'``. + + Experimental codecs (Tier 3): ``'lerc'``, ``'jpeg2000'`` / + ``'j2k'``, ``'lz4'``. Rejected by default; pass + ``allow_experimental_codecs=True`` to opt in. The opt-in emits + ``GeoTIFFFallbackWarning`` once per call so the caller knows + the chosen codec carries no cross-backend numerical parity + claim and uneven reader support across GDAL versions. ``'lerc'`` accepts ``max_z_error`` for lossy compression with a bounded per-pixel error. + + Internal-only codec (Tier 4): ``'jpeg'``. Rejected on write by + default because the encoder omits the JPEGTables tag and the + produced files do not round-trip through libtiff / GDAL / + rasterio. Pass ``allow_internal_only_jpeg=True`` to opt in to + the internal-reader-only path (see that parameter for details). + ``allow_experimental_codecs=True`` does NOT cover ``'jpeg'``: + internal-only is a stricter tier than experimental, and the two + flags do not collapse into one switch. compression_level : int or None Compression effort level. None uses each codec's default (6 for deflate/zstd). Valid ranges: deflate 1-9, zstd 1-22, lz4 0-16. @@ -144,9 +171,16 @@ def to_geotiff(data: xr.DataArray | np.ndarray, * ``3`` -> floating-point predictor (float dtypes only; typically gives better deflate/zstd ratios on float data than predictor 2). cog : bool - Write as Cloud Optimized GeoTIFF. + Advanced: COG output materialises the full array because + overview pyramids need it, and the all-IFDs-at-file-start layout + only round-trips through readers that honour the COG layout + contract. Write as Cloud Optimized GeoTIFF. 
overview_levels : list[int] or None - Overview decimation factors relative to full resolution. + Advanced: overview pyramids are an optional COG feature; the + decimation factors and resampling choice affect downstream + analytics in ways that are not byte-for-byte reproducible + across backends. Overview decimation factors relative to full + resolution. Each entry must be a power-of-two integer >= 2, and the list must be strictly increasing (e.g. ``[2, 4, 8]`` writes overviews at 1/2, 1/4 and 1/8 of the full resolution). @@ -158,11 +192,17 @@ def to_geotiff(data: xr.DataArray | np.ndarray, Resampling method for overviews: 'mean' (default), 'nearest', 'min', 'max', 'median', 'mode', or 'cubic'. bigtiff : bool or None - Force BigTIFF (64-bit offsets). None (default) auto-promotes - when the estimated file size would exceed the classic-TIFF 4 GB - limit. Matches the same kwarg on ``write_geotiff_gpu``. + Advanced: BigTIFF uses 64-bit offsets; older readers that only + speak classic TIFF cannot open the output. Force BigTIFF + (64-bit offsets). None (default) auto-promotes when the + estimated file size would exceed the classic-TIFF 4 GB limit. + Matches the same kwarg on ``write_geotiff_gpu``. gpu : bool or None - Force GPU compression. None (default) auto-detects CuPy data. + Experimental: requires cupy + numba CUDA, plus the optional + nvCOMP / nvJPEG / nvJPEG2K libraries for codec-specific + acceleration; backend parity with the CPU writer is tested for + the Tier 1 codec set only. Force GPU compression. None + (default) auto-detects CuPy data. streaming_buffer_bytes : int Soft cap on bytes materialised per dask compute call when streaming a dask-backed DataArray. Defaults to 256 MB. Wide @@ -199,6 +239,21 @@ def to_geotiff(data: xr.DataArray | np.ndarray, chosen value; only these two tag ids are overridable so other auto-emitted tags such as ``ImageWidth`` or ``StripOffsets`` remain protected. 
+ allow_experimental_codecs : bool + Opt in to the Tier 3 experimental codecs ``'lerc'``, + ``'jpeg2000'`` / ``'j2k'``, and ``'lz4'`` (default ``False``). + Setting ``compression=`` to one of those codecs without this + flag raises ``ValueError`` whose message names the flag. With + the flag set, the write proceeds and a + ``GeoTIFFFallbackWarning`` is emitted once per call so the + caller knows the chosen codec carries no cross-backend + numerical parity claim and uneven reader support across GDAL + versions. Does NOT cover ``compression='jpeg'``: the + internal-only JPEG path keeps its own dedicated + ``allow_internal_only_jpeg`` flag because internal-only is a + stricter tier than experimental. The kwarg is forwarded + unchanged to ``write_geotiff_gpu`` on the GPU dispatch path. + See issue #2137. allow_internal_only_jpeg : bool Opt in to the experimental ``compression='jpeg'`` encode path (default ``False``). The encoder writes self-contained JFIF @@ -342,6 +397,26 @@ def to_geotiff(data: xr.DataArray | np.ndarray, # of ``to_geotiff(gpu=True, compression='jpeg', # allow_internal_only_jpeg=True)``. + # Tier 3 experimental-codec gate (issue #2137). Lerc, jpeg2000 / + # j2k, and lz4 sit in ``_VALID_COMPRESSIONS`` for wire-format + # reasons but their cross-backend numerical parity, reader + # support across GDAL versions, and (for lerc) bounded lossy + # behaviour all carry caveats the default writer should not + # silently accept. Mirror the ``allow_internal_only_jpeg`` shape + # so callers learn the opt-in name from the rejection message + # and can fix the call site in one line. The opt-in warning is + # emitted below once the GPU dispatch decision is known so the + # GPU path does not double-warn (``write_geotiff_gpu`` emits its + # own warning on the GPU path). 
+ if (compression.lower() in _EXPERIMENTAL_CODECS + and not allow_experimental_codecs): + raise ValueError( + f"compression={compression!r} is experimental: cross-backend " + "numerical parity is not claimed and reader support across " + "GDAL versions is uneven. Pass allow_experimental_codecs=True " + "to opt in, or use 'deflate', 'zstd', or 'lzw' for a " + "stable lossless codec (issue #2137).") + # max_z_error only applies to LERC; reject negative values and reject # non-zero values paired with any other codec so the caller learns the # parameter was ignored before bytes hit disk. @@ -394,6 +469,23 @@ def to_geotiff(data: xr.DataArray | np.ndarray, GeoTIFFFallbackWarning, stacklevel=2, ) + # Tier 3 experimental-codec opt-in warning (issue #2137). Mirrors + # the JPEG flag's "warn once, after dispatch is resolved" shape: + # ``write_geotiff_gpu`` emits its own warning on the GPU path with + # a backend-specific caveat, so the CPU dispatcher only warns when + # the write is staying on CPU. + if (isinstance(compression, str) + and compression.lower() in _EXPERIMENTAL_CODECS + and allow_experimental_codecs + and not use_gpu): + warnings.warn( + f"to_geotiff(compression={compression!r}, " + "allow_experimental_codecs=True): experimental codec, " + "no cross-backend parity claim and uneven reader support " + "across GDAL versions. 
See issue #2137.", + GeoTIFFFallbackWarning, + stacklevel=2, + ) # tile_size only applies to tiled output; warn if the caller passed a # non-default size alongside strip mode (it would otherwise be silently @@ -484,6 +576,7 @@ def to_geotiff(data: xr.DataArray | np.ndarray, streaming_buffer_bytes=streaming_buffer_bytes, photometric=photometric, allow_internal_only_jpeg=allow_internal_only_jpeg, + allow_experimental_codecs=allow_experimental_codecs, allow_unparseable_crs=allow_unparseable_crs, ) return path diff --git a/xrspatial/geotiff/_writers/gpu.py b/xrspatial/geotiff/_writers/gpu.py index 4581ff2ba..92ce94980 100644 --- a/xrspatial/geotiff/_writers/gpu.py +++ b/xrspatial/geotiff/_writers/gpu.py @@ -18,6 +18,7 @@ from typing import BinaryIO from .._attrs import ( + _EXPERIMENTAL_CODECS, _extract_rich_tags, _resolve_nodata_attr, _should_restore_nan_sentinel, @@ -82,10 +83,22 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, max_z_error: float = 0.0, photometric: str | int = 'auto', allow_internal_only_jpeg: bool = False, + allow_experimental_codecs: bool = False, allow_unparseable_crs: bool = False ) -> str | BinaryIO: """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression. + Tier: Experimental (issue #2137). The GPU writer requires cupy + + numba CUDA plus optional nvCOMP / nvJPEG / nvJPEG2K libraries for + codec-specific acceleration; cross-backend numerical parity with + ``to_geotiff`` is tested for the Tier 1 codec set only. Tier 3 + codecs (``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, ``'lz4'``) require + the explicit ``allow_experimental_codecs=True`` opt-in; the + internal-only ``'jpeg'`` codec keeps its own dedicated + ``allow_internal_only_jpeg`` flag. See + :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier + map. + Tiles are extracted and compressed on the GPU via nvCOMP, then assembled into a TIFF file on CPU. 
The CuPy array stays on device throughout compression -- only the compressed bytes transfer to CPU @@ -201,6 +214,18 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, GPU writer forwards this kwarg unchanged. Default ``'auto'`` writes MinIsBlack for any band count, so a 4-band raster is not silently tagged as RGB+alpha (issue #1769). + allow_experimental_codecs : bool + Opt in to the Tier 3 experimental codecs ``'lerc'``, + ``'jpeg2000'`` / ``'j2k'``, and ``'lz4'`` (default ``False``). + Mirrors the same kwarg on ``to_geotiff`` so the two writers + expose a consistent surface; the GPU dispatch path through + ``to_geotiff`` forwards the kwarg unchanged. Setting + ``compression=`` to one of those codecs without this flag + raises ``ValueError`` whose message names the flag. With the + flag set, the write proceeds and a ``GeoTIFFFallbackWarning`` + is emitted once per call. Does NOT cover ``compression='jpeg'``: + the internal-only JPEG path keeps its own dedicated + ``allow_internal_only_jpeg`` flag. See issue #2137. allow_internal_only_jpeg : bool Opt in to the experimental ``compression='jpeg'`` encode path (default ``False``). The encoder emits self-contained JFIF @@ -271,6 +296,36 @@ def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, GeoTIFFFallbackWarning, stacklevel=2, ) + # Tier 3 experimental-codec gate (issue #2137). Lerc, jpeg2000 / + # j2k, and lz4 require ``allow_experimental_codecs=True``; the GPU + # writer mirrors the same gate ``to_geotiff`` enforces so the two + # entry points agree. The GPU dispatch path through + # ``to_geotiff(gpu=True, compression='lerc', ...)`` forwards the + # kwarg; ``to_geotiff`` skips its own opt-in warning when the write + # dispatches to the GPU, so the warning is emitted exactly once, + # here. The rejection check does re-run on that path, which is + # harmless and keeps the explicit GPU entry point usable + # standalone. 
+ if isinstance(compression, str): + _gpu_codec = compression.lower() + if (_gpu_codec in _EXPERIMENTAL_CODECS + and not allow_experimental_codecs): + raise ValueError( + f"compression={compression!r} is experimental: cross-backend " + "numerical parity is not claimed and reader support across " + "GDAL versions is uneven. Pass allow_experimental_codecs=True " + "to opt in, or use 'deflate', 'zstd', or 'lzw' for a " + "stable lossless codec (issue #2137).") + if (_gpu_codec in _EXPERIMENTAL_CODECS + and allow_experimental_codecs): + warnings.warn( + f"write_geotiff_gpu(compression={compression!r}, " + "allow_experimental_codecs=True): experimental codec, " + "GPU encode path is not byte-identical to the CPU writer " + "(different backend libraries). See issue #2137.", + GeoTIFFFallbackWarning, + stacklevel=2, + ) # MinIsWhite pre-inversion (issue #1836) runs in the eager CPU writer. # The GPU writer assembles tile bytes directly on device; threading # the pixel + nodata-sentinel transform through that pipeline is out diff --git a/xrspatial/geotiff/_writers/vrt.py b/xrspatial/geotiff/_writers/vrt.py index ec837392d..371bb3b48 100644 --- a/xrspatial/geotiff/_writers/vrt.py +++ b/xrspatial/geotiff/_writers/vrt.py @@ -27,6 +27,13 @@ def write_vrt(path: str = _VRT_PATH_MISSING_SENTINEL, nodata: float | int | None = None) -> str: """Generate a VRT file that mosaics multiple GeoTIFF tiles. + Tier: Advanced (issue #2137). VRT mosaic output is supported but + the caller should know the failure modes on the read side: a + consumer reading the resulting ``.vrt`` may hit cross-source + nodata mismatch, missing backing files, or per-band metadata + disagreement. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for + the full tier map. 
+ Parameters ---------- path : str diff --git a/xrspatial/geotiff/tests/test_compression_docstring_1644.py b/xrspatial/geotiff/tests/test_compression_docstring_1644.py index a8f049579..26314787a 100644 --- a/xrspatial/geotiff/tests/test_compression_docstring_1644.py +++ b/xrspatial/geotiff/tests/test_compression_docstring_1644.py @@ -125,7 +125,12 @@ def test_write_geotiff_gpu_accepts_cpu_fallback_codecs(tmp_path, codec): "transform": (1.0, 0.0, 0.0, 0.0, -1.0, 64.0)}, ) path = str(tmp_path / f"out_{codec}.tif") - write_geotiff_gpu(da, path, compression=codec) + # Tier 3 codecs (lerc / jpeg2000 / j2k / lz4) now require + # ``allow_experimental_codecs=True`` (issue #2137). Pass the opt-in + # so the test continues to exercise the actual encode path rather + # than the new rejection gate. + write_geotiff_gpu(da, path, compression=codec, + allow_experimental_codecs=True) assert os.path.exists(path), ( f"write_geotiff_gpu(compression={codec!r}) failed to write a file" ) diff --git a/xrspatial/geotiff/tests/test_features.py b/xrspatial/geotiff/tests/test_features.py index 420b44332..148ea5ba7 100644 --- a/xrspatial/geotiff/tests/test_features.py +++ b/xrspatial/geotiff/tests/test_features.py @@ -2783,6 +2783,9 @@ def test_all_lists_supported_functions(self): 'UnparseableCRSError', 'GeoTIFFFallbackWarning', 'UnsafeURLError', + # Issue #2137: tiered feature inventory exposed alongside + # the writer's ``allow_experimental_codecs`` opt-in. 
+ 'SUPPORTED_FEATURES', 'open_geotiff', 'read_geotiff_gpu', 'read_geotiff_dask', diff --git a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py index f31a40d7e..f53cff2f9 100644 --- a/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py +++ b/xrspatial/geotiff/tests/test_fuzz_hypothesis_1661.py @@ -128,12 +128,18 @@ def test_round_trip_property(tmp_path_factory, inputs): tmp_dir = tmp_path_factory.mktemp("fuzz_1661_rt") path = str(tmp_dir / "rt.tif") + # ``lz4`` is in the fuzz codec list and is a Tier 3 experimental + # codec gated behind ``allow_experimental_codecs`` (issue #2137). + # The other codecs in ``LOSSLESS_CODECS`` are Tier 1 and accept the + # default ``False``, so passing ``True`` unconditionally lets the + # fuzz harness exercise every codec uniformly. to_geotiff( da, path, compression=compression, tiled=tiled, predictor=predictor, + allow_experimental_codecs=True, ) got = open_geotiff(path, dtype=str(da.dtype)) diff --git a/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py b/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py index ecef632c7..b271d890d 100644 --- a/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py +++ b/xrspatial/geotiff/tests/test_gpu_writer_cpu_fallback_codecs_2026_05_12.py @@ -240,7 +240,7 @@ def test_write_geotiff_gpu_lz4_roundtrip(tmp_path): da, arr = _make_int_da() path = str(tmp_path / "lz4_roundtrip.tif") - write_geotiff_gpu(da, path, compression='lz4') + write_geotiff_gpu(da, path, compression='lz4', allow_experimental_codecs=True) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -254,7 +254,7 @@ def test_write_geotiff_gpu_lz4_compression_tag(tmp_path): da, _ = _make_int_da() path = str(tmp_path / "lz4_tag.tif") - write_geotiff_gpu(da, path, compression='lz4') + write_geotiff_gpu(da, path, compression='lz4', allow_experimental_codecs=True) assert _read_compression_tag(path) 
== _COMPRESSION_TAGS['lz4'] @@ -279,7 +279,7 @@ def test_write_geotiff_gpu_lerc_float_lossless_roundtrip(tmp_path): da, arr = _make_float_da(dtype=np.float32) path = str(tmp_path / "lerc_float.tif") - write_geotiff_gpu(da, path, compression='lerc') + write_geotiff_gpu(da, path, compression='lerc', allow_experimental_codecs=True) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -299,7 +299,7 @@ def test_write_geotiff_gpu_lerc_int_roundtrip(tmp_path): da, arr = _make_int_da(dtype=np.uint16) path = str(tmp_path / "lerc_int.tif") - write_geotiff_gpu(da, path, compression='lerc') + write_geotiff_gpu(da, path, compression='lerc', allow_experimental_codecs=True) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -313,7 +313,7 @@ def test_write_geotiff_gpu_lerc_compression_tag(tmp_path): da, _ = _make_float_da() path = str(tmp_path / "lerc_tag.tif") - write_geotiff_gpu(da, path, compression='lerc') + write_geotiff_gpu(da, path, compression='lerc', allow_experimental_codecs=True) assert _read_compression_tag(path) == _COMPRESSION_TAGS['lerc'] @@ -341,7 +341,7 @@ def test_write_geotiff_gpu_jpeg2000_uint8_lossless_roundtrip(tmp_path): da, arr = _make_int_da(dtype=np.uint8) path = str(tmp_path / "j2k_uint8.tif") - write_geotiff_gpu(da, path, compression='jpeg2000') + write_geotiff_gpu(da, path, compression='jpeg2000', allow_experimental_codecs=True) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -362,7 +362,7 @@ def test_write_geotiff_gpu_jpeg2000_rgb_roundtrip(tmp_path): da, arr = _make_uint8_rgb_da() path = str(tmp_path / "j2k_rgb.tif") - write_geotiff_gpu(da, path, compression='jpeg2000') + write_geotiff_gpu(da, path, compression='jpeg2000', allow_experimental_codecs=True) out = open_geotiff(path) np.testing.assert_array_equal(out.values, arr) @@ -383,8 +383,8 @@ def test_write_geotiff_gpu_j2k_alias_matches_jpeg2000(tmp_path): j2k_path = str(tmp_path / "alias_j2k.tif") jpeg2k_path = str(tmp_path / 
"alias_jpeg2000.tif") - write_geotiff_gpu(da, j2k_path, compression='j2k') - write_geotiff_gpu(da, jpeg2k_path, compression='jpeg2000') + write_geotiff_gpu(da, j2k_path, compression='j2k', allow_experimental_codecs=True) + write_geotiff_gpu(da, jpeg2k_path, compression='jpeg2000', allow_experimental_codecs=True) assert _read_compression_tag(j2k_path) == _COMPRESSION_TAGS['j2k'] assert _read_compression_tag(jpeg2k_path) == _COMPRESSION_TAGS['jpeg2000'] @@ -399,7 +399,7 @@ def test_write_geotiff_gpu_jpeg2000_compression_tag(tmp_path): da, _ = _make_int_da(dtype=np.uint8) path = str(tmp_path / "j2k_tag.tif") - write_geotiff_gpu(da, path, compression='jpeg2000') + write_geotiff_gpu(da, path, compression='jpeg2000', allow_experimental_codecs=True) assert _read_compression_tag(path) == _COMPRESSION_TAGS['jpeg2000'] diff --git a/xrspatial/geotiff/tests/test_jpeg2000.py b/xrspatial/geotiff/tests/test_jpeg2000.py index f38158129..fe16b60d3 100644 --- a/xrspatial/geotiff/tests/test_jpeg2000.py +++ b/xrspatial/geotiff/tests/test_jpeg2000.py @@ -146,7 +146,10 @@ def test_public_api_roundtrip(self, tmp_path): coords={'y': np.arange(8), 'x': np.arange(8)}, attrs={'crs': 4326}) path = str(tmp_path / 'j2k_1048_api.tif') - to_geotiff(da, path, compression='jpeg2000') + # Tier 3 codec (issue #2137); pass the opt-in so the round-trip + # test exercises the encode path rather than the rejection gate. 
+ to_geotiff(da, path, compression='jpeg2000', + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lerc.py b/xrspatial/geotiff/tests/test_lerc.py index 927c8fc5a..c1f73c6bc 100644 --- a/xrspatial/geotiff/tests/test_lerc.py +++ b/xrspatial/geotiff/tests/test_lerc.py @@ -130,7 +130,10 @@ def test_public_api_roundtrip(self, tmp_path): coords={'y': np.arange(8), 'x': np.arange(8)}, attrs={'crs': 4326}) path = str(tmp_path / 'lerc_1052_api.tif') - to_geotiff(da, path, compression='lerc') + # Tier 3 codec (issue #2137); pass the opt-in so the round-trip + # test exercises the encode path rather than the rejection gate. + to_geotiff(da, path, compression='lerc', + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lerc_max_z_error.py b/xrspatial/geotiff/tests/test_lerc_max_z_error.py index e261013a2..25cc53245 100644 --- a/xrspatial/geotiff/tests/test_lerc_max_z_error.py +++ b/xrspatial/geotiff/tests/test_lerc_max_z_error.py @@ -49,7 +49,10 @@ def test_lossless_roundtrip_bit_exact(self, tmp_path): arr = _smooth_surface() da = _make_dataarray(arr) path = str(tmp_path / 'lerc_lossless.tif') - to_geotiff(da, path, compression='lerc', max_z_error=0.0) + # Tier 3 codec (issue #2137); opt in so the test exercises the + # encode path rather than the rejection gate. 
+ to_geotiff(da, path, compression='lerc', max_z_error=0.0, + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, arr) @@ -65,8 +68,10 @@ def test_lossy_smaller_and_bounded(self, tmp_path): lossless_path = str(tmp_path / 'lerc_lossless.tif') lossy_path = str(tmp_path / 'lerc_lossy.tif') - to_geotiff(da, lossless_path, compression='lerc', max_z_error=0.0) - to_geotiff(da, lossy_path, compression='lerc', max_z_error=0.05) + to_geotiff(da, lossless_path, compression='lerc', max_z_error=0.0, + allow_experimental_codecs=True) + to_geotiff(da, lossy_path, compression='lerc', max_z_error=0.05, + allow_experimental_codecs=True) lossless_size = os.path.getsize(lossless_path) lossy_size = os.path.getsize(lossy_path) @@ -93,7 +98,7 @@ def test_dask_lerc_with_max_z_error(self, tmp_path): ) path = str(tmp_path / 'lerc_dask.tif') to_geotiff(da, path, compression='lerc', max_z_error=0.05, - tile_size=32) + tile_size=32, allow_experimental_codecs=True) result = open_geotiff(path).values max_err = float(np.max(np.abs(result - arr))) @@ -115,7 +120,8 @@ def test_negative_max_z_error_raises(self, tmp_path): da = _make_dataarray(arr) path = str(tmp_path / 'should_not_exist.tif') with pytest.raises(ValueError, match="max_z_error"): - to_geotiff(da, path, compression='lerc', max_z_error=-0.01) + to_geotiff(da, path, compression='lerc', max_z_error=-0.01, + allow_experimental_codecs=True) def test_max_z_error_zero_with_other_codec_is_allowed(self, tmp_path): # The default value 0.0 must not error out for any other codec. 
diff --git a/xrspatial/geotiff/tests/test_lz4.py b/xrspatial/geotiff/tests/test_lz4.py index a845d868c..57dd77905 100644 --- a/xrspatial/geotiff/tests/test_lz4.py +++ b/xrspatial/geotiff/tests/test_lz4.py @@ -107,7 +107,10 @@ def test_public_api_roundtrip(self, tmp_path): coords={'y': np.arange(8), 'x': np.arange(8)}, attrs={'crs': 4326}) path = str(tmp_path / 'lz4_1051_api.tif') - to_geotiff(da, path, compression='lz4') + # Tier 3 codec (issue #2137); pass the opt-in so the round-trip + # test exercises the encode path rather than the rejection gate. + to_geotiff(da, path, compression='lz4', + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, data) diff --git a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py index 15ac8427d..d9fa2b9b9 100644 --- a/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_lz4_compression_level_2026_05_11.py @@ -75,7 +75,8 @@ def test_lz4_level_round_trip(self, level, tmp_path): da = _make_da(seed=level) path = str(tmp_path / f"lz4_level_{level}.tif") to_geotiff(da, path, compression="lz4", - compression_level=level) + compression_level=level, + allow_experimental_codecs=True) result = open_geotiff(path) # lz4 is lossless: assert_array_equal, not assert_allclose. 
np.testing.assert_array_equal(result.values, da.values) @@ -86,7 +87,8 @@ def test_lz4_default_level_round_trip(self, tmp_path): change is caught.""" da = _make_da(seed=99) path = str(tmp_path / "lz4_default.tif") - to_geotiff(da, path, compression="lz4") + to_geotiff(da, path, compression="lz4", + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, da.values) @@ -105,8 +107,10 @@ def test_lz4_higher_level_not_larger(self, tmp_path): da = _make_compressible() path_lo = str(tmp_path / "lz4_lo.tif") path_hi = str(tmp_path / "lz4_hi.tif") - to_geotiff(da, path_lo, compression="lz4", compression_level=0) - to_geotiff(da, path_hi, compression="lz4", compression_level=16) + to_geotiff(da, path_lo, compression="lz4", compression_level=0, + allow_experimental_codecs=True) + to_geotiff(da, path_hi, compression="lz4", compression_level=16, + allow_experimental_codecs=True) size_lo = os.path.getsize(path_lo) size_hi = os.path.getsize(path_hi) # Allow equality: very small or already-compressed payloads can @@ -133,7 +137,8 @@ def test_lz4_out_of_range_level_raises_eager(self, level, tmp_path): path = str(tmp_path / "lz4_bad.tif") with pytest.raises(ValueError, match="compression_level"): to_geotiff(da, path, compression="lz4", - compression_level=level) + compression_level=level, + allow_experimental_codecs=True) def test_lz4_out_of_range_message_includes_range(self, tmp_path): """Error message advertises the valid (0, 16) range so callers @@ -142,7 +147,8 @@ def test_lz4_out_of_range_message_includes_range(self, tmp_path): path = str(tmp_path / "lz4_bad.tif") with pytest.raises(ValueError, match=r"lz4.*\(valid:\s*0-16\)"): to_geotiff(da, path, compression="lz4", - compression_level=999) + compression_level=999, + allow_experimental_codecs=True) # --------------------------------------------------------------------------- @@ -170,7 +176,8 @@ def test_lz4_dask_streaming_level_round_trip(self, level, tmp_path): dask_da, np_arr 
= self._make_dask_da() path = str(tmp_path / f"lz4_dask_level_{level}.tif") to_geotiff(dask_da, path, compression="lz4", - compression_level=level, tile_size=16) + compression_level=level, tile_size=16, + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, np_arr) @@ -180,4 +187,5 @@ def test_lz4_dask_streaming_out_of_range_raises(self, level, tmp_path): path = str(tmp_path / "lz4_dask_bad.tif") with pytest.raises(ValueError, match="compression_level"): to_geotiff(dask_da, path, compression="lz4", - compression_level=level, tile_size=16) + compression_level=level, tile_size=16, + allow_experimental_codecs=True) diff --git a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py index adbd3c22a..a73abbbb5 100644 --- a/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py +++ b/xrspatial/geotiff/tests/test_streaming_codecs_2026_05_11.py @@ -80,7 +80,9 @@ def test_lossless_round_trip(self, float_raster, dask_float_raster, tmp_path): """Dask + LERC (max_z_error=0) round-trips exactly.""" path = str(tmp_path / 'stream_lerc_lossless.tif') - to_geotiff(dask_float_raster, path, compression='lerc') + # Tier 3 codec (issue #2137); opt in to exercise the encode path. + to_geotiff(dask_float_raster, path, compression='lerc', + allow_experimental_codecs=True) result = open_geotiff(path) # LERC with max_z_error=0 is lossless for float32 sources. 
np.testing.assert_array_equal(result.values, float_raster.values) @@ -91,7 +93,8 @@ def test_lossy_respects_max_z_error(self, float_raster, dask_float_raster, max_z = 0.1 path = str(tmp_path / 'stream_lerc_lossy.tif') to_geotiff(dask_float_raster, path, - compression='lerc', max_z_error=max_z) + compression='lerc', max_z_error=max_z, + allow_experimental_codecs=True) result = open_geotiff(path) max_diff = float(np.abs(result.values - float_raster.values).max()) assert max_diff <= max_z + 1e-7, ( @@ -109,9 +112,11 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, eager_path = str(tmp_path / 'eager_lerc.tif') stream_path = str(tmp_path / 'stream_lerc.tif') to_geotiff(float_raster, eager_path, - compression='lerc', max_z_error=0.05) + compression='lerc', max_z_error=0.05, + allow_experimental_codecs=True) to_geotiff(dask_float_raster, stream_path, - compression='lerc', max_z_error=0.05) + compression='lerc', max_z_error=0.05, + allow_experimental_codecs=True) eager = open_geotiff(eager_path).values stream = open_geotiff(stream_path).values np.testing.assert_array_equal(eager, stream) @@ -125,7 +130,9 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, class TestStreamingLz4: def test_round_trip(self, float_raster, dask_float_raster, tmp_path): path = str(tmp_path / 'stream_lz4.tif') - to_geotiff(dask_float_raster, path, compression='lz4') + # Tier 3 codec (issue #2137); opt in to exercise the encode path. 
+ to_geotiff(dask_float_raster, path, compression='lz4', + allow_experimental_codecs=True) result = open_geotiff(path) np.testing.assert_array_equal(result.values, float_raster.values) @@ -133,8 +140,10 @@ def test_streaming_matches_eager(self, float_raster, dask_float_raster, tmp_path): eager_path = str(tmp_path / 'eager_lz4.tif') stream_path = str(tmp_path / 'stream_lz4.tif') - to_geotiff(float_raster, eager_path, compression='lz4') - to_geotiff(dask_float_raster, stream_path, compression='lz4') + to_geotiff(float_raster, eager_path, compression='lz4', + allow_experimental_codecs=True) + to_geotiff(dask_float_raster, stream_path, compression='lz4', + allow_experimental_codecs=True) eager = open_geotiff(eager_path).values stream = open_geotiff(stream_path).values np.testing.assert_array_equal(eager, stream) diff --git a/xrspatial/geotiff/tests/test_supported_features_tiers_2137.py b/xrspatial/geotiff/tests/test_supported_features_tiers_2137.py new file mode 100644 index 000000000..09b020598 --- /dev/null +++ b/xrspatial/geotiff/tests/test_supported_features_tiers_2137.py @@ -0,0 +1,262 @@ +"""Tier inventory + opt-in gates for the geotiff public surface. + +Background +---------- +Issue #2137 tiers the geotiff public surface into Stable / Advanced / +Experimental / Internal-only. The :data:`SUPPORTED_FEATURES` constant +enumerates every feature with its tier and is the single source of +truth that the docstrings, writer gates, and user-guide notebook all +read from. The writer adds an ``allow_experimental_codecs=True`` +opt-in for Tier 3 codecs (``lerc``, ``jpeg2000`` / ``j2k``, ``lz4``) +modelled on the existing ``allow_internal_only_jpeg`` flag for the +Tier 4 ``jpeg`` codec (#1845). + +What this test pins +------------------- +* The mapping covers every codec name listed in ``_VALID_COMPRESSIONS`` + -- callers cannot ship a codec the tiering does not classify. +* Stable codecs accept a default ``to_geotiff`` call. 
+* Every Tier 3 codec is rejected by default and accepted when the + caller passes ``allow_experimental_codecs=True``; the rejection + message names the flag and the opt-in emits + ``GeoTIFFFallbackWarning`` once per call. +* The Tier 4 ``jpeg`` codec rejects without ``allow_internal_only_jpeg`` + and is NOT covered by ``allow_experimental_codecs`` -- the two flags + do not collapse into one switch. +* The signature of ``to_geotiff`` and ``write_geotiff_gpu`` carries + the new kwarg with a ``False`` default. +""" +from __future__ import annotations + +import inspect +import os +import warnings + +import numpy as np +import pytest +import xarray as xr + +from xrspatial.geotiff import ( + GeoTIFFFallbackWarning, + SUPPORTED_FEATURES, + to_geotiff, + write_geotiff_gpu, +) +from xrspatial.geotiff._attrs import _VALID_COMPRESSIONS + + +_TIER_VALUES = {'stable', 'advanced', 'experimental', 'internal_only'} + + +def _make_float32_da(h: int = 32, w: int = 32) -> xr.DataArray: + """Small float32 raster with axis-aligned coords; round-trips + through every Tier 1 codec and exercises the experimental codec + gate without exhausting CI time. + """ + rng = np.random.RandomState(0) + arr = rng.standard_normal((h, w)).astype(np.float32) + return xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + + +def _make_uint8_da(h: int = 32, w: int = 32) -> xr.DataArray: + """uint8 raster for codecs (jpeg2000 / j2k via glymur) that only + accept integer input. + """ + rng = np.random.RandomState(0) + arr = rng.randint(0, 256, size=(h, w), dtype=np.uint8) + return xr.DataArray( + arr, + dims=("y", "x"), + coords={ + "y": np.arange(h, dtype=np.float64), + "x": np.arange(w, dtype=np.float64), + }, + attrs={'crs': 4326}, + ) + + +# Some Tier 3 codecs constrain the supported input dtype (glymur's +# JPEG2000 encoder accepts only uint8/uint16). 
Pick the dtype that +# exercises the actual encode without re-litigating per-codec limits. +_EXPERIMENTAL_CODEC_INPUT = { + 'jpeg2000': _make_uint8_da, + 'j2k': _make_uint8_da, + 'lerc': _make_float32_da, + 'lz4': _make_float32_da, +} + + +def test_supported_features_is_a_mapping(): + """``SUPPORTED_FEATURES`` is a non-empty mapping from feature name + to tier label. The notebook and the test suite both iterate it, so + accidental removal would break the documentation generator and the + parity matrix's tier-aware selection. + """ + assert isinstance(SUPPORTED_FEATURES, dict) + assert len(SUPPORTED_FEATURES) > 0 + for name, tier in SUPPORTED_FEATURES.items(): + assert isinstance(name, str) and '.' in name, name + assert tier in _TIER_VALUES, (name, tier) + + +def test_supported_features_covers_every_valid_codec(): + """Every codec name in ``_VALID_COMPRESSIONS`` carries a tier in + ``SUPPORTED_FEATURES``. The gate cannot silently miss a codec. + """ + classified = { + name.split('.', 1)[1].lower() + for name in SUPPORTED_FEATURES + if name.startswith('codec.') + } + for codec in _VALID_COMPRESSIONS: + assert codec.lower() in classified, ( + f"codec {codec!r} is in _VALID_COMPRESSIONS but missing from " + "SUPPORTED_FEATURES; add a 'codec.' entry classified " + "into one of stable / experimental / internal_only.") + + +def test_to_geotiff_signature_has_allow_experimental_codecs(): + """``to_geotiff`` exposes ``allow_experimental_codecs=False``. + + Pinning the signature catches accidental removal during future + refactors: if the kwarg disappears, the writer silently drops back + to the unconditional acceptance of Tier 3 codecs and the issue + regresses. 
+ """ + params = inspect.signature(to_geotiff).parameters + assert 'allow_experimental_codecs' in params + assert params['allow_experimental_codecs'].default is False + + +def test_write_geotiff_gpu_signature_has_allow_experimental_codecs(): + """``write_geotiff_gpu`` carries the same kwarg with the same + default, so the two writers expose a consistent surface and the + auto-dispatch path forwards a single value to either. + """ + params = inspect.signature(write_geotiff_gpu).parameters + assert 'allow_experimental_codecs' in params + assert params['allow_experimental_codecs'].default is False + + +@pytest.mark.parametrize( + "codec", + sorted( + name.split('.', 1)[1] + for name, tier in SUPPORTED_FEATURES.items() + if name.startswith('codec.') and tier == 'stable' + ), +) +def test_stable_codecs_accept_default_call(tmp_path, codec): + """Tier 1 codecs round-trip a small float32 raster with no flags. + A regression that accidentally gates a stable codec behind the new + flag would surface here. + """ + da = _make_float32_da() + path = os.path.join(str(tmp_path), f'stable_{codec}_2137.tif') + out = to_geotiff(da, path, compression=codec) + assert out == path + assert os.path.exists(path) + + +@pytest.mark.parametrize( + "codec", + sorted( + name.split('.', 1)[1] + for name, tier in SUPPORTED_FEATURES.items() + if name.startswith('codec.') and tier == 'experimental' + ), +) +def test_experimental_codec_rejected_by_default(tmp_path, codec): + """Tier 3 codecs raise ``ValueError`` whose message names the + ``allow_experimental_codecs`` flag so the caller learns the + opt-in name from the rejection itself. 
+ """ + da = _make_float32_da() + path = os.path.join(str(tmp_path), f'reject_{codec}_2137.tif') + with pytest.raises(ValueError, match='allow_experimental_codecs'): + to_geotiff(da, path, compression=codec) + + +@pytest.mark.parametrize( + "codec", + sorted( + name.split('.', 1)[1] + for name, tier in SUPPORTED_FEATURES.items() + if name.startswith('codec.') and tier == 'experimental' + ), +) +def test_experimental_codec_opt_in_emits_warning(tmp_path, codec): + """``allow_experimental_codecs=True`` lets the codec through and + emits ``GeoTIFFFallbackWarning`` once per call. The warning shape + matches the existing ``allow_internal_only_jpeg`` opt-in so docs + and downstream warning filters can target a single class. + """ + da = _EXPERIMENTAL_CODEC_INPUT.get(codec, _make_float32_da)() + path = os.path.join(str(tmp_path), f'optin_{codec}_2137.tif') + with warnings.catch_warnings(record=True) as caught: + warnings.simplefilter('always') + try: + to_geotiff(da, path, compression=codec, + allow_experimental_codecs=True) + except (ImportError, ModuleNotFoundError) as e: + # ``jpeg2000`` / ``j2k`` need glymur; ``lerc`` needs a + # codec backend. When that optional dependency is missing + # the test skips here, so the warning assertions below run + # only when the write call itself did not raise. + pytest.skip(f"optional dependency missing for {codec}: {e}") + fallback = [w for w in caught + if issubclass(w.category, GeoTIFFFallbackWarning)] + assert fallback, ( + f"to_geotiff(compression={codec!r}, allow_experimental_codecs=" + "True) must emit GeoTIFFFallbackWarning so the caller knows " + "the codec carries no cross-backend parity claim.") + # Exactly one warning per call. Pinning the count catches the + # double-warn regression where the CPU dispatcher fires the + # warning and then ``write_geotiff_gpu`` fires it again on the GPU + # dispatch path; the CPU dispatcher gates its warning on + # ``not use_gpu`` to keep this invariant on the GPU path too.
+ assert len(fallback) == 1, ( + "expected exactly one GeoTIFFFallbackWarning for " + f"to_geotiff(compression={codec!r}, allow_experimental_codecs=" + f"True); got {len(fallback)}: " + f"{[str(w.message) for w in fallback]}") + # Warning text names both the codec and the opt-in flag so logs + # are self-describing rather than pointing to a docs URL. + msg = str(fallback[0].message) + assert 'allow_experimental_codecs' in msg + assert codec in msg + + +def test_jpeg_internal_only_not_covered_by_experimental_flag(tmp_path): + """``allow_experimental_codecs=True`` does NOT unlock + ``compression='jpeg'`` -- internal-only is the strictest tier and + keeps its own dedicated flag (``allow_internal_only_jpeg``). The + two flags do not collapse into one switch. + """ + da = _make_uint8_da()  # real uint8 input; avoids an out-of-range float->uint8 cast + path = os.path.join(str(tmp_path), 'jpeg_only_experimental_2137.tif') + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + to_geotiff( + da, path, compression='jpeg', + allow_experimental_codecs=True, + ) + + +def test_jpeg_rejected_without_its_own_flag(tmp_path): + """``compression='jpeg'`` without ``allow_internal_only_jpeg=True`` + raises ``ValueError`` whose message names the dedicated flag. + Pinned here so the Tier 4 contract sits alongside the Tier 3 + contract in one file. + """ + da = _make_uint8_da()  # real uint8 input; avoids an out-of-range float->uint8 cast + path = os.path.join(str(tmp_path), 'jpeg_no_flag_2137.tif') + with pytest.raises(ValueError, match='allow_internal_only_jpeg'): + to_geotiff(da, path, compression='jpeg')