|
| 1 | +"""Smoke tests for the Phase 2 PR 5 compression fixtures (issue #1930). |
| 2 | +
|
| 3 | +Six fixtures land in this PR, one per major codec / predictor variant: |
| 4 | +
|
| 5 | +* ``compression_none_uint8`` -- baseline uncompressed uint8. |
| 6 | +* ``compression_deflate_predictor2_uint16`` -- deflate + predictor 2 (int). |
| 7 | +* ``compression_deflate_predictor3_float32`` -- deflate + predictor 3 (float). |
| 8 | +* ``compression_lzw_predictor2_int16`` -- LZW + predictor 2 on signed int. |
| 9 | +* ``compression_lerc_float32`` -- LERC on float32 with max_z_error 0 |
| 10 | + (lossless setting, so it is comparable to the other codecs). |
| 11 | +* ``compression_jpeg_uint8_ycbcr`` -- JPEG in YCbCr on uint8 tiled raster. |
| 12 | + Intrinsically lossy; the oracle is called with ``lossy=True``. |
| 13 | +
|
| 14 | +The test: |
| 15 | +
|
| 16 | +* loads each fixture with rasterio, |
| 17 | +* builds an xrspatial-shaped DataArray from the rasterio read, |
| 18 | +* calls ``compare_to_oracle`` and asserts it accepts the matching case |
| 19 | + for lossless codecs, with ``lossy=True`` for jpeg, |
| 20 | +* pins the JPEG cell's expected behaviour: strict mode must reject and |
| 21 | + lossy mode must accept. |
| 22 | +
|
| 23 | +LERC and JPEG codec availability depends on the GDAL build. When a |
| 24 | +codec is unavailable the corresponding fixture is missing from disk and |
| 25 | +the relevant test is skipped via ``pytest.skip``. The pre-PR generator |
| 26 | +run did write them; this guard is defensive for environments where GDAL |
| 27 | +was rebuilt without those drivers. |
| 28 | +""" |
| 29 | +from __future__ import annotations |
| 30 | + |
| 31 | +import pathlib |
| 32 | + |
| 33 | +import numpy as np |
| 34 | +import pytest |
| 35 | + |
| 36 | +# Both pyyaml and rasterio are runtime deps of this test. importorskip |
| 37 | +# keeps minimal environments green; once Phase 2 fully lands these are |
| 38 | +# planned to move into the test extras (see README). |
| 39 | +pytest.importorskip("yaml") |
| 40 | +rasterio = pytest.importorskip("rasterio") |
| 41 | + |
| 42 | +import xarray as xr # noqa: E402 |
| 43 | + |
| 44 | +from xrspatial.geotiff.tests.golden_corpus._oracle import ( # noqa: E402 |
| 45 | + compare_to_oracle, |
| 46 | +) |
| 47 | + |
| 48 | + |
# Directory holding the committed golden-corpus ``.tif`` fixtures; it is
# resolved relative to this test module so the working directory is
# irrelevant.
FIXTURES_DIR = pathlib.Path(__file__).resolve().parent.joinpath(
    "golden_corpus", "fixtures"
)
| 54 | + |
| 55 | + |
# (fixture id, lossy flag passed to the oracle). Every entry is compared
# with lossy=False except the final JPEG entry, which is flagged lossy.
COMPRESSION_FIXTURES: tuple[tuple[str, bool], ...] = (
    *(
        (lossless_id, False)
        for lossless_id in (
            "compression_none_uint8",
            "compression_deflate_predictor2_uint16",
            "compression_deflate_predictor3_float32",
            "compression_lzw_predictor2_int16",
            "compression_lerc_float32",
        )
    ),
    ("compression_jpeg_uint8_ycbcr", True),
)
| 65 | + |
| 66 | + |
def _fixture_path(fid: str) -> pathlib.Path:
    """Resolve a fixture id to its ``.tif`` file, skipping when it is absent.

    The path is ``FIXTURES_DIR / "<fid>.tif"``. If nothing exists there the
    calling test is skipped through ``pytest.skip`` instead of failing:
    per the corpus notes, an absent file usually means the corpus was
    regenerated with a GDAL build lacking the relevant codec (LERC and
    JPEG being the usual suspects), and a contributor in that situation
    should not get a hard failure for a file they could not produce.

    Parameters
    ----------
    fid
        Fixture id, i.e. the file stem without the ``.tif`` suffix.

    Returns
    -------
    pathlib.Path
        Path to the existing fixture file.
    """
    fixture_file = FIXTURES_DIR.joinpath(f"{fid}.tif")
    if fixture_file.exists():
        return fixture_file
    # pytest.skip raises, so control never falls through past this call.
    pytest.skip(f"fixture {fid} not present (codec unavailable?)")
| 81 | + |
| 82 | + |
def _candidate_from_rasterio(path: pathlib.Path) -> xr.DataArray:
    """Build an xrspatial-style DataArray from a rasterio read of ``path``.

    The result is meant to mirror what ``xrspatial.geotiff.open_geotiff``
    returns: single-band files yield a 2-D ``(y, x)`` array, anything else
    a 3-D ``(band, y, x)`` array with 1-based band labels. Coordinates are
    pixel centres derived from the affine origin and pixel size.

    Attributes written:

    * ``transform`` -- ``(pixel_w, 0.0, origin_x, 0.0, pixel_h, origin_y)``;
      the two rotation/shear slots are always written as ``0.0``.
    * ``crs`` -- the EPSG code when the CRS resolves to one, otherwise
      ``crs_wkt`` carries the WKT string. Neither is set without a CRS.
    * ``nodata`` -- only when the dataset declares one.
    """
    with rasterio.open(path) as dataset:
        pixels = dataset.read()  # shape (bands, H, W)
        affine = dataset.transform
        source_crs = dataset.crs
        nodata_value = dataset.nodata
        first_band_dtype = dataset.dtypes[0]

    band_count = pixels.shape[0]
    n_rows, n_cols = pixels.shape[-2:]

    pixel_w, pixel_h = float(affine.a), float(affine.e)
    origin_x, origin_y = float(affine.c), float(affine.f)

    # Half-pixel offset moves each coordinate from the cell edge to its centre.
    x_centres = origin_x + (np.arange(n_cols) + 0.5) * pixel_w
    y_centres = origin_y + (np.arange(n_rows) + 0.5) * pixel_h

    attrs: dict = {
        "transform": (pixel_w, 0.0, origin_x, 0.0, pixel_h, origin_y),
    }
    epsg_code = source_crs.to_epsg() if source_crs is not None else None
    if epsg_code is not None:
        attrs["crs"] = epsg_code
    elif source_crs is not None:
        attrs["crs_wkt"] = source_crs.to_wkt()
    if nodata_value is not None:
        attrs["nodata"] = nodata_value

    if band_count == 1:
        # Drop the leading band axis for single-band rasters.
        values = pixels[0].astype(first_band_dtype)
        dims = ("y", "x")
        coords = {"y": y_centres, "x": x_centres}
    else:
        values = pixels.astype(first_band_dtype)
        dims = ("band", "y", "x")
        coords = {
            "band": np.arange(1, band_count + 1),
            "y": y_centres,
            "x": x_centres,
        }
    return xr.DataArray(values, dims=dims, coords=coords, attrs=attrs)
| 135 | + |
| 136 | + |
| 137 | +# --------------------------------------------------------------------------- |
| 138 | +# Per-fixture parametrised smoke |
| 139 | +# --------------------------------------------------------------------------- |
| 140 | + |
@pytest.mark.parametrize("fid, lossy", COMPRESSION_FIXTURES)
def test_compression_fixture_is_valid_tiff(fid: str, lossy: bool) -> None:
    """Each fixture opens with rasterio and carries sane basic metadata.

    Only structural properties are checked: non-empty raster dimensions,
    at least one band, and a CRS that resolves to EPSG:4326. The codec
    itself is not inspected here, and ``lossy`` is unused; it is accepted
    only because the parametrisation is shared with the oracle test.
    """
    fixture = _fixture_path(fid)
    with rasterio.open(fixture) as dataset:
        # Sanity: the raster is non-empty and has at least one band.
        assert dataset.width > 0
        assert dataset.height > 0
        assert dataset.count >= 1
        # The fixtures are expected to be georeferenced in EPSG:4326.
        fixture_crs = dataset.crs
        assert fixture_crs is not None
        assert fixture_crs.to_epsg() == 4326
| 152 | + |
| 153 | + |
@pytest.mark.parametrize("fid, lossy", COMPRESSION_FIXTURES)
def test_compression_fixture_oracle_accepts(fid: str, lossy: bool) -> None:
    """The oracle must accept a candidate built from a rasterio read.

    ``lossy`` is forwarded unchanged to ``compare_to_oracle``: ``True``
    for the jpeg fixture, ``False`` (strict comparison) for the rest.
    The test passes when the oracle call returns without raising.
    """
    fixture = _fixture_path(fid)
    candidate = _candidate_from_rasterio(fixture)
    compare_to_oracle(fixture, candidate, lossy=lossy)
| 164 | + |
| 165 | + |
| 166 | +# --------------------------------------------------------------------------- |
| 167 | +# JPEG: pin lossy semantics explicitly |
| 168 | +# --------------------------------------------------------------------------- |
| 169 | + |
def test_jpeg_lossy_mode_required() -> None:
    """Pin the strict-vs-lossy oracle contract on the jpeg fixture.

    A candidate read back with rasterio matches the fixture trivially, so
    it cannot distinguish the two modes. Instead the candidate is nudged
    by +1 per pixel (shape, dtype, coords and attrs untouched) and then:

    * strict mode (the default) must raise ``AssertionError`` matching
      ``"pixel arrays differ"``;
    * ``lossy=True`` must accept the very same perturbed candidate.

    This pins the contract added in PR #1991 for Phase 2's jpeg cell.
    """
    path = _fixture_path("compression_jpeg_uint8_ycbcr")
    cand = _candidate_from_rasterio(path)

    # Widen to int32 before adding so 255 does not wrap around to 0, then
    # clip back into the uint8 range. Pixels already at 255 stay put.
    perturbed_data = (
        cand.data.astype(np.int32) + 1
    ).clip(0, 255).astype(np.uint8)

    # Guard the premise of the test: if every pixel were saturated the
    # perturbation would be a no-op and the strict-mode check below would
    # fail with an unhelpful "DID NOT RAISE".
    assert not np.array_equal(perturbed_data, cand.data), (
        "perturbing the jpeg candidate by +1 left it unchanged; "
        "the fixture appears to be fully saturated at 255"
    )

    perturbed = xr.DataArray(
        perturbed_data,
        dims=cand.dims,
        coords=cand.coords,
        attrs=dict(cand.attrs),
    )
    # Strict mode: the perturbed pixels must be rejected.
    with pytest.raises(AssertionError, match="pixel arrays differ"):
        compare_to_oracle(path, perturbed)
    # Lossy mode: same perturbed candidate is accepted.
    compare_to_oracle(path, perturbed, lossy=True)
| 199 | + |
| 200 | + |
| 201 | +# --------------------------------------------------------------------------- |
| 202 | +# Manifest cross-check |
| 203 | +# --------------------------------------------------------------------------- |
| 204 | + |
def test_compression_fixtures_declared_in_manifest() -> None:
    """Every id in ``COMPRESSION_FIXTURES`` must be listed in the manifest.

    The manifest is obtained from the corpus generator's
    ``load_manifest()``; the check guards against this module and
    ``manifest.yaml`` drifting apart.
    """
    from importlib import import_module

    corpus_generator = import_module(
        "xrspatial.geotiff.tests.golden_corpus.generate"
    )
    manifest_entries = corpus_generator.load_manifest()["fixtures"]
    known_ids = {entry["id"] for entry in manifest_entries}
    for fixture_id, _lossy in COMPRESSION_FIXTURES:
        assert fixture_id in known_ids, (
            f"{fixture_id} missing from manifest.yaml"
        )
| 219 | + |
| 220 | + |
def test_jpeg_fixture_marked_lossy_in_manifest() -> None:
    """Manifest ``tolerance.lossy`` flags must agree with this module.

    The jpeg entry has to declare ``tolerance.lossy: true``. Every
    fixture listed here as lossless must either omit the flag or set it
    to false; lossless ids absent from the manifest are ignored by this
    test. A missing or empty ``tolerance`` mapping counts as "not lossy".
    """
    from importlib import import_module

    corpus_generator = import_module(
        "xrspatial.geotiff.tests.golden_corpus.generate"
    )
    entries = {
        entry["id"]: entry
        for entry in corpus_generator.load_manifest()["fixtures"]
    }

    jpeg_tolerance = (
        entries["compression_jpeg_uint8_ycbcr"].get("tolerance") or {}
    )
    assert jpeg_tolerance.get("lossy") is True, (
        "compression_jpeg_uint8_ycbcr must declare tolerance.lossy: true"
    )

    # And conversely the lossless cells must not be flagged.
    lossless_ids = [fid for fid, lossy in COMPRESSION_FIXTURES if not lossy]
    for fid in lossless_ids:
        entry = entries.get(fid)
        if entry is not None:
            tolerance = entry.get("tolerance") or {}
            assert tolerance.get("lossy") in (False, None), (
                f"{fid} unexpectedly marked lossy"
            )
0 commit comments