From e9b21ef542c67cd94d869ff57f825551196deeb6 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 16 May 2026 06:05:01 -0700 Subject: [PATCH 1/2] geotiff: golden corpus phase 2.8, CRS variants (#1930) Phase 2 PR 8 of #1930. Three fixtures, one per CRS encoding the manifest allows: * crs_epsg_3857: EPSG-coded Web Mercator. Straight EPSG path. * crs_wkt_utm10n: WKT for EPSG:32610 with AUTHORITY blocks stripped, so the bytes on disk are not byte-identical to from_epsg(32610).to_wkt(). PROJ still resolves it to EPSG:32610, which is what the oracle's EPSG-code fallback (PR #1991) was built for. * crs_citation_only: GeoKey citation, no AUTHORITY tag, no EPSG. libgeotiff mutates the WKT on round-trip (axis order, UNIT AUTHORITY), and neither side has an EPSG code, so structural CRS.__eq__ and the EPSG fallback both fail. _crs_equal gets one extra branch: when both to_epsg() return None and structural equality fails, compare crs.to_dict() (PROJ form). That dict is stable across the round-trip. Smoke tests pin each fixture, including a negative test that EPSG:4326 (same proj family, different ellipsoid) is still rejected by the PROJ-dict path. Fixtures are 8x8 uint8; all three .tif files are under 600 bytes. --- .../geotiff/tests/golden_corpus/_oracle.py | 15 ++- .../fixtures/crs_citation_only.tif | Bin 0 -> 567 bytes .../golden_corpus/fixtures/crs_epsg_3857.tif | Bin 0 -> 427 bytes .../golden_corpus/fixtures/crs_wkt_utm10n.tif | Bin 0 -> 424 bytes .../geotiff/tests/golden_corpus/generate.py | 8 +- .../geotiff/tests/golden_corpus/manifest.yaml | 41 ++++++++ .../tests/golden_corpus/test_oracle.py | 93 ++++++++++++++++++ 7 files changed, 153 insertions(+), 4 deletions(-) create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/crs_citation_only.tif create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/crs_epsg_3857.tif create mode 100644 xrspatial/geotiff/tests/golden_corpus/fixtures/crs_wkt_utm10n.tif diff --git a/xrspatial/geotiff/tests/golden_corpus/_oracle.py b/xrspatial/geotiff/tests/golden_corpus/_oracle.py index 63e8ba9bf..0b9f27d78 100644 --- a/xrspatial/geotiff/tests/golden_corpus/_oracle.py +++ b/xrspatial/geotiff/tests/golden_corpus/_oracle.py @@ -146,12 +146,20 @@ def _candidate_crs(candidate_da: xr.DataArray): def _crs_equal(ref, cand) -> bool: - """EPSG-aware CRS equality. + """EPSG-aware CRS equality with a PROJ-dict fallback. rasterio's ``CRS.__eq__`` compares WKT structurally, which makes EPSG-equivalent WKTs (one from PROJ, one from libgeotiff) compare unequal even when they describe the same coordinate system. Fall back to EPSG-code comparison when both sides resolve to an EPSG code. + + Citation-only CRSes (a user-supplied name with no AUTHORITY tag, e.g. + the Phase 2 PR 8 ``crs_citation_only`` fixture) cannot be compared by + EPSG code because neither side has one. PROJ's ``to_dict()`` projects + them onto a small set of canonical fields (proj kind, ellipsoid + radius, units), which is stable across the libgeotiff round-trip + that mutates WKT axis order and adds AUTHORITY["EPSG","9122"] to the + UNIT block. Use that as a last resort. """ if ref is None and cand is None: return True @@ -167,6 +175,11 @@ def _crs_equal(ref, cand) -> bool: cand_epsg = None if ref_epsg is not None and cand_epsg is not None: return ref_epsg == cand_epsg + if ref_epsg is None and cand_epsg is None: + try: + return ref.to_dict() == cand.to_dict() + except Exception: + return False return False diff --git a/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_citation_only.tif b/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_citation_only.tif new file mode 100644 index 0000000000000000000000000000000000000000..646cbf1c39f1b8a73f248a234b60ac5b3da9fa8e GIT binary patch literal 567 zcmebD)MDUZU|tfIlq0j81>>>Dly1{PpgFfcIyoerddmH-2Xk>P(m zBge+}0-$0hD4z{X_bGEg`J5aZ+Cfg@0@I8PykMFMWW2+&kO)U48YITx?i{S(mzbNX zV5<;XoLZz?P?TSmnUb2K0AysPCYNO9=jkZqtEX>ZXk=_+YG!U>X=QC=YiADt D$az+C literal 0 HcmV?d00001 diff --git a/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_epsg_3857.tif b/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_epsg_3857.tif new file mode 100644 index 0000000000000000000000000000000000000000..8c054748fd44ef5c246f6f0b13260917c44d5e4a GIT binary patch literal 427 zcmebD)MDUZU|`^9U|?isU<9&2JVqoo6B3&l$_A-pLt+auvViqw10_U}#Kn;GibK^n z0QJfssR>46^ELA@FazZe0dZ3c4+9&Jz6HeV+nE^@fHW7-{Eh8Q42nRu7m$qrDh?1f zj7G>{V>PtI9R!JA5DIpL$uoj|z|O$H0`xx!LTM(JjqQ>^MT{I9+u0c;8Q6dU#K_R6 z%)kSp1^HRPw5D>nd$59qiGsdDKyhkmO1`ddYEg1xNq$icM4|?$nu(c(m5rT)lZ%^& zmycgSP)JxrR7_k#Qc7AzR!&|)QAt@vRZU$(Q%hS%S5M!-(8$=t)Xdz%(#qP#*3KRP DV7eoo literal 0 HcmV?d00001 diff --git a/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_wkt_utm10n.tif b/xrspatial/geotiff/tests/golden_corpus/fixtures/crs_wkt_utm10n.tif new file mode 100644 index 0000000000000000000000000000000000000000..37f7c9da8b63082bb24a74f7896bfa6474cd295d GIT binary patch literal 424 zcmebD)MDUZU|`^9U|?isU<9&2JVqoo6B3&l$_A-pLt+auvViqw03}3`#Kn;GibK^n z0QJfssR>46^ELA@FazZe0dZ3c4+9&Jz6HeV+nE^@fHW7-{Eh8Q401rW7swHCpyB{w z!f3bzRz^dMoFho0Ln{zWgV>B@XB znSlpHC)KlnX-(yD_h1DJ69s*R&=6mRs{FiE1w#YB8i;5OP$?5L3o9Et2PYRd4=*3T zfS{1Dh^Uyjgrt- + EPSG-coded CRS. Web Mercator (EPSG:3857). Exercises the + straight EPSG path in the oracle. + width: 8 + height: 8 + dtype: uint8 + crs: + epsg: 3857 + transform: [10.0, 0.0, -8000000.0, 0.0, -10.0, 5000000.0] + tags: [fast, crs, epsg] + - id: crs_wkt_utm10n + description: >- + WKT-encoded CRS for EPSG:32610 (UTM 10N), but with AUTHORITY + blocks stripped so the WKT is not byte-identical to what + rasterio emits for from_epsg(32610). Exercises the oracle's + EPSG-code-equality fallback added in PR #1991. + width: 8 + height: 8 + dtype: uint8 + crs: + wkt: 'PROJCS["WGS 84 / UTM zone 10N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]' + transform: [10.0, 0.0, 500000.0, 0.0, -10.0, 4500000.0] + tags: [fast, crs, wkt] + - id: crs_citation_only + description: >- + Citation-only CRS: a user-supplied name with no AUTHORITY tag + and no formally registered EPSG code. PROJ round-trips it as a + generic geographic CRS keyed only by name. Exercises the + oracle's non-EPSG WKT fallback. + width: 8 + height: 8 + dtype: uint8 + crs: + citation: User-provided projection, no formal EPSG/WKT + transform: [0.001, 0.0, -120.0, 0.0, -0.001, 45.0] + tags: [fast, crs, citation] diff --git a/xrspatial/geotiff/tests/golden_corpus/test_oracle.py b/xrspatial/geotiff/tests/golden_corpus/test_oracle.py index e73fd37b1..737d901de 100644 --- a/xrspatial/geotiff/tests/golden_corpus/test_oracle.py +++ b/xrspatial/geotiff/tests/golden_corpus/test_oracle.py @@ -364,3 +364,96 @@ def test_missing_fixture_raises_filenotfounderror(tmp_path: Path) -> None: ) with pytest.raises(FileNotFoundError): compare_to_oracle(tmp_path / 'does_not_exist.tif', cand) + + +# --------------------------------------------------------------------------- +# Phase 2 PR 8 CRS-variant fixtures +# +# Smoke tests for the three CRS-representation fixtures added in PR 8 of +# issue #1930. Each test reads the on-disk fixture with rasterio to pin +# the bytes-on-disk behaviour, then drives the oracle with a hand-built +# candidate to verify the comparison path the fixture is meant to +# exercise. Phase 3 will wire real backends to these same files. +# --------------------------------------------------------------------------- + +_CRS_FIXTURE_DIR = Path(__file__).resolve().parent / 'fixtures' + + +def _read_crs_fixture(name: str): + """Open a fixture and return its rasterio metadata plus pixel data.""" + path = _CRS_FIXTURE_DIR / f'{name}.tif' + with rasterio.open(path) as src: + return ( + path, + src.crs, + src.transform, + src.read(1), # single-band uint8 + ) + + +def test_crs_epsg_3857_fixture_reports_epsg() -> None: + """``crs_epsg_3857`` fixture: rasterio reports CRS.from_epsg(3857). + + The straight-EPSG path. Oracle accepts a candidate that carries the + EPSG int under ``attrs['crs']``. + """ + path, ref_crs, transform, data = _read_crs_fixture('crs_epsg_3857') + assert ref_crs == rasterio.crs.CRS.from_epsg(3857) + assert ref_crs.to_epsg() == 3857 + + cand = _build_candidate(data, transform=transform, crs=3857) + compare_to_oracle(path, cand) + + +def test_crs_wkt_utm10n_fixture_resolves_to_epsg_via_fallback() -> None: + """``crs_wkt_utm10n``: WKT-only on disk, but resolves to EPSG:32610. + + The fixture's WKT has no AUTHORITY tags, so it is not byte-identical + to what ``CRS.from_epsg(32610).to_wkt()`` emits. PROJ still recognises + it as UTM 10N and assigns it EPSG:32610 on read, which is the + fallback path ``_crs_equal`` was built for. A candidate that carries + the bare EPSG int must compare equal to the rasterio-read WKT CRS. + """ + path, ref_crs, transform, data = _read_crs_fixture('crs_wkt_utm10n') + assert ref_crs.to_epsg() == 32610 + + # Candidate carries only the EPSG int. The oracle reaches the + # EPSG-fallback branch of _crs_equal because ref's WKT and the + # canonical EPSG:32610 WKT are not structurally equal. + cand = _build_candidate(data, transform=transform, crs=32610) + compare_to_oracle(path, cand) + + +def test_crs_citation_only_fixture_oracle_accepts_via_proj_dict() -> None: + """``crs_citation_only``: GeoKey citation, no AUTHORITY. + + Neither side has an EPSG code, and libgeotiff mutates the WKT on + round-trip (axis order, UNIT AUTHORITY) so structural ``CRS.__eq__`` + fails. The oracle falls back to comparing ``to_dict()`` (PROJ form), + which is stable across that round-trip. Pinned here so any future + refactor of ``_crs_equal`` that drops the PROJ-dict branch trips a + test instead of silently regressing. + """ + path, ref_crs, transform, data = _read_crs_fixture('crs_citation_only') + assert ref_crs is not None + assert ref_crs.to_epsg() is None + + # Candidate carries the WKT under crs_wkt; oracle's _candidate_crs + # picks it up via from_user_input. + cand = _build_candidate( + data, transform=transform, crs=None, crs_wkt=ref_crs.to_wkt(), + ) + compare_to_oracle(path, cand) + + +def test_crs_citation_only_fixture_rejects_unrelated_crs() -> None: + """Negative pin: the PROJ-dict fallback must still reject mismatches. + + EPSG:4326 has the same coarse ``proj=longlat`` family as the + citation-only CRS but a different ellipsoid (WGS84 vs the fixture's + unknown sphere). ``to_dict()`` differs, so the oracle must raise. + """ + path, _ref_crs, transform, data = _read_crs_fixture('crs_citation_only') + cand = _build_candidate(data, transform=transform, crs=4326) + with pytest.raises(AssertionError, match='CRS mismatch'): + compare_to_oracle(path, cand) From f4be47b66c937a5e7066a00176f9a8f7da5bae14 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Sat, 16 May 2026 06:07:52 -0700 Subject: [PATCH 2/2] geotiff: address review on PR 2.8 CRS variants Self-review surfaced two issues with the PROJ-dict fallback added in the first commit: * ``CRS.to_dict()`` returns ``{}`` for LOCAL_CS-style WKTs that PROJ has no canonical form for. An unguarded ``ref.to_dict() == cand.to_dict()`` would treat any two such CRSes as equal, a silent false-positive in the oracle. Short-circuit on empty dicts. * ``CRS.to_dict()`` drops the GEOGCS / PROJCS name, so two citation-only CRSes with the same shape but different names would compare equal. Documented as a known limit; the current corpus only has one citation fixture so it is theoretical. If it ever bites, switch to a name-aware comparison via ``to_dict(projjson=True)`` with an axis-order normaliser. Also adds two tests: * ``test_crs_wkt_utm10n_fixture_accepts_wkt_attr``: complements the EPSG-int test by exercising the ``attrs['crs_wkt']`` branch of ``_candidate_crs``. Both paths must reach the same verdict. * ``test_crs_equal_rejects_empty_proj_dict``: regression pin for the empty-dict short-circuit. Uses two LOCAL_CS WKTs with different UNIT blocks so rasterio's own ``CRS.__eq__`` reports them as unequal and the test actually exercises the fallback rather than short-circuiting on structural equality. --- .../geotiff/tests/golden_corpus/_oracle.py | 22 ++++++++- .../tests/golden_corpus/test_oracle.py | 45 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/xrspatial/geotiff/tests/golden_corpus/_oracle.py b/xrspatial/geotiff/tests/golden_corpus/_oracle.py index 0b9f27d78..ccfe907a7 100644 --- a/xrspatial/geotiff/tests/golden_corpus/_oracle.py +++ b/xrspatial/geotiff/tests/golden_corpus/_oracle.py @@ -159,7 +159,18 @@ def _crs_equal(ref, cand) -> bool: them onto a small set of canonical fields (proj kind, ellipsoid radius, units), which is stable across the libgeotiff round-trip that mutates WKT axis order and adds AUTHORITY["EPSG","9122"] to the - UNIT block. Use that as a last resort. + UNIT block. Use that as a last resort, but only when both sides + produce a non-empty dict (``CRS.to_dict()`` returns ``{}`` for + LOCAL_CS-style WKTs, which would otherwise let any two unrecognised + CRSes compare equal). + + Known limit: ``CRS.to_dict()`` drops the GEOGCS / PROJCS name, so two + citation-only CRSes with the same shape but different names compare + equal here. The current corpus only has one citation fixture so this + is theoretical; if it becomes load-bearing, switch to a name-aware + comparison via ``to_dict(projjson=True)`` (which preserves the name + but mutates axis order on round-trip and would need its own + normaliser). """ if ref is None and cand is None: return True @@ -177,9 +188,16 @@ def _crs_equal(ref, cand) -> bool: return ref_epsg == cand_epsg if ref_epsg is None and cand_epsg is None: try: - return ref.to_dict() == cand.to_dict() + ref_dict = ref.to_dict() + cand_dict = cand.to_dict() except Exception: return False + # Empty dict means "PROJ has no canonical form for this CRS" + # (e.g. LOCAL_CS). Refuse to declare equality in that case + # rather than match any other empty-dict CRS. + if not ref_dict or not cand_dict: + return False + return ref_dict == cand_dict return False diff --git a/xrspatial/geotiff/tests/golden_corpus/test_oracle.py b/xrspatial/geotiff/tests/golden_corpus/test_oracle.py index 737d901de..634a463b8 100644 --- a/xrspatial/geotiff/tests/golden_corpus/test_oracle.py +++ b/xrspatial/geotiff/tests/golden_corpus/test_oracle.py @@ -457,3 +457,48 @@ def test_crs_citation_only_fixture_rejects_unrelated_crs() -> None: cand = _build_candidate(data, transform=transform, crs=4326) with pytest.raises(AssertionError, match='CRS mismatch'): compare_to_oracle(path, cand) + + +def test_crs_wkt_utm10n_fixture_accepts_wkt_attr() -> None: + """``crs_wkt_utm10n`` also accepts a candidate that carries crs_wkt. + + Complements the EPSG-int test by exercising the WKT branch of + ``_candidate_crs`` (``attrs['crs_wkt']`` -> ``from_user_input``). + Both paths must reach the same verdict. + """ + path, ref_crs, transform, data = _read_crs_fixture('crs_wkt_utm10n') + cand = _build_candidate( + data, transform=transform, crs=None, crs_wkt=ref_crs.to_wkt(), + ) + compare_to_oracle(path, cand) + + +def test_crs_equal_rejects_empty_proj_dict() -> None: + """``_crs_equal`` must refuse to declare two LOCAL_CS-style CRSes equal. + + Regression pin for the PROJ-dict fallback added in this PR. PROJ + returns ``{}`` from ``to_dict()`` for LOCAL_CS WKTs; an unguarded + fallback would treat any two such CRSes as equal, which is a + silent-false-positive in the oracle. The fallback must short-circuit + on empty dicts. + """ + from xrspatial.geotiff.tests.golden_corpus._oracle import _crs_equal + + # Two LOCAL_CS WKTs with different UNIT blocks so rasterio's own + # ``CRS.__eq__`` reports them as unequal (otherwise the early-return + # in _crs_equal would short-circuit before the fallback runs). + a = rasterio.crs.CRS.from_wkt( + 'LOCAL_CS["a",UNIT["metre",1,AUTHORITY["EPSG","9001"]],' + 'AXIS["Easting",EAST],AXIS["Northing",NORTH]]' + ) + b = rasterio.crs.CRS.from_wkt( + 'LOCAL_CS["b",UNIT["foot",0.3048],' + 'AXIS["Easting",EAST],AXIS["Northing",NORTH]]' + ) + # Sanity: structurally unequal, neither has EPSG, both have empty + # PROJ-dict. Without the guard, the fallback would return True. + assert a != b + assert a.to_epsg() is None + assert b.to_epsg() is None + assert a.to_dict() == {} == b.to_dict() + assert _crs_equal(a, b) is False