Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion xrspatial/geotiff/tests/golden_corpus/_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,31 @@ def _candidate_crs(candidate_da: xr.DataArray):


def _crs_equal(ref, cand) -> bool:
"""EPSG-aware CRS equality.
"""EPSG-aware CRS equality with a PROJ-dict fallback.

rasterio's ``CRS.__eq__`` compares WKT structurally, which makes
EPSG-equivalent WKTs (one from PROJ, one from libgeotiff) compare
unequal even when they describe the same coordinate system. Fall back
to EPSG-code comparison when both sides resolve to an EPSG code.

Citation-only CRSes (a user-supplied name with no AUTHORITY tag, e.g.
the Phase 2 PR 8 ``crs_citation_only`` fixture) cannot be compared by
EPSG code because neither side has one. PROJ's ``to_dict()`` projects
them onto a small set of canonical fields (proj kind, ellipsoid
radius, units), which is stable across the libgeotiff round-trip
that mutates WKT axis order and adds AUTHORITY["EPSG","9122"] to the
UNIT block. Use that as a last resort, but only when both sides
produce a non-empty dict (``CRS.to_dict()`` returns ``{}`` for
LOCAL_CS-style WKTs, which would otherwise let any two unrecognised
CRSes compare equal).

Known limit: ``CRS.to_dict()`` drops the GEOGCS / PROJCS name, so two
citation-only CRSes with the same shape but different names compare
equal here. The current corpus only has one citation fixture so this
is theoretical; if it becomes load-bearing, switch to a name-aware
comparison via ``to_dict(projjson=True)`` (which preserves the name
but mutates axis order on round-trip and would need its own
normaliser).
"""
if ref is None and cand is None:
return True
Expand All @@ -167,6 +186,18 @@ def _crs_equal(ref, cand) -> bool:
cand_epsg = None
if ref_epsg is not None and cand_epsg is not None:
return ref_epsg == cand_epsg
if ref_epsg is None and cand_epsg is None:
try:
ref_dict = ref.to_dict()
cand_dict = cand.to_dict()
except Exception:
return False
# Empty dict means "PROJ has no canonical form for this CRS"
# (e.g. LOCAL_CS). Refuse to declare equality in that case
# rather than match any other empty-dict CRS.
if not ref_dict or not cand_dict:
return False
return ref_dict == cand_dict
return False


Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
8 changes: 5 additions & 3 deletions xrspatial/geotiff/tests/golden_corpus/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,11 @@ def _resolve_crs(crs_spec: dict[str, Any] | None):
if "wkt" in crs_spec:
return CRS.from_wkt(str(crs_spec["wkt"]))
if "citation" in crs_spec:
# Citation-only: a WKT with just a GEOGCS / PROJCS name and no
# numeric parameters. Useful for exercising the "we have a CRS
# name but no projection" code path. Phase 2 PR 8 fills this in.
# Citation-only: a WKT keyed only by name, no AUTHORITY tag and
# no numeric projection parameters. Exercises the oracle's
# non-EPSG WKT fallback (Phase 2 PR 8 of #1930). PROJ does not
# resolve this to an EPSG code; on round-trip libgeotiff mutates
# the WKT (axis order, UNIT AUTHORITY) but preserves to_dict().
return CRS.from_wkt(
f'GEOGCS["{crs_spec["citation"]}",DATUM["unknown",SPHEROID["unknown",6378137,0]],'
'PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
Expand Down
41 changes: 41 additions & 0 deletions xrspatial/geotiff/tests/golden_corpus/manifest.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -490,3 +490,44 @@ fixtures:
atol: 0.0
rtol: 0.0
lossy: false

# Phase 2 PR 8: CRS representation variants. Three fixtures, one per
# CRS encoding the manifest allows. Pixel content is irrelevant; the
# contract under test is what the GeoTIFF tags say about the CRS.
- id: crs_epsg_3857
description: >-
EPSG-coded CRS. Web Mercator (EPSG:3857). Exercises the
straight EPSG path in the oracle.
width: 8
height: 8
dtype: uint8
crs:
epsg: 3857
transform: [10.0, 0.0, -8000000.0, 0.0, -10.0, 5000000.0]
tags: [fast, crs, epsg]
- id: crs_wkt_utm10n
description: >-
WKT-encoded CRS for EPSG:32610 (UTM 10N), but with AUTHORITY
blocks stripped so the WKT is not byte-identical to what
rasterio emits for from_epsg(32610). Exercises the oracle's
EPSG-code-equality fallback added in PR #1991.
width: 8
height: 8
dtype: uint8
crs:
wkt: 'PROJCS["WGS 84 / UTM zone 10N",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",-123],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'
transform: [10.0, 0.0, 500000.0, 0.0, -10.0, 4500000.0]
tags: [fast, crs, wkt]
- id: crs_citation_only
description: >-
Citation-only CRS: a user-supplied name with no AUTHORITY tag
and no formally registered EPSG code. PROJ round-trips it as a
generic geographic CRS keyed only by name. Exercises the
oracle's non-EPSG WKT fallback.
width: 8
height: 8
dtype: uint8
crs:
citation: User-provided projection, no formal EPSG/WKT
transform: [0.001, 0.0, -120.0, 0.0, -0.001, 45.0]
tags: [fast, crs, citation]
138 changes: 138 additions & 0 deletions xrspatial/geotiff/tests/golden_corpus/test_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,3 +364,141 @@ def test_missing_fixture_raises_filenotfounderror(tmp_path: Path) -> None:
)
with pytest.raises(FileNotFoundError):
compare_to_oracle(tmp_path / 'does_not_exist.tif', cand)


# ---------------------------------------------------------------------------
# Phase 2 PR 8 CRS-variant fixtures
#
# Smoke tests for the three CRS-representation fixtures added in PR 8 of
# issue #1930. Each test reads the on-disk fixture with rasterio to pin
# the bytes-on-disk behaviour, then drives the oracle with a hand-built
# candidate to verify the comparison path the fixture is meant to
# exercise. Phase 3 will wire real backends to these same files.
# ---------------------------------------------------------------------------

_CRS_FIXTURE_DIR = Path(__file__).resolve().parent / 'fixtures'


def _read_crs_fixture(name: str):
"""Open a fixture and return its rasterio metadata plus pixel data."""
path = _CRS_FIXTURE_DIR / f'{name}.tif'
with rasterio.open(path) as src:
return (
path,
src.crs,
src.transform,
src.read(1), # single-band uint8
)


def test_crs_epsg_3857_fixture_reports_epsg() -> None:
"""``crs_epsg_3857`` fixture: rasterio reports CRS.from_epsg(3857).

The straight-EPSG path. Oracle accepts a candidate that carries the
EPSG int under ``attrs['crs']``.
"""
path, ref_crs, transform, data = _read_crs_fixture('crs_epsg_3857')
assert ref_crs == rasterio.crs.CRS.from_epsg(3857)
assert ref_crs.to_epsg() == 3857

cand = _build_candidate(data, transform=transform, crs=3857)
compare_to_oracle(path, cand)


def test_crs_wkt_utm10n_fixture_resolves_to_epsg_via_fallback() -> None:
"""``crs_wkt_utm10n``: WKT-only on disk, but resolves to EPSG:32610.

The fixture's WKT has no AUTHORITY tags, so it is not byte-identical
to what ``CRS.from_epsg(32610).to_wkt()`` emits. PROJ still recognises
it as UTM 10N and assigns it EPSG:32610 on read, which is the
fallback path ``_crs_equal`` was built for. A candidate that carries
the bare EPSG int must compare equal to the rasterio-read WKT CRS.
"""
path, ref_crs, transform, data = _read_crs_fixture('crs_wkt_utm10n')
assert ref_crs.to_epsg() == 32610

# Candidate carries only the EPSG int. The oracle reaches the
# EPSG-fallback branch of _crs_equal because ref's WKT and the
# canonical EPSG:32610 WKT are not structurally equal.
cand = _build_candidate(data, transform=transform, crs=32610)
compare_to_oracle(path, cand)


def test_crs_citation_only_fixture_oracle_accepts_via_proj_dict() -> None:
"""``crs_citation_only``: GeoKey citation, no AUTHORITY.

Neither side has an EPSG code, and libgeotiff mutates the WKT on
round-trip (axis order, UNIT AUTHORITY) so structural ``CRS.__eq__``
fails. The oracle falls back to comparing ``to_dict()`` (PROJ form),
which is stable across that round-trip. Pinned here so any future
refactor of ``_crs_equal`` that drops the PROJ-dict branch trips a
test instead of silently regressing.
"""
path, ref_crs, transform, data = _read_crs_fixture('crs_citation_only')
assert ref_crs is not None
assert ref_crs.to_epsg() is None

# Candidate carries the WKT under crs_wkt; oracle's _candidate_crs
# picks it up via from_user_input.
cand = _build_candidate(
data, transform=transform, crs=None, crs_wkt=ref_crs.to_wkt(),
)
compare_to_oracle(path, cand)


def test_crs_citation_only_fixture_rejects_unrelated_crs() -> None:
"""Negative pin: the PROJ-dict fallback must still reject mismatches.

EPSG:4326 has the same coarse ``proj=longlat`` family as the
citation-only CRS but a different ellipsoid (WGS84 vs the fixture's
unknown sphere). ``to_dict()`` differs, so the oracle must raise.
"""
path, _ref_crs, transform, data = _read_crs_fixture('crs_citation_only')
cand = _build_candidate(data, transform=transform, crs=4326)
with pytest.raises(AssertionError, match='CRS mismatch'):
compare_to_oracle(path, cand)


def test_crs_wkt_utm10n_fixture_accepts_wkt_attr() -> None:
"""``crs_wkt_utm10n`` also accepts a candidate that carries crs_wkt.

Complements the EPSG-int test by exercising the WKT branch of
``_candidate_crs`` (``attrs['crs_wkt']`` -> ``from_user_input``).
Both paths must reach the same verdict.
"""
path, ref_crs, transform, data = _read_crs_fixture('crs_wkt_utm10n')
cand = _build_candidate(
data, transform=transform, crs=None, crs_wkt=ref_crs.to_wkt(),
)
compare_to_oracle(path, cand)


def test_crs_equal_rejects_empty_proj_dict() -> None:
"""``_crs_equal`` must refuse to declare two LOCAL_CS-style CRSes equal.

Regression pin for the PROJ-dict fallback added in this PR. PROJ
returns ``{}`` from ``to_dict()`` for LOCAL_CS WKTs; an unguarded
fallback would treat any two such CRSes as equal, which is a
silent-false-positive in the oracle. The fallback must short-circuit
on empty dicts.
"""
from xrspatial.geotiff.tests.golden_corpus._oracle import _crs_equal

# Two LOCAL_CS WKTs with different UNIT blocks so rasterio's own
# ``CRS.__eq__`` reports them as unequal (otherwise the early-return
# in _crs_equal would short-circuit before the fallback runs).
a = rasterio.crs.CRS.from_wkt(
'LOCAL_CS["a",UNIT["metre",1,AUTHORITY["EPSG","9001"]],'
'AXIS["Easting",EAST],AXIS["Northing",NORTH]]'
)
b = rasterio.crs.CRS.from_wkt(
'LOCAL_CS["b",UNIT["foot",0.3048],'
'AXIS["Easting",EAST],AXIS["Northing",NORTH]]'
)
# Sanity: structurally unequal, neither has EPSG, both have empty
# PROJ-dict. Without the guard, the fallback would return True.
assert a != b
assert a.to_epsg() is None
assert b.to_epsg() is None
assert a.to_dict() == {} == b.to_dict()
assert _crs_equal(a, b) is False
Loading