diff --git a/docs/api-hidden.rst b/docs/api-hidden.rst index d9a1771..032fa39 100644 --- a/docs/api-hidden.rst +++ b/docs/api-hidden.rst @@ -15,3 +15,5 @@ ProjIndexMixin._proj_get_crs ProjIndexMixin._proj_set_crs ProjIndexMixin._proj_to_crs + format_compact_cf + format_full_cf_gdal diff --git a/docs/api.rst b/docs/api.rst index 66ed8a0..f2fddd7 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -36,6 +36,8 @@ To enable it, be sure to import ``xproj`` after ``xarray``: Dataset.proj.assign_crs Dataset.proj.map_crs + Dataset.proj.write_crs_info + Dataset.proj.clear_crs_info DataArray ``proj`` extension @@ -67,6 +69,8 @@ To enable it, be sure to import ``xproj`` after ``xarray``: DataArray.proj.assign_crs DataArray.proj.map_crs + DataArray.proj.write_crs_info + DataArray.proj.clear_crs_info .. currentmodule:: xproj diff --git a/docs/integration.md b/docs/integration.md index 6c68d59..62af775 100644 --- a/docs/integration.md +++ b/docs/integration.md @@ -96,9 +96,16 @@ ds_wgs84.geo.crs ## CRS-aware Xarray index -Here below is a basic example of a {term}`CRS-aware index`, here a custom Xarray -index that adds some CRS-dependent functionality on top of Xarray's default -`PandasIndex`. +Here below is a basic example of a {term}`CRS-aware index`, i.e., a custom +Xarray index that adds some CRS-dependent functionality (via the {term}`proj +index interface`) on top of Xarray's default `PandasIndex`. + +:::{note} +The {class}`~xproj.ProjIndexMixin` class can be used to mark an Xarray index as +formally implementing the {term}`proj index interface`. However, XProj doesn't +require an Xarray index to explicitly inherit from this mixin class to be +recognized as CRS-aware. +::: ```{code-cell} ipython3 import warnings diff --git a/docs/terminology.md b/docs/terminology.md index d2f1f6a..c5e875a 100644 --- a/docs/terminology.md +++ b/docs/terminology.md @@ -13,11 +13,13 @@ Spatial reference coordinate An Xarray scalar {term}`coordinate` that usually declares a specific {term}`CRS` via its metadata. CF conventions use the term [grid mapping variable](https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#grid-mappings-and-projections) - for the same concept. XProj associates a {class}`~xproj.CRSIndex` to such - coordinate. The name and the value of the coordinate is arbitrary, although - ``spatial_ref`` is a common name used by default in - [rioxarray](https://corteva.github.io/rioxarray) and - [odc-geo](https://odc-geo.readthedocs.io) (following GDAL's NetCDF driver). + for almost the same concept (the only difference is that a *grid mapping + variable* is a data variable, not a coordinate, although Xarray's builtin CF + decoders automatically promote it as a coordinate). XProj associates a + {class}`~xproj.CRSIndex` to such coordinate. The name and the value of the + coordinate is arbitrary, although ``spatial_ref`` is a common name used by + default in [rioxarray](https://corteva.github.io/rioxarray) and + [odc-geo](https://odc-geo.readthedocs.io) (inspired by GDAL). CRS-aware index Any custom {class}`xarray.Index` that implements data selection, alignment diff --git a/docs/usage.md b/docs/usage.md index 70e658b..1fa483c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -81,6 +81,22 @@ reference coordinate` to the `.proj` accessor: ds_wgs84.proj("spatial_ref").crs ``` +## Writing CRS information + +Before saving the dataset to a given format, it may be useful to write the CRS +information as coordinate metadata (attributes) so it could be later loaded by +other tools like GDAL that understand this metadata. This can be done with +{meth}`xarray.Dataset.proj.write_crs_info`: + +```{code-cell} +ds_info = ds_wgs84.proj.write_crs_info() + +ds_info.spatial_ref +``` + +Conversely, the attributes of the spatial reference coordinates can be cleared +via {meth}`xarray.Dataset.proj.clear_crs_info`. + ## CRS-aware alignment One of the main motivations of associating a {class}`~xproj.CRSIndex` with a diff --git a/xproj/__init__.py b/xproj/__init__.py index 4452cfe..a60aa08 100644 --- a/xproj/__init__.py +++ b/xproj/__init__.py @@ -2,10 +2,19 @@ from .accessor import ProjAccessor as _ProjAccessor # noqa: F401 from .accessor import register_accessor +from .crs_utils import format_compact_cf, format_full_cf_gdal from .index import CRSIndex # noqa: F401 from .mixins import ProjAccessorMixin, ProjIndexMixin -__all__ = ["_ProjAccessor", "CRSIndex", "ProjAccessorMixin", "ProjIndexMixin", "register_accessor"] +__all__ = [ + "_ProjAccessor", + "CRSIndex", + "ProjAccessorMixin", + "ProjIndexMixin", + "format_compact_cf", + "format_full_cf_gdal", + "register_accessor", +] try: __version__ = version("xproj") diff --git a/xproj/accessor.py b/xproj/accessor.py index b4e5b31..498c5e5 100644 --- a/xproj/accessor.py +++ b/xproj/accessor.py @@ -1,12 +1,13 @@ from __future__ import annotations import warnings -from collections.abc import Hashable, Iterable, Mapping +from collections.abc import Callable, Hashable, Iterable, Mapping from typing import Any, Literal, TypeVar, cast import pyproj import xarray as xr +from xproj.crs_utils import format_compact_cf from xproj.index import CRSIndex from xproj.mixins import ProjIndexMixin from xproj.utils import Frozen, FrozenDict @@ -167,6 +168,27 @@ def crs_aware_indexes(self) -> Frozen[Hashable, xr.Index]: return FrozenDict(self._crs_aware_indexes) + def _get_crs_index(self, coord_name: Hashable) -> CRSIndex: + # Get a nice error message when trying to access a spatial reference + # coordinate with a CRSIndex using an arbitrary name. + + if coord_name not in self.crs_indexes: + if coord_name not in self._obj.coords: + raise KeyError(f"no coordinate {coord_name!r} found in Dataset or DataArray") + elif self._obj.coords[coord_name].ndim != 0: + raise ValueError(f"coordinate {coord_name!r} is not a scalar coordinate") + elif coord_name not in self._obj.xindexes: + raise ValueError( + f"coordinate {coord_name!r} has no index. It must have a CRSIndex associated " + f"(e.g., via Dataset.proj.assign_crs({coord_name}=...) or " + f"DataArray.proj.assign_crs({coord_name}=...)) to be used as " + "a spatial reference coordinate with xproj." + ) + else: + raise ValueError(f"coordinate {coord_name!r} index is not a CRSIndex") + + return self.crs_indexes[coord_name] + def __call__(self, coord_name: Hashable): """Select a given CRS by coordinate name. @@ -183,19 +205,14 @@ def __call__(self, coord_name: Hashable): A proxy accessor for a single CRS. """ - if coord_name in self.crs_aware_indexes: - index = self.crs_aware_indexes[coord_name] - return CRSProxy(self._obj, coord_name, index._proj_get_crs()) # type: ignore + crs: pyproj.CRS - if coord_name not in self.crs_indexes: - if coord_name not in self._obj.coords: - raise KeyError(f"no coordinate {coord_name!r} found in Dataset or DataArray") - elif coord_name not in self._obj.xindexes: - raise ValueError(f"coordinate {coord_name!r} has no index") - else: - raise ValueError(f"coordinate {coord_name!r} index is not a CRSIndex") + if coord_name in self.crs_aware_indexes: + crs = self.crs_aware_indexes[coord_name]._proj_get_crs() # type: ignore + else: + crs = self._get_crs_index(coord_name).crs - return CRSProxy(self._obj, coord_name, self.crs_indexes[coord_name].crs) + return CRSProxy(self._obj, coord_name, crs) def assert_one_crs_index(self): """Raise an `AssertionError` if no or multiple CRS-indexed coordinates @@ -333,7 +350,7 @@ def map_crs( indexes = _obj.xindexes for spatial_ref, coord_names in spatial_ref_coords.items(): - crs = self(spatial_ref).crs + crs = self._get_crs_index(spatial_ref).crs map_indexes = [] map_indexes_coords = set() @@ -393,3 +410,85 @@ def map_crs( _obj = _obj.assign_coords(xr.Coordinates(new_vars, {n: new_index for n in vars})) return _obj + + def _update_crs_info( + self, spatial_ref: Hashable | None, func: Callable[[xr.Variable, CRSIndex], None] + ) -> xr.DataArray | xr.Dataset: + if spatial_ref is None: + spatial_ref_coords = list(self.crs_indexes) + else: + spatial_ref_coords = [spatial_ref] + + _obj = self._obj.copy(deep=False) + + for coord_name in spatial_ref_coords: + index = self._get_crs_index(coord_name) + var = self._obj[coord_name].variable.copy(deep=False) + func(var, index) + _obj = _obj.assign_coords(xr.Coordinates({coord_name: var}, {coord_name: index})) + + return _obj + + def write_crs_info( + self, + spatial_ref: Hashable | None = None, + func: Callable[[pyproj.CRS], dict[str, Any]] = format_compact_cf, + ) -> xr.DataArray | xr.Dataset: + """Write CRS information as attributes to one or all spatial + reference coordinates. + + Parameters + ---------- + spatial_ref : Hashable, optional + The name of a :term:`spatial reference coordinate`. If not provided (default), + CRS information will be written to all spatial reference coordinates found in + the Dataset or DataArray. Each spatial reference coordinate must already have + a :py:class:`~xproj.CRSIndex` associated. + func : callable, optional + Any callable used to format CRS information as coordinate variable attributes. + The default function adds a ``crs_wkt`` attribute for compatibility with + CF conventions. + + Returns + ------- + Dataset or DataArray + A new Dataset or DatArray object with attributes updated for one or all + spatial reference coordinates. + + See Also + -------- + ~xproj.format_compact_cf + ~xproj.format_full_cf_gdal + Dataset.proj.clear_crs_info + DataArray.proj.clear_crs_info + + """ + return self._update_crs_info( + spatial_ref, lambda var, index: var.attrs.update(func(index.crs)) + ) + + def clear_crs_info(self, spatial_ref: Hashable | None = None) -> xr.DataArray | xr.Dataset: + """Convenient method to clear all attributes of one or all spatial + reference coordinates. + + Parameters + ---------- + spatial_ref : Hashable, optional + The name of a :term:`spatial reference coordinate`. If not provided (default), + CRS information will be cleared for all spatial reference coordinates found in + the Dataset or DataArray. Each spatial reference coordinate must already have + a :py:class:`~xproj.CRSIndex` associated. + + Returns + ------- + Dataset or DataArray + A new Dataset or DatArray object with attributes cleared for one or all + spatial reference coordinates. + + See Also + -------- + Dataset.proj.write_crs_info + DataArray.proj.write_crs_info + + """ + return self._update_crs_info(spatial_ref, lambda var, _: var.attrs.clear()) diff --git a/xproj/crs_utils.py b/xproj/crs_utils.py new file mode 100644 index 0000000..8d97b3c --- /dev/null +++ b/xproj/crs_utils.py @@ -0,0 +1,63 @@ +from typing import Any + +import pyproj + + +def format_compact_cf(crs: pyproj.CRS) -> dict[str, Any]: + """Format CRS as a dictionary for minimal compatibility with + CF conventions. + + More info: + https://cfconventions.org/cf-conventions/cf-conventions.html + + Parameters + ---------- + crs : pyproj.crs.CRS + The input CRS object to format. + + Returns + ------- + dict + A dictionary with one ``crs_wkt`` item that contains + the CRS information formatted as Well-Known Text (WKT). + + See Also + -------- + xarray.Dataset.proj.write_crs_info + format_full_cf_gdal + + """ + return {"crs_wkt": crs.to_wkt()} + + +def format_full_cf_gdal(crs: pyproj.CRS) -> dict[str, Any]: + """Format CRS as a dictionary for full compatibility with + CF conventions and GDAL. + + More info: + + - https://cfconventions.org/cf-conventions/cf-conventions.html + - https://gdal.org/en/stable/drivers/raster/netcdf.html + + Parameters + ---------- + crs : pyproj.crs.CRS + The input CRS object to format. + + Returns + ------- + dict + A dictionary with two ``crs_wkt`` and ``spatial_ref`` items + that contains the CRS information formatted as Well-Known Text (WKT), + as well as items representing all the CF grid mapping variable + attributes exported via :py:meth:`pyproj.crs.CRS.to_cf`. + + See Also + -------- + xarray.Dataset.proj.write_crs_info + format_compact_cf + + """ + output = crs.to_cf() + output["spatial_ref"] = crs.to_wkt() + return output diff --git a/xproj/tests/test_accessor.py b/xproj/tests/test_accessor.py index 304b8cf..d80803e 100644 --- a/xproj/tests/test_accessor.py +++ b/xproj/tests/test_accessor.py @@ -102,16 +102,25 @@ def test_accessor_callable_crs_aware_index() -> None: def test_accessor_callable_error(spatial_xr_obj) -> None: - obj = spatial_xr_obj.assign_coords(x=[1, 2], foo=("x", [3, 4])) + class DummyIndex(xr.Index): + @classmethod + def from_variables(cls, variables, *, options): + return cls() + + obj = spatial_xr_obj.assign_coords(x=[1, 2], foo=("x", [3, 4]), a=0, b=0) + obj = obj.set_xindex("b", DummyIndex) with pytest.raises(KeyError, match="no coordinate 'bar' found"): obj.proj("bar") - with pytest.raises(ValueError, match="coordinate 'foo' has no index"): + with pytest.raises(ValueError, match="coordinate 'foo' is not a scalar coordinate"): obj.proj("foo") - with pytest.raises(ValueError, match="coordinate 'x' index is not a CRSIndex"): - obj.proj("x") + with pytest.raises(ValueError, match="coordinate 'a' has no index"): + obj.proj("a") + + with pytest.raises(ValueError, match="coordinate 'b' index is not a CRSIndex"): + obj.proj("b") def test_accessor_assert_one_index() -> None: @@ -241,3 +250,41 @@ def _proj_set_crs(self, spatial_ref, crs): with pytest.raises(ValueError, match="missing indexed coordinate"): ds.proj.map_crs(spatial_ref=["x"]) + + +def test_accessor_write_crs_info(spatial_xr_obj) -> None: + obj_with_attrs = spatial_xr_obj.proj.write_crs_info() + assert "crs_wkt" in obj_with_attrs.spatial_ref.attrs + + # test CRSIndex is preserved + assert "spatial_ref" in obj_with_attrs.xindexes + + # test attrs unchanged in original object + assert "crs_wkt" not in spatial_xr_obj.spatial_ref.attrs + + # test spatial ref coordinate provided explicitly + obj_with_attrs2 = spatial_xr_obj.proj.write_crs_info("spatial_ref") + assert "crs_wkt" in obj_with_attrs2.spatial_ref.attrs + + # test alternative func + obj_with_attrs3 = spatial_xr_obj.proj.write_crs_info(func=xproj.format_full_cf_gdal) + assert "crs_wkt" in obj_with_attrs3.spatial_ref.attrs + assert "spatial_ref" in obj_with_attrs3.spatial_ref.attrs + assert "grid_mapping_name" in obj_with_attrs3.spatial_ref.attrs + + +def test_accessor_clear_crs_info(spatial_xr_obj) -> None: + obj_with_attrs = spatial_xr_obj.proj.write_crs_info() + + cleared = obj_with_attrs.proj.clear_crs_info() + assert not len(cleared.spatial_ref.attrs) + + # test CRSIndex is preserved + assert "spatial_ref" in cleared.xindexes + + # test attrs unchanged in original object + assert len(obj_with_attrs.spatial_ref.attrs) > 0 + + # test spatial ref coordinate provided explicitly + cleared2 = obj_with_attrs.proj.clear_crs_info("spatial_ref") + assert not len(cleared2.spatial_ref.attrs)