Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@
napoleon_type_aliases = {
# general terms
"sequence": ":term:`sequence`",
"iterable": ":term:`iterable`",
"Hashable": ":term:`sequence`",
"iterable": "~collections.abc.Hashable",
"callable": ":py:func:`callable`",
"dict_like": ":term:`dict-like <mapping>`",
"dict-like": ":term:`dict-like <mapping>`",
Expand Down
13 changes: 13 additions & 0 deletions docs/raster_index/design_choices.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,16 @@ The downside here is that CRS information is duplicated in two places explicitly
### Don't like it?

We chose this approach to enable experimentation. It is entirely possible to experiment with other approaches. Please reach out if you have opinions on this topic.

## Handling the `GeoTransform` attribute

GDAL _chooses_ to track the affine transform using a `GeoTransform` attribute on a `spatial_ref` variable. The `"spatial_ref"` is a
"grid mapping" variable (as termed by the CF-conventions). It also records CRS information. Currently, our design is that
{py:class}`xproj.CRSIndex` controls the CRS information and handles the creation of the `"spatial_ref"` variable, or more generally,
the grid mapping variable. Thus, {py:class}`RasterIndex` _cannot_ keep the `"GeoTransform"` attribute on `"spatial_ref"` up-to-date
because it does not control it.

Thus, {py:func}`assign_index` will delete the `"GeoTransform"` attribute on the grid mapping variable if it is detected, after using it
to construct the affine matrix.

If you wish to extract the GeoTransform attribute to write it to a location of your choosing use {py:meth}`RasterIndex.as_geotransform`.
3 changes: 2 additions & 1 deletion docs/rasterize/exactextract.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
"outputs": [],
"source": [
"import xarray as xr\n",
"import xproj # noqa\n",
"\n",
"ds = xr.tutorial.open_dataset(\"eraint_uvz\")\n",
"ds = ds.rio.write_crs(\"epsg:4326\")\n",
"ds = ds.proj.assign_crs(spatial_ref=\"epsg:4326\")\n",
"ds"
]
},
Expand Down
3 changes: 2 additions & 1 deletion docs/rasterize/geometry_mask.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
"outputs": [],
"source": [
"import xarray as xr\n",
"import xproj # noqa\n",
"\n",
"ds = xr.tutorial.open_dataset(\"eraint_uvz\")[[\"u\"]]\n",
"ds = ds.rio.write_crs(\"epsg:4326\")\n",
"ds = ds.proj.assign_crs(spatial_ref=\"epsg:4326\")\n",
"ds"
]
},
Expand Down
3 changes: 2 additions & 1 deletion docs/rasterize/rasterio.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
"outputs": [],
"source": [
"import xarray as xr\n",
"import xproj # noqa\n",
"\n",
"ds = xr.tutorial.open_dataset(\"eraint_uvz\")\n",
"ds = ds.rio.write_crs(\"epsg:4326\")\n",
"ds = ds.proj.assign_crs(spatial_ref=\"epsg:4326\")\n",
"ds"
]
},
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ test = [
"pooch",
"dask-geopandas",
"rasterio",
"rioxarray",
"exactextract",
"sparse",
"netCDF4",
Expand Down
39 changes: 34 additions & 5 deletions src/rasterix/raster_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

from rasterix.odc_compat import BoundingBox, bbox_intersection, bbox_union, maybe_int, snap_grid
from rasterix.rioxarray_compat import guess_dims
from rasterix.utils import get_affine

T_Xarray = TypeVar("T_Xarray", "DataArray", "Dataset")

Expand All @@ -37,10 +38,14 @@ def assign_index(
) -> T_Xarray:
"""Assign a RasterIndex to an Xarray DataArray or Dataset.

By default, the affine transform is guessed by first looking for a ``GeoTransform`` attribute
on a CF "grid mapping" variable (commonly ``"spatial_ref"``). If not present, then the affine is determined from 1D coordinate
variables named ``x_dim`` and ``y_dim`` provided to this function.

Parameters
----------
obj : xarray.DataArray or xarray.Dataset
The object to assign the index to. Must have a rio accessor with a transform.
The object to assign the index to.
x_dim : str, optional
Name of the x dimension. If None, will be automatically detected.
y_dim : str, optional
Expand All @@ -53,22 +58,37 @@ def assign_index(
xarray.DataArray or xarray.Dataset
The input object with RasterIndex coordinates assigned.

Notes
-----
The "grid mapping" variable is determined following the CF conventions:

- If a DataArray is provided, we look for an attribute named ``"grid_mapping"``.
- For a Dataset, we pull the first detected ``"grid_mapping"`` attribute when iterating over data variables.

The value of this attribute is a variable name containing projection information (commonly ``"spatial_ref"``).
We then look for a ``"GeoTransform"`` attribute on this variable (following GDAL convention).

References
----------
- `CF conventions document <http://cfconventions.org/Data/cf-conventions/cf-conventions-1.11/cf-conventions.html#grid-mappings-and-projections>`_.
- `GDAL docs on GeoTransform <https://gdal.org/en/stable/tutorials/geotransforms_tut.html>`_.

Examples
--------
>>> import xarray as xr
>>> import rioxarray # Required for rio accessor
>>> import rioxarray # Required for reading TIFF
>>> da = xr.open_dataset("path/to/raster.tif", engine="rasterio")
>>> indexed_da = assign_index(da)
"""
import rioxarray # noqa

if x_dim is None or y_dim is None:
guessed_x, guessed_y = guess_dims(obj)
x_dim = x_dim or guessed_x
y_dim = y_dim or guessed_y

affine = get_affine(obj, x_dim=x_dim, y_dim=y_dim, clear_transform=True)

index = RasterIndex.from_transform(
obj.rio.transform(),
affine,
width=obj.sizes[x_dim],
height=obj.sizes[y_dim],
x_dim=x_dim,
Expand Down Expand Up @@ -626,6 +646,15 @@ def transform(self) -> Affine:
"""Affine transform for top-left corners."""
return self.center_transform() * Affine.translation(-0.5, -0.5)

def as_geotransform(self, *, decimals: int | None = None) -> str:
"""Convert the affine transform to a string suitable for saving as the GeoTransform attribute."""
gt = self.transform().to_gdal()
if decimals is not None:
fmt = f".{decimals}f"
return " ".join(f"{num:{fmt}}" for num in gt)
else:
return " ".join(map(str, gt))
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@scottyhq does this look useful enough to you. That way a user can write it to whichever attribute they want to

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, seems good!


def center_transform(self) -> Affine:
"""Affine transform for cell centers."""
if not self._axis_independent:
Expand Down
7 changes: 4 additions & 3 deletions src/rasterix/rasterize/rasterio.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
from rasterio.features import geometry_mask as geometry_mask_rio
from rasterio.features import rasterize as rasterize_rio

from .utils import XAXIS, YAXIS, clip_to_bbox, get_affine, is_in_memory, prepare_for_dask
from ..utils import get_affine
from .utils import XAXIS, YAXIS, clip_to_bbox, is_in_memory, prepare_for_dask

F = TypeVar("F", bound=Callable[..., Any])

Expand Down Expand Up @@ -161,7 +162,7 @@ def rasterize(
obj = clip_to_bbox(obj, geometries, xdim=xdim, ydim=ydim)

rasterize_kwargs = dict(
all_touched=all_touched, merge_alg=merge_alg, affine=get_affine(obj, xdim=xdim, ydim=ydim), env=env
all_touched=all_touched, merge_alg=merge_alg, affine=get_affine(obj, x_dim=xdim, y_dim=ydim), env=env
)
# FIXME: box.crs == geometries.crs

Expand Down Expand Up @@ -325,7 +326,7 @@ def geometry_mask(
obj = clip_to_bbox(obj, geometries, xdim=xdim, ydim=ydim)

geometry_mask_kwargs = dict(
all_touched=all_touched, affine=get_affine(obj, xdim=xdim, ydim=ydim), env=env
all_touched=all_touched, affine=get_affine(obj, x_dim=xdim, y_dim=ydim), env=env
)

if is_in_memory(obj=obj, geometries=geometries):
Expand Down
67 changes: 67 additions & 0 deletions src/rasterix/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import xarray as xr
from affine import Affine


def get_grid_mapping_var(obj: xr.Dataset | xr.DataArray) -> xr.DataArray | None:
grid_mapping_var = None
if isinstance(obj, xr.DataArray):
if maybe := obj.attrs.get("grid_mapping", None):
if maybe in obj.coords:
grid_mapping_var = maybe
else:
# for datasets, grab the first one for simplicity
for var in obj.data_vars.values():
if maybe := var.attrs.get("grid_mapping"):
if maybe in obj.coords:
# make sure it exists and is not an out-of-date attribute
grid_mapping_var = maybe
break
if grid_mapping_var is None and "spatial_ref" in obj.coords:
# hardcode this
grid_mapping_var = "spatial_ref"
if grid_mapping_var is not None:
return obj[grid_mapping_var]
return None


def get_affine(
obj: xr.Dataset | xr.DataArray, *, x_dim="x", y_dim="y", clear_transform: bool = False
) -> Affine:
"""
Grabs an affine transform from an Xarray object.

This method will first look for the ``"GeoTransform"`` attribute on a variable named
``"spatial_ref"``. If not, it will auto-guess the transform from the provided ``x_dim``,
and ``y_dim``.

Parameters
----------
obj: xr.DataArray or xr.Dataset
x_dim: str, optional
Name of the X dimension coordinate variable.
y_dim: str, optional
Name of the Y dimension coordinate variable.
clear_transform: bool
Whether to delete the ``GeoTransform`` attribute if detected.

Returns
-------
affine: Affine
"""
grid_mapping_var = get_grid_mapping_var(obj)
if grid_mapping_var is not None and (transform := grid_mapping_var.attrs.get("GeoTransform")):
if clear_transform:
del grid_mapping_var.attrs["GeoTransform"]
return Affine.from_gdal(*map(float, transform.split(" ")))
else:
x = obj.coords[x_dim]
y = obj.coords[y_dim]
if x.ndim != 1:
raise ValueError(f"Coordinate variable {x_dim=!r} must be 1D.")
if y.ndim != 1:
raise ValueError(f"Coordinate variable {y_dim=!r} must be 1D.")
dx = (x[1] - x[0]).item()
dy = (y[1] - y[0]).item()
Comment on lines +63 to +64
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we check the coordinate labels for uniform spacing?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure there's a good way to do it. Presumably that's why rioxarray also doesn't do it.

return Affine.translation(
x[0].item() - dx / 2, (y[0] if dy < 0 else y[-1]).item() - dy / 2
) * Affine.scale(dx, dy)
Binary file modified tests/geometry_mask_snapshot.nc
Binary file not shown.
Binary file modified tests/rasterize_snapshot.nc
Binary file not shown.
3 changes: 2 additions & 1 deletion tests/test_exact.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@
import sparse
import xarray as xr
import xarray.testing as xrt
import xproj # noqa
from exactextract import exact_extract
from hypothesis import example, given, settings
from xarray.tests import raise_if_dask_computes

from rasterix.rasterize.exact import CoverageWeights, coverage, xy_to_raster_source

dataset = xr.tutorial.open_dataset("eraint_uvz").rename({"latitude": "y", "longitude": "x"})
dataset = dataset.rio.write_crs("epsg:4326")
dataset = dataset.proj.assign_crs(spatial_ref="epsg:4326")
world = gpd.read_file(geodatasets.get_path("naturalearth land"))
XSIZE = dataset.x.size
YSIZE = dataset.y.size
Expand Down
51 changes: 38 additions & 13 deletions tests/test_raster_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
import numpy as np
import pyproj
import pytest
import rioxarray # noqa
import xarray as xr
from affine import Affine
from xarray.testing import assert_identical

from rasterix import RasterIndex, assign_index
from rasterix.utils import get_grid_mapping_var

CRS_ATTRS = pyproj.CRS.from_epsg(4326).to_cf()

Expand All @@ -20,6 +20,32 @@ def dataset_from_transform(transform: str) -> xr.Dataset:
).pipe(assign_index)


def test_grid_mapping_var():
obj = xr.DataArray()
assert get_grid_mapping_var(obj) is None

obj = xr.Dataset()
assert get_grid_mapping_var(obj) is None

obj = xr.DataArray(attrs={"grid_mapping": "spatial_ref"})
assert get_grid_mapping_var(obj) is None

obj = xr.DataArray(attrs={"grid_mapping": "spatial_ref"}, coords={"spatial_ref": 0})
assert_identical(get_grid_mapping_var(obj), obj["spatial_ref"])

obj = xr.Dataset({"foo": ((), 0, {"grid_mapping": "spatial_ref"})})
assert get_grid_mapping_var(obj) is None

obj = xr.Dataset(
{
"foo": ((), 0, {"grid_mapping": "spatial_ref_0"}),
"zoo": ((), 0, {"grid_mapping": "spatial_ref_1"}),
},
coords={"spatial_ref_1": 0},
)
assert_identical(get_grid_mapping_var(obj), obj["spatial_ref_1"])


def test_set_xindex() -> None:
coords = xr.Coordinates(coords={"x": np.arange(0.5, 12.5), "y": np.arange(0.5, 10.5)}, indexes={})
ds = xr.Dataset(coords=coords)
Expand All @@ -28,26 +54,26 @@ def test_set_xindex() -> None:
ds.set_xindex(["x", "y"], RasterIndex)


def test_rectilinear():
source = "/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif"
da_no_raster_index = xr.open_dataarray(source, engine="rasterio")
da_raster_index = assign_index(da_no_raster_index)
assert da_raster_index.equals(da_no_raster_index)


def test_raster_index_properties():
index1 = RasterIndex.from_transform(Affine.identity(), width=12, height=10)
assert index1.xy_shape == (12, 10)
assert index1.xy_dims == ("x", "y")
assert index1.xy_coord_names == ("x", "y")
assert index1.as_geotransform() == "0.0 1.0 0.0 0.0 0.0 1.0"

index2 = RasterIndex.from_transform(Affine.identity(), width=12, height=10, x_dim="x_", y_dim="y_")
assert index2.xy_dims == ("x_", "y_")
assert index2.as_geotransform() == "0.0 1.0 0.0 0.0 0.0 1.0"

index3 = RasterIndex.from_transform(Affine.rotation(45.0), width=12, height=10)
assert index3.xy_shape == (12, 10)
assert index3.xy_dims == ("x", "y")
assert index3.xy_coord_names == ("xc", "yc")
assert (
index3.as_geotransform()
== "0.0 0.7071067811865476 -0.7071067811865475 0.0 0.7071067811865475 0.7071067811865476"
)
assert index3.as_geotransform(decimals=6) == "0.000000 0.707107 -0.707107 0.000000 0.707107 0.707107"


# TODO: parameterize over
Expand All @@ -56,18 +82,17 @@ def test_raster_index_properties():
def test_sel_slice():
ds = xr.Dataset({"foo": (("y", "x"), np.ones((10, 12)))})
transform = Affine.identity()
ds = ds.rio.write_transform(transform)
ds.coords["spatial_ref"] = ((), 0, {"GeoTransform": " ".join(map(str, transform.to_gdal()))})
ds = assign_index(ds)

assert "GeoTransform" not in ds.spatial_ref.attrs
assert ds.xindexes["x"].transform() == transform

actual = ds.sel(x=slice(4), y=slice(3, 5))
assert isinstance(actual.xindexes["x"], RasterIndex)
assert isinstance(actual.xindexes["y"], RasterIndex)
actual_transform = actual.xindexes["x"].transform()

assert actual_transform == actual.rio.transform()
assert actual_transform == (transform * Affine.translation(0, 3))
actual_transform = actual.xindexes["x"].transform()
assert actual_transform == transform * Affine.translation(0, 3)

reverse = ds.isel(y=slice(None, None, -1))
assert_identical(reverse.y, ds.y[::-1])
Expand Down
Loading
Loading