From 586ff473fc7a625611592200b4386be31de24a58 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 30 Jun 2025 17:38:45 -0600 Subject: [PATCH 1/4] Add docs --- docs/_static/style.css | 4 + docs/conf.py | 3 +- docs/index.md | 3 + docs/raster_index/combining.md | 2 +- docs/raster_index/crs.ipynb | 199 ++++++++++++++ docs/raster_index/design_choices.md | 40 +++ docs/raster_index/indexing.ipynb | 301 ++++++++++++++++++++++ docs/raster_index/intro.ipynb | 385 ++++++++++++++++++++++++++++ src/rasterix/raster_index.py | 7 +- 9 files changed, 941 insertions(+), 3 deletions(-) create mode 100644 docs/raster_index/crs.ipynb create mode 100644 docs/raster_index/indexing.ipynb create mode 100644 docs/raster_index/intro.ipynb diff --git a/docs/_static/style.css b/docs/_static/style.css index d97a63f..d749760 100644 --- a/docs/_static/style.css +++ b/docs/_static/style.css @@ -1,3 +1,7 @@ +.sidebar-log { + max-width: 70%; +} + .xr-wrap { font-size: 0.85em; margin-left: 1.25em; diff --git a/docs/conf.py b/docs/conf.py index 366d125..df083cf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -116,7 +116,7 @@ "font-size--small--2": "87.5%", } html_theme_options = dict( - sidebar_hide_name=True, + sidebar_hide_name=False, light_css_variables=css_vars, dark_css_variables=css_vars, ) @@ -141,6 +141,7 @@ "pyproj": ("https://pyproj4.github.io/pyproj/stable/", None), "exactextract": ("https://isciences.github.io/exactextract/", None), "rasterio": ("https://rasterio.readthedocs.io/en/stable/", None), + "xvec": ("https://xvec.readthedocs.io/en/stable/", None), } autosummary_generate = True diff --git a/docs/index.md b/docs/index.md index 238348c..96666d6 100644 --- a/docs/index.md +++ b/docs/index.md @@ -7,6 +7,9 @@ caption: Raster Index hidden: --- +raster_index/intro +raster_index/indexing +raster_index/crs raster_index/aligning raster_index/combining raster_index/design_choices diff --git a/docs/raster_index/combining.md b/docs/raster_index/combining.md index d64db49..45c1eed 100644 --- a/docs/raster_index/combining.md +++ b/docs/raster_index/combining.md @@ -21,7 +21,7 @@ xr.set_options(display_expand_indexes=True); # Combining -{py:class}`RasterIndex` supports concatenation along a single axis through either {py:func}`xarray.concat` or {py:func}`xarray.combine_nested`. +{py:class}`RasterIndex` supports concatenation along a single axis through either {py:func}`xarray.concat` or across multiple axes using {py:func}`xarray.combine_nested`. In all cases, a new {py:class}`RasterIndex` is created. Cases (a) and (b) in the following image are supported, case (c) is not. diff --git a/docs/raster_index/crs.ipynb b/docs/raster_index/crs.ipynb new file mode 100644 index 0000000..f9797d5 --- /dev/null +++ b/docs/raster_index/crs.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# CRS handling\n", + "\n", + "RasterIndex composes with [xproj](https://xproj.readthedocs.io) to provide CRS handling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "hide-input", + "hide-output" + ] + }, + "outputs": [], + "source": [ + "%xmode minimal\n", + "\n", + "import xarray as xr\n", + "\n", + "import rasterix\n", + "\n", + "xr.set_options(display_expand_indexes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "To enable CRS-handling, start by assigning a CRS using the `.proj.assign_crs` function from xproj." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da = xr.open_dataarray(source, engine=\"rasterio\")\n", + "da = da.proj.assign_crs(spatial_ref=da.spatial_ref.attrs[\"crs_wkt\"])\n", + "da = da.pipe(rasterix.assign_index)\n", + "da" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "Note how the `x` variable has appropriate attributes!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "da.x.attrs" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Alignment\n", + "\n", + "For demo purposes we'll create a second copy with a different CRS." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "diffcrs = da.copy(deep=True).drop_vars(\"spatial_ref\").drop_indexes([\"x\", \"y\"])\n", + "diffcrs = diffcrs.proj.assign_crs(spatial_ref=\"epsg:4326\").pipe(rasterix.assign_index)\n", + "diffcrs" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "Again note that the attributes on `x` have changed appropriately. This is enabled by RasterIndex's integration with `xproj`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "diffcrs.x.attrs" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "Attempting to add the two raises an error (as it should)!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "diffcrs + da" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/raster_index/design_choices.md b/docs/raster_index/design_choices.md index 8dbb63a..7acfeb8 100644 --- a/docs/raster_index/design_choices.md +++ b/docs/raster_index/design_choices.md @@ -6,3 +6,43 @@ In designing {py:class}`RasterIndex`, we faced a few thorny questions. Below we discuss these considerations, and the approach we've taken. Ultimately, there are no easy answers and tradeoffs to be made. + +## CRS handling + +{py:class}`xproj.CRSIndex` is an attempt at providing a building block for CRS handling in the Xarray ecosystem. + +How might {py:class}`RasterIndex` fit with that vision? Our options are: + +1. fully encapsulate {py:class}`xproj.CRSIndex`, or +1. satisfy the ["CRS-aware" protocol](https://xproj.readthedocs.io/en/latest/integration.html) provided by `xproj`, or +1. simply handle the affine transform and ignore the CRS altogether. + +Why do we want CRS handling? We want Xarray to disallow alignment of two Xarray objects with different CRS e.g. `da1 + da2` should fail if `da1`, and `da2` have different CRS. This is enabled by assigning {py:class}`xproj.CRSIndex` to the `spatial_ref` variable. + +### Why should `RasterIndex` be aware of the CRS? + +RasterIndex handles indexing and the creation of coordinate variables. With CRS information handy, this would allow us to + +1. Support wraparound indexing along the `longitude` dimension ({issue}`26`) +1. Assign appropriate attributes to the created coordinate variables ({issue}`22`). (e.g. choose between `standard_name: latitude` and `standard_name: projection_y_coordinate`) +1. more? + +### Why not encapsulate CRSIndex? + +If RasterIndex must track CRS in some form, one way to do that would be to have RasterIndex internally build a `CRSIndex` for the `spatial_ref` variable. +Thus, `RasterIndex` would be associated with 3 variables instead of 2: `x`, `y`, and `spatial_ref`, for example. + +The downside of this approach is that it doesn't compose well with any other Index that would also like to handle the CRS (e.g. {py:class}`xvec.GeometryIndex`). +The reason is that the Xarray model enforces that one variable is only associated with one Index. +This restriction is needed because Indexes are responsible for creating associated variables, so we don't want two Indexes to have the ability to modify the same variable. +For example, `xr.merge([geometries, raster])` where `geometries` has `xvec.GeometryIndex[geometry, spatial_ref]` (square brackets list associated coordinate variable names) and `raster` has `RasterIndex[x, y, spatial_ref]`, would fail because the variable `spatial_ref` is associated with two Indexes of different types. + +### CRS-aware Index + +Therefore, we have chosen to experiment with the "CRS-aware" approach described in the [xproj docs](https://xproj.readthedocs.io/en/latest/integration.html). +Here `RasterIndex` tracks it's own _optional_ copy of a CRS object (not an Index) and defines the hooks needed for `CRSIndex` to communicate with `RasterIndex`. +The downside here is that CRS information is duplicated in two places explicitly, and requires explicit handling to ensure consistency + +### Don't like it? + +We chose this approach to enable experimentation. It is entirely possible to experiment with other approaches. Please reach out if you have opinions on this topic. diff --git a/docs/raster_index/indexing.ipynb b/docs/raster_index/indexing.ipynb new file mode 100644 index 0000000..7923df7 --- /dev/null +++ b/docs/raster_index/indexing.ipynb @@ -0,0 +1,301 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Indexing" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We will demonstrate the raster index with a simple dataset with rectilinear coordinates and no rotation or skew.\n", + "Both x and y coordinates are 1-dimensional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception", + "hide-input", + "hide-output" + ] + }, + "outputs": [], + "source": [ + "%xmode minimal\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "import rasterix\n", + "\n", + "np.set_printoptions(threshold=10, edgeitems=2)\n", + "xr.set_options(display_expand_indexes=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"\n", + "\n", + "da = xr.open_dataarray(source, engine=\"rasterio\").pipe(rasterix.assign_index)\n", + "da" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Positional Indexing (`isel`)\n", + "\n", + "```{seealso}\n", + "Now we will demonstrate indexing with a `RasterIndex`. See the Xarray [docs](https://docs.xarray.dev/en/stable/user-guide/indexing.html) and [tutorial](https://tutorial.xarray.dev/fundamentals/02.1_indexing_Basic.html) to understand the concepts underlying this section.\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "### Slicing both x and y\n", + "\n", + "Slicing preserves the laziness of coordinates. The new dataset can be represented by a new affine transform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "da_sliced = da.isel(x=slice(1, 4), y=slice(None, None, 2))\n", + "da_sliced" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "print(da_sliced.xindexes[\"x\"])\n", + "print(da_sliced.xindexes[\"y\"])" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### Outer indexing with array-like indexers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "da_outer = da.isel(x=[0, 2, 4], y=[0, 0, 1])\n", + "da_outer" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "We cannot compute a new affine transform given arbitrary array positions, so the resulting dataset has no indexes associated with `x` and `y`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "da_outer.xindexes" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "### Basic indexing with scalars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "da_scalar = da.isel(x=0, y=1)\n", + "da_scalar" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "da_xscalar = da.isel(x=0)\n", + "da_xscalar" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "**FIXME** The RasterIndex should be preserved in case of partial dimension reduction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "# da_xscalar.xindexes[\"y\"] # should return an index" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "### Vectorized (fancy) indexing\n", + "\n", + "```{seealso}\n", + "See the Xarray [tutorial](https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html) for more on this topic.\n", + "```\n", + "\n", + "Indexing the spatial coordinates with Xarray `Variable` objects returns a `RasterIndex` (wrapping `PandasIndex`) for 1-dimensional variables and no index for scalar or n-dimensional variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "da_points = da.isel(x=xr.Variable(\"z\", [0, 1]), y=xr.Variable(\"z\", [1, 1]))\n", + "da_points" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "da_points2d = da.isel(\n", + " x=xr.Variable((\"u\", \"v\"), [[0, 1], [2, 3]]),\n", + " y=xr.Variable((\"u\", \"v\"), [[1, 1], [2, 2]]),\n", + ")\n", + "da_points2d" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Label-based Indexing (`sel`)\n", + "\n", + "Label-based indexing also preserves the RasterIndex where possible, like positional indexing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da.sel(x=slice(-2e6, 2e6), y=slice(4e6, -2e6))" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/raster_index/intro.ipynb b/docs/raster_index/intro.ipynb new file mode 100644 index 0000000..58d9492 --- /dev/null +++ b/docs/raster_index/intro.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Why\n", + "\n", + "Rasterix provides a `RasterIndex` that uses an affine transform to enable indexing of the underlying data. \n", + "\n", + "Why should you use it?\n", + "1. It eliminates an entire class of bugs where Xarray allows you to add (for example) two datasets with different affine transforms (and/or projections) and return nonsensical outputs.\n", + "2. It enables indexing using the coordinate transformation, minimizing impacts of any floating-point mismatches.\n", + "3. To fit the Xarray data model, RasterIndex creates lazy coordinate variables and propagated them during indexing as much as possible. Thus very large coordinates can be represented with minimal memory cost.\n", + "\n", + "## Quick demo\n", + "\n", + "Below we quickly demonstrate some of these features. Note that this particular example notebook skips CRS handling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception", + "hide-input", + "hide-output" + ] + }, + "outputs": [], + "source": [ + "%xmode minimal\n", + "\n", + "import numpy as np\n", + "import xarray as xr\n", + "\n", + "import rasterix\n", + "\n", + "np.set_printoptions(threshold=10, edgeitems=2)\n", + "xr.set_options(display_expand_indexes=True)" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "We will demonstrate the raster index with a simple dataset with rectilinear coordinates and no rotation or skew.\n", + "Both x and y coordinates are 1-dimensional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## With and without `RasterIndex`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_no_raster_index = xr.open_dataarray(source, engine=\"rasterio\")\n", + "da_no_raster_index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index = rasterix.assign_index(da_no_raster_index)\n", + "da_raster_index" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "Let's compare the coordinates for the DataArrays with and without RasterIndex.\n", + "\n", + "Without a raster index, `x` and `y` are in-memory data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "da_no_raster_index.x" + ] + }, + { + "cell_type": "markdown", + "id": "9", + "metadata": {}, + "source": [ + "By contrast, with a RasterIndex, coordinate values are lazy and generated on-demand!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index.x" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "These differences are viewable in the repr. Note the `PandasIndex` type under \"Indexes\"." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "da_no_raster_index" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + " The repr below shows a few values for each coordinate (those have been computed on-the-fly) but clicking on the database icon doesn't show any value in the spatial coordinate data reprs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The generated coordinate values correspond to cell _centers_." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index.x.to_numpy()" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "The affine transforms are accessible:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index.xindexes[\"x\"].transform() # top-left" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "da_raster_index.xindexes[\"x\"].center_transform() # pixel centrres" + ] + }, + { + "cell_type": "markdown", + "id": "20", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Equality\n", + "\n", + "`equals` compares variable values without relying on Xarray coordinate indexes. Both dataarrays should thus be equal." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "da_raster_index.equals(da_no_raster_index)" + ] + }, + { + "cell_type": "markdown", + "id": "22", + "metadata": {}, + "source": [ + "## Alignment" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "Xarray alignment relies on Xarray coordinate indexes. Trying to align both datasets fails here since they each have different index types." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [], + "source": [ + "da_raster_index + da_no_raster_index" + ] + } + ], + "metadata": { + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/rasterix/raster_index.py b/src/rasterix/raster_index.py index 6731567..598eca7 100644 --- a/src/rasterix/raster_index.py +++ b/src/rasterix/raster_index.py @@ -64,7 +64,12 @@ def assign_index(obj: T_Xarray, *, x_dim: str | None = None, y_dim: str | None = y_dim = y_dim or guessed_y index = RasterIndex.from_transform( - obj.rio.transform(), width=obj.sizes[x_dim], height=obj.sizes[y_dim], x_dim=x_dim, y_dim=y_dim + obj.rio.transform(), + width=obj.sizes[x_dim], + height=obj.sizes[y_dim], + x_dim=x_dim, + y_dim=y_dim, + crs=obj.proj.crs, ) coords = Coordinates.from_xindex(index) return obj.assign_coords(coords) From c43883e727550a81f80032986c9a36c7b87b48f1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 30 Jun 2025 17:52:30 -0600 Subject: [PATCH 2/4] Control auto-detection of CRS --- src/rasterix/raster_index.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/rasterix/raster_index.py b/src/rasterix/raster_index.py index 598eca7..da92cff 100644 --- a/src/rasterix/raster_index.py +++ b/src/rasterix/raster_index.py @@ -32,7 +32,9 @@ YAXIS = 1 -def assign_index(obj: T_Xarray, *, x_dim: str | None = None, y_dim: str | None = None) -> T_Xarray: +def assign_index( + obj: T_Xarray, *, x_dim: str | None = None, y_dim: str | None = None, crs: bool = True +) -> T_Xarray: """Assign a RasterIndex to an Xarray DataArray or Dataset. Parameters @@ -43,6 +45,8 @@ def assign_index(obj: T_Xarray, *, x_dim: str | None = None, y_dim: str | None = Name of the x dimension. If None, will be automatically detected. y_dim : str, optional Name of the y dimension. If None, will be automatically detected. + crs: bool, optional + Auto-detect CRS using xproj? Returns ------- @@ -69,7 +73,7 @@ def assign_index(obj: T_Xarray, *, x_dim: str | None = None, y_dim: str | None = height=obj.sizes[y_dim], x_dim=x_dim, y_dim=y_dim, - crs=obj.proj.crs, + crs=obj.proj.crs if crs else None, ) coords = Coordinates.from_xindex(index) return obj.assign_coords(coords) From bb3324b1e22c1051b29ae546dc168a06da99c76b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 1 Jul 2025 14:25:35 -0600 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Scott Henderson <3924836+scottyhq@users.noreply.github.com> --- docs/raster_index/design_choices.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/raster_index/design_choices.md b/docs/raster_index/design_choices.md index 7acfeb8..5af312d 100644 --- a/docs/raster_index/design_choices.md +++ b/docs/raster_index/design_choices.md @@ -11,7 +11,7 @@ Ultimately, there are no easy answers and tradeoffs to be made. {py:class}`xproj.CRSIndex` is an attempt at providing a building block for CRS handling in the Xarray ecosystem. -How might {py:class}`RasterIndex` fit with that vision? Our options are: +How might `RasterIndex` integrate with `xproj.CRSIndex`? Our options are: 1. fully encapsulate {py:class}`xproj.CRSIndex`, or 1. satisfy the ["CRS-aware" protocol](https://xproj.readthedocs.io/en/latest/integration.html) provided by `xproj`, or @@ -33,9 +33,8 @@ If RasterIndex must track CRS in some form, one way to do that would be to have Thus, `RasterIndex` would be associated with 3 variables instead of 2: `x`, `y`, and `spatial_ref`, for example. The downside of this approach is that it doesn't compose well with any other Index that would also like to handle the CRS (e.g. {py:class}`xvec.GeometryIndex`). -The reason is that the Xarray model enforces that one variable is only associated with one Index. -This restriction is needed because Indexes are responsible for creating associated variables, so we don't want two Indexes to have the ability to modify the same variable. For example, `xr.merge([geometries, raster])` where `geometries` has `xvec.GeometryIndex[geometry, spatial_ref]` (square brackets list associated coordinate variable names) and `raster` has `RasterIndex[x, y, spatial_ref]`, would fail because the variable `spatial_ref` is associated with two Indexes of different types. +This fails because the Xarray model enforces that *one Variable is only associated with only one Index*, in order to prevent different Indexes modifying the same Variable. ### CRS-aware Index From 38dac9e5e563041082fe14edf8fd743627de9fdf Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 1 Jul 2025 14:32:18 -0600 Subject: [PATCH 4/4] addres more comments --- docs/raster_index/design_choices.md | 19 ++--- docs/raster_index/indexing.ipynb | 62 ++++++++++------- docs/raster_index/intro.ipynb | 104 ++++++++++++++-------------- 3 files changed, 99 insertions(+), 86 deletions(-) diff --git a/docs/raster_index/design_choices.md b/docs/raster_index/design_choices.md index 5af312d..36e6b38 100644 --- a/docs/raster_index/design_choices.md +++ b/docs/raster_index/design_choices.md @@ -9,23 +9,24 @@ Ultimately, there are no easy answers and tradeoffs to be made. ## CRS handling -{py:class}`xproj.CRSIndex` is an attempt at providing a building block for CRS handling in the Xarray ecosystem. +Why do we want CRS handling? +1. We want Xarray to disallow alignment of two Xarray objects with different CRS e.g. `da1 + da2` should fail if `da1`, and `da2` have different CRS. +1. Support wraparound indexing along the `longitude` dimension ({issue}`26`) +1. Assign appropriate attributes to the created coordinate variables ({issue}`22`). (e.g. choose between `standard_name: latitude` and `standard_name: projection_y_coordinate`) +1. more? + +{py:class}`xproj.CRSIndex` is an attempt at providing a building block for CRS handling in the Xarray ecosystem that solved problem (1). +Thus (1) can be handled by assigning {py:class}`xproj.CRSIndex` to the `spatial_ref` variable. How might `RasterIndex` integrate with `xproj.CRSIndex`? Our options are: 1. fully encapsulate {py:class}`xproj.CRSIndex`, or 1. satisfy the ["CRS-aware" protocol](https://xproj.readthedocs.io/en/latest/integration.html) provided by `xproj`, or 1. simply handle the affine transform and ignore the CRS altogether. -Why do we want CRS handling? We want Xarray to disallow alignment of two Xarray objects with different CRS e.g. `da1 + da2` should fail if `da1`, and `da2` have different CRS. This is enabled by assigning {py:class}`xproj.CRSIndex` to the `spatial_ref` variable. - ### Why should `RasterIndex` be aware of the CRS? -RasterIndex handles indexing and the creation of coordinate variables. With CRS information handy, this would allow us to - -1. Support wraparound indexing along the `longitude` dimension ({issue}`26`) -1. Assign appropriate attributes to the created coordinate variables ({issue}`22`). (e.g. choose between `standard_name: latitude` and `standard_name: projection_y_coordinate`) -1. more? +RasterIndex handles indexing and the creation of coordinate variables. Thus it is the natural place to support (2) and (3) in the list above. ### Why not encapsulate CRSIndex? @@ -34,7 +35,7 @@ Thus, `RasterIndex` would be associated with 3 variables instead of 2: `x`, `y`, The downside of this approach is that it doesn't compose well with any other Index that would also like to handle the CRS (e.g. {py:class}`xvec.GeometryIndex`). For example, `xr.merge([geometries, raster])` where `geometries` has `xvec.GeometryIndex[geometry, spatial_ref]` (square brackets list associated coordinate variable names) and `raster` has `RasterIndex[x, y, spatial_ref]`, would fail because the variable `spatial_ref` is associated with two Indexes of different types. -This fails because the Xarray model enforces that *one Variable is only associated with only one Index*, in order to prevent different Indexes modifying the same Variable. +This fails because the Xarray model enforces that _one Variable is only associated with only one Index_, in order to prevent different Indexes modifying the same Variable. ### CRS-aware Index diff --git a/docs/raster_index/indexing.ipynb b/docs/raster_index/indexing.ipynb index 7923df7..270c45f 100644 --- a/docs/raster_index/indexing.ipynb +++ b/docs/raster_index/indexing.ipynb @@ -70,7 +70,7 @@ }, "outputs": [], "source": [ - "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"\n", + "source = \"https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"\n", "\n", "da = xr.open_dataarray(source, engine=\"rasterio\").pipe(rasterix.assign_index)\n", "da" @@ -101,7 +101,7 @@ "source": [ "### Slicing both x and y\n", "\n", - "Slicing preserves the laziness of coordinates. The new dataset can be represented by a new affine transform." + "Slicing preserves the laziness of coordinates since the new dataset can be represented by a new affine transform." ] }, { @@ -115,10 +115,28 @@ "da_sliced" ] }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "Compare the transforms before/after slicing." + ] + }, { "cell_type": "code", "execution_count": null, - "id": "7", + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "(da.xindexes[\"x\"].transform(), da_sliced.xindexes[\"x\"].transform())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +146,7 @@ }, { "cell_type": "markdown", - "id": "8", + "id": "10", "metadata": {}, "source": [ "### Outer indexing with array-like indexers" @@ -137,7 +155,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9", + "id": "11", "metadata": {}, "outputs": [], "source": [ @@ -147,7 +165,7 @@ }, { "cell_type": "markdown", - "id": "10", + "id": "12", "metadata": {}, "source": [ "We cannot compute a new affine transform given arbitrary array positions, so the resulting dataset has no indexes associated with `x` and `y`." @@ -156,7 +174,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11", + "id": "13", "metadata": {}, "outputs": [], "source": [ @@ -165,7 +183,7 @@ }, { "cell_type": "markdown", - "id": "12", + "id": "14", "metadata": {}, "source": [ "### Basic indexing with scalars" @@ -174,7 +192,7 @@ { "cell_type": "code", "execution_count": null, - "id": "13", + "id": "15", "metadata": {}, "outputs": [], "source": [ @@ -185,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "16", "metadata": {}, "outputs": [], "source": [ @@ -193,18 +211,10 @@ "da_xscalar" ] }, - { - "cell_type": "markdown", - "id": "15", - "metadata": {}, - "source": [ - "**FIXME** The RasterIndex should be preserved in case of partial dimension reduction." - ] - }, { "cell_type": "code", "execution_count": null, - "id": "16", + "id": "17", "metadata": {}, "outputs": [], "source": [ @@ -213,7 +223,7 @@ }, { "cell_type": "markdown", - "id": "17", + "id": "18", "metadata": {}, "source": [ "### Vectorized (fancy) indexing\n", @@ -222,24 +232,24 @@ "See the Xarray [tutorial](https://tutorial.xarray.dev/intermediate/indexing/advanced-indexing.html) for more on this topic.\n", "```\n", "\n", - "Indexing the spatial coordinates with Xarray `Variable` objects returns a `RasterIndex` (wrapping `PandasIndex`) for 1-dimensional variables and no index for scalar or n-dimensional variables." + "Indexing the spatial coordinates with Xarray `Variable` or `DataArray` objects returns an unindexed object. The result cannot be represented by a RasterIndex in general." ] }, { "cell_type": "code", "execution_count": null, - "id": "18", + "id": "19", "metadata": {}, "outputs": [], "source": [ - "da_points = da.isel(x=xr.Variable(\"z\", [0, 1]), y=xr.Variable(\"z\", [1, 1]))\n", + "da_points = da.isel(x=xr.DataArray([0, 1], dims=\"z\"), y=xr.DataArray([1, 1], dims=\"z\"))\n", "da_points" ] }, { "cell_type": "code", "execution_count": null, - "id": "19", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -252,7 +262,7 @@ }, { "cell_type": "markdown", - "id": "20", + "id": "21", "metadata": { "editable": true, "slideshow": { @@ -269,7 +279,7 @@ { "cell_type": "code", "execution_count": null, - "id": "21", + "id": "22", "metadata": { "editable": true, "slideshow": { diff --git a/docs/raster_index/intro.ipynb b/docs/raster_index/intro.ipynb index 58d9492..2293e57 100644 --- a/docs/raster_index/intro.ipynb +++ b/docs/raster_index/intro.ipynb @@ -81,7 +81,7 @@ }, "outputs": [], "source": [ - "source = \"/vsicurl/https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"" + "source = \"https://noaadata.apps.nsidc.org/NOAA/G02135/south/daily/geotiff/2024/01_Jan/S_20240101_concentration_v3.0.tif\"" ] }, { @@ -128,42 +128,46 @@ }, "outputs": [], "source": [ - "da_raster_index = rasterix.assign_index(da_no_raster_index)\n", + "da_raster_index = xr.open_dataarray(source, engine=\"rasterio\", parse_coordinates=False).pipe(\n", + " rasterix.assign_index\n", + ")\n", "da_raster_index" ] }, { "cell_type": "markdown", "id": "7", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "Let's compare the coordinates for the DataArrays with and without RasterIndex.\n", - "\n", - "Without a raster index, `x` and `y` are in-memory data." + "The affine transforms are accessible:" ] }, { "cell_type": "code", "execution_count": null, "id": "8", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ - "da_no_raster_index.x" - ] - }, - { - "cell_type": "markdown", - "id": "9", - "metadata": {}, - "source": [ - "By contrast, with a RasterIndex, coordinate values are lazy and generated on-demand!" + "da_raster_index.xindexes[\"x\"].transform() # top-left" ] }, { "cell_type": "code", "execution_count": null, - "id": "10", + "id": "9", "metadata": { "editable": true, "slideshow": { @@ -173,51 +177,41 @@ }, "outputs": [], "source": [ - "da_raster_index.x" + "da_raster_index.xindexes[\"x\"].center_transform() # pixel centrres" ] }, { "cell_type": "markdown", - "id": "11", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, + "id": "10", + "metadata": {}, "source": [ - "These differences are viewable in the repr. Note the `PandasIndex` type under \"Indexes\"." + "Now Let's compare the coordinates for the DataArrays with and without RasterIndex.\n", + "\n", + "Without a raster index, `x` and `y` are in-memory data." ] }, { "cell_type": "code", "execution_count": null, - "id": "12", + "id": "11", "metadata": {}, "outputs": [], "source": [ - "da_no_raster_index" + "da_no_raster_index.x" ] }, { "cell_type": "markdown", - "id": "13", - "metadata": { - "editable": true, - "slideshow": { - "slide_type": "" - }, - "tags": [] - }, + "id": "12", + "metadata": {}, "source": [ - " The repr below shows a few values for each coordinate (those have been computed on-the-fly) but clicking on the database icon doesn't show any value in the spatial coordinate data reprs." + "By contrast, with a RasterIndex, coordinate values are lazy and generated on-demand!" ] }, { "cell_type": "code", "execution_count": null, - "id": "14", + "id": "13", "metadata": { "editable": true, "slideshow": { @@ -227,12 +221,12 @@ }, "outputs": [], "source": [ - "da_raster_index" + "da_raster_index.x" ] }, { "cell_type": "markdown", - "id": "15", + "id": "14", "metadata": { "editable": true, "slideshow": { @@ -241,12 +235,21 @@ "tags": [] }, "source": [ - "The generated coordinate values correspond to cell _centers_." + "These differences are viewable in the repr. Note the `PandasIndex` type under \"Indexes\"." ] }, { "cell_type": "code", "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "da_no_raster_index" + ] + }, + { + "cell_type": "markdown", "id": "16", "metadata": { "editable": true, @@ -255,13 +258,13 @@ }, "tags": [] }, - "outputs": [], "source": [ - "da_raster_index.x.to_numpy()" + " The repr below shows a few values for each coordinate (those have been computed on-the-fly) but clicking on the database icon doesn't show any value in the spatial coordinate data reprs." ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "id": "17", "metadata": { "editable": true, @@ -270,13 +273,13 @@ }, "tags": [] }, + "outputs": [], "source": [ - "The affine transforms are accessible:" + "da_raster_index" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "id": "18", "metadata": { "editable": true, @@ -285,9 +288,8 @@ }, "tags": [] }, - "outputs": [], "source": [ - "da_raster_index.xindexes[\"x\"].transform() # top-left" + "The generated coordinate values correspond to cell _centers_." ] }, { @@ -303,7 +305,7 @@ }, "outputs": [], "source": [ - "da_raster_index.xindexes[\"x\"].center_transform() # pixel centrres" + "da_raster_index.x.to_numpy()" ] }, {