Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:

- name: "Install dependencies"
run: |
conda install --yes numpy pytest pytest-mock iris xarray filelock requests
conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr aiohttp

- name: "Install *latest* Iris"
run: |
Expand Down
21 changes: 21 additions & 0 deletions lib/ncdata/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# Hopefully a minimal amount.
# The structure of an NcData object makes it fairly painless.
#
import warnings

from pathlib import Path
from typing import AnyStr, Union
Expand All @@ -21,6 +22,16 @@
from . import NcAttribute, NcData, NcDimension, NcVariable


def _raise_warning(var):
    """Emit a ``UserWarning`` saying that ``var`` holds non-lazy data.

    The message embeds ``var`` (formatted with ``str()``) and advises passing
    ``chunks={}`` to Xarray's ``open_dataset``.  ``stacklevel=2`` attributes
    the warning to the caller of this helper rather than to the helper itself.
    """
    subject = f"Variable {var} has fully realized data, "
    advice = (
        "if you need lazy data, then add "
        "chunks={} as argument to Xarray open_dataset."
    )
    warnings.warn(subject + advice, category=UserWarning, stacklevel=2)


class _XarrayNcDataStore(NetCDF4DataStore):
"""
An adapter class presenting ncdata as an xarray datastore.
Expand Down Expand Up @@ -96,6 +107,16 @@ def store(

# Install variables, creating dimensions as we go.
for varname, var in new_variables.items():
if isinstance(var.data, np.ndarray):
# Zarr2 metadata
if "axis" not in var.attrs:
std_axes = ["latitude", "longitude", "time"]
if not list(set(var.attrs.values()) & set(std_axes)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if not list(set(var.attrs.values()) & set(std_axes)):
if not [s for s in std_axes if s in str(var.attrs.values())]:

Copy link
Contributor

@valeriupredoi valeriupredoi Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will fix that failing test, and all of the almost 1000 tests pass now 😃 The problem is that this method (like the previous approach) is not 100% exhaustive in figuring out which variables are coords that should have realized data, so a few of these warnings will still be raised for coords that the check misses. It beats me how to create a bulletproof check, given the variety of names and metadata and the differences between Zarr2 and Zarr3 (though at least most of them don't raise the warning).

_raise_warning(var)
# Zarr3 metadata
else:
if var.attrs["axis"] not in ["X", "Y", "Z", "T"]:
_raise_warning(var)
if varname in self.ncdata.variables:
raise ValueError(f'duplicate variable : "{varname}"')

Expand Down
118 changes: 118 additions & 0 deletions tests/integration/test_zarr_to_iris.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""Test conversion of remote and local Zarr store to iris Cube."""
from importlib.resources import files as importlib_files
from pathlib import Path

import iris
import pytest
import xarray as xr
import ncdata
import ncdata.iris_xarray
import zarr


def _return_kwargs():
    """Build the keyword arguments these tests pass to ``xr.open_dataset``.

    A fresh dict is returned on every call, so callers may modify it freely.
    It selects the Zarr engine with consolidated metadata, decodes times via
    a cftime-based ``CFDatetimeCoder``, and sets ``chunks={}``.
    """
    return dict(
        consolidated=True,
        decode_times=xr.coders.CFDatetimeCoder(use_cftime=True),
        engine="zarr",
        chunks={},
        backend_kwargs={},
    )


def _run_checks(cube):
    """Assert that ``cube`` looks like the sample specific-humidity field.

    Checks ``var_name``, ``standard_name`` and ``long_name`` first, then that
    the cube's coordinates include both a longitude and a latitude (by
    ``standard_name``).
    """
    expected_names = (
        ("var_name", "q"),
        ("standard_name", "specific_humidity"),
    )
    for attribute, wanted in expected_names:
        assert getattr(cube, attribute) == wanted
    assert cube.long_name is None

    found = [crd.standard_name for crd in cube.coords()]
    for axis_name in ("longitude", "latitude"):
        assert axis_name in found


def test_load_zarr2_local():
    """Test loading a Zarr2 store from local FS.

    Opens the bundled ``example_field_0.zarr2`` sample with xarray, converts
    it to Iris cubes through ncdata, and checks the single resulting cube.
    """
    zarr_path = (
        Path(importlib_files("tests"))
        / "zarr-sample-data"
        / "example_field_0.zarr2"
    )

    xr_kwargs = _return_kwargs()
    # unify_chunks() returns a new Dataset instead of modifying in place, so
    # the result must be kept for the call to have any effect.
    zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs).unify_chunks()

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert len(cubes) == 1
    _run_checks(cubes[0])


def test_load_zarr3_local():
    """Test loading a Zarr3 store from local FS.

    Opens the bundled ``example_field_0.zarr3`` sample with xarray, converts
    it to Iris cubes through ncdata, and checks the single resulting cube.
    """
    zarr_path = (
        Path(importlib_files("tests"))
        / "zarr-sample-data"
        / "example_field_0.zarr3"
    )

    xr_kwargs = _return_kwargs()
    # unify_chunks() returns a new Dataset instead of modifying in place, so
    # the result must be kept for the call to have any effect.
    zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs).unify_chunks()

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert len(cubes) == 1
    _run_checks(cubes[0])


def test_load_remote_zarr():
    """Test loading a remote Zarr store.

    This is a ~250MB compressed Zarr in an S3 bucket.
    Conversion is done fully lazily, by passing chunks={}
    to Xarray loader. Test takes ~3-4s and needs ~400MB res mem.
    """
    zarr_path = (
        "https://uor-aces-o.s3-ext.jc.rl.ac.uk/"
        "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3"
    )

    xr_kwargs = _return_kwargs()
    # unify_chunks() returns a new Dataset instead of modifying in place, so
    # the result must be kept for the call to have any effect.
    zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs).unify_chunks()

    cubes = ncdata.iris_xarray.cubes_from_xarray(zarr_xr)

    assert isinstance(cubes, iris.cube.CubeList)
    assert len(cubes) == 1
    # The point of chunks={}: the converted cube must not hold realized data.
    assert cubes[0].has_lazy_data()


def test_load_remote_zarr_realized_data():
    """Test with the same remote Zarr store but chunks=None.

    With ``chunks=None`` the conversion to Iris cubes is expected to emit the
    ``UserWarning`` advising the user to pass ``chunks={}``.
    """
    import re  # local import: only needed to escape the expected message

    zarr_path = (
        "https://uor-aces-o.s3-ext.jc.rl.ac.uk/"
        "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3"
    )

    xr_kwargs = _return_kwargs()
    xr_kwargs["chunks"] = None
    zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs)

    msg = (
        "has fully realized data, if you need lazy data, "
        "then add chunks={} as argument to Xarray open_dataset."
    )
    # pytest.warns treats `match` as a regular expression; escape the literal
    # text so that "{}" and "." are matched verbatim.
    with pytest.warns(UserWarning, match=re.escape(msg)):
        ncdata.iris_xarray.cubes_from_xarray(zarr_xr)
3 changes: 3 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"Conventions": "CF-1.12"
}
3 changes: 3 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zgroup
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"zarr_format": 2
}
171 changes: 171 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/.zmetadata
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
{
"metadata": {
".zattrs": {
"Conventions": "CF-1.12"
},
".zgroup": {
"zarr_format": 2
},
"lat/.zarray": {
"chunks": [
5
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5
],
"zarr_format": 2
},
"lat/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat"
],
"bounds": "lat_bnds",
"standard_name": "latitude",
"units": "degrees_north"
},
"lat_bnds/.zarray": {
"chunks": [
3,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
2
],
"zarr_format": 2
},
"lat_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"bounds2"
]
},
"lon/.zarray": {
"chunks": [
8
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8
],
"zarr_format": 2
},
"lon/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon"
],
"bounds": "lon_bnds",
"standard_name": "longitude",
"units": "degrees_east"
},
"lon_bnds/.zarray": {
"chunks": [
4,
2
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
8,
2
],
"zarr_format": 2
},
"lon_bnds/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lon",
"bounds2"
]
},
"q/.zarray": {
"chunks": [
3,
4
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5,
8
],
"zarr_format": 2
},
"q/.zattrs": {
"_ARRAY_DIMENSIONS": [
"lat",
"lon"
],
"cell_methods": "area: mean",
"coordinates": "time",
"project": "research",
"standard_name": "specific_humidity",
"units": "1"
},
"time/.zarray": {
"chunks": [],
"compressor": null,
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [],
"zarr_format": 2
},
"time/.zattrs": {
"_ARRAY_DIMENSIONS": [],
"standard_name": "time",
"units": "days since 2018-12-01"
}
},
"zarr_consolidated_format": 1
}
20 changes: 20 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/lat/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"chunks": [
5
],
"compressor": {
"blocksize": 0,
"clevel": 5,
"cname": "lz4",
"id": "blosc",
"shuffle": 1
},
"dtype": "<f8",
"fill_value": "NaN",
"filters": null,
"order": "C",
"shape": [
5
],
"zarr_format": 2
}
8 changes: 8 additions & 0 deletions tests/zarr-sample-data/example_field_0.zarr2/lat/.zattrs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"_ARRAY_DIMENSIONS": [
"lat"
],
"bounds": "lat_bnds",
"standard_name": "latitude",
"units": "degrees_north"
}
Binary file added tests/zarr-sample-data/example_field_0.zarr2/lat/0
Binary file not shown.
Loading
Loading