From eed755f02ad4e75c1ecbfde19e3772d6c8706a98 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 15:21:22 +0100 Subject: [PATCH 01/13] add a user warning when data is not lazy --- lib/ncdata/xarray.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py index cf92ce7..5178ff4 100644 --- a/lib/ncdata/xarray.py +++ b/lib/ncdata/xarray.py @@ -9,6 +9,7 @@ # Hopefully a minimal amount. # The structure of an NcData object makes it fairly painless. # +import warnings from pathlib import Path from typing import AnyStr, Union @@ -96,6 +97,14 @@ def store( # Install variables, creating dimensions as we go. for varname, var in new_variables.items(): + if isinstance(var.data, np.ndarray) and \ + var.attrs["axis"] not in ["X", "Y", "Z", "T"]: + warn_msg = ( + f"Variable {var} has fully realized " + "data, if you need lazy data, then add " + "chunks={} as argument to Xarray open_dataset." + ) + warnings.warn(warn_msg, UserWarning, stacklevel=2) if varname in self.ncdata.variables: raise ValueError(f'duplicate variable : "{varname}"') From f13b64214a97de9ea6dbaabb046bf3081145f565 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 15:21:42 +0100 Subject: [PATCH 02/13] add a test module for zarrs --- tests/integration/test_zarr_to_iris.py | 37 ++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/integration/test_zarr_to_iris.py diff --git a/tests/integration/test_zarr_to_iris.py b/tests/integration/test_zarr_to_iris.py new file mode 100644 index 0000000..0b18dca --- /dev/null +++ b/tests/integration/test_zarr_to_iris.py @@ -0,0 +1,37 @@ +"""Test conversion of remote and local Zarr store to iris Cube.""" +import iris +import xarray as xr +import ncdata +import ncdata.iris_xarray +import zarr + + +def test_load_remote_zarr(): + """Test loading a remote Zarr store. + + This is a ~250MB compressed Zarr in an S3 bucket. + Conversion is done fully lazily, by passing chunks={} + to Xarray loader. Test takes ~3-4s and needs ~400MB res mem. + """ + zarr_path = ( + "https://uor-aces-o.s3-ext.jc.rl.ac.uk/" + "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3" + ) + + time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) + zarr_xr = xr.open_dataset( + zarr_path, + consolidated=True, + decode_times=time_coder, + engine="zarr", + chunks={}, + backend_kwargs={}, + ) + zarr_xr.unify_chunks() + + conversion_func = ncdata.iris_xarray.cubes_from_xarray + cubes = conversion_func(zarr_xr) + + assert isinstance(cubes, iris.cube.CubeList) + assert len(cubes) == 1 + assert cubes[0].has_lazy_data() From 3f7cf4a80d73496c4a6eac07bd50b30346528ae7 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 15:22:59 +0100 Subject: [PATCH 03/13] add tiny Zarr sample data --- tests/zarr-sample-data/example_field_0.zarr17 | 1 + .../example_field_0.zarr2/.zattrs | 3 + .../example_field_0.zarr2/.zgroup | 3 + .../example_field_0.zarr2/.zmetadata | 171 ++++++++++++++++++ .../example_field_0.zarr2/lat/.zarray | 20 ++ .../example_field_0.zarr2/lat/.zattrs | 8 + .../example_field_0.zarr2/lat/0 | Bin 0 -> 56 bytes .../example_field_0.zarr2/lat_bnds/.zarray | 22 +++ .../example_field_0.zarr2/lat_bnds/.zattrs | 6 + .../example_field_0.zarr2/lat_bnds/0.0 | Bin 0 -> 64 bytes .../example_field_0.zarr2/lat_bnds/1.0 | Bin 0 -> 64 bytes .../example_field_0.zarr2/lon/.zarray | 20 ++ .../example_field_0.zarr2/lon/.zattrs | 8 + .../example_field_0.zarr2/lon/0 | Bin 0 -> 80 bytes .../example_field_0.zarr2/lon_bnds/.zarray | 22 +++ .../example_field_0.zarr2/lon_bnds/.zattrs | 6 + .../example_field_0.zarr2/lon_bnds/0.0 | Bin 0 -> 80 bytes .../example_field_0.zarr2/lon_bnds/1.0 | Bin 0 -> 80 bytes .../example_field_0.zarr2/q/.zarray | 22 +++ .../example_field_0.zarr2/q/.zattrs | 11 ++ .../example_field_0.zarr2/q/0.0 | Bin 0 -> 112 bytes .../example_field_0.zarr2/q/0.1 | Bin 0 -> 112 bytes .../example_field_0.zarr2/q/1.0 | Bin 0 -> 112 bytes .../example_field_0.zarr2/q/1.1 | Bin 0 -> 112 bytes .../example_field_0.zarr2/time/.zarray | 10 + .../example_field_0.zarr2/time/.zattrs | 5 + .../example_field_0.zarr2/time/0 | Bin 0 -> 8 bytes 27 files changed, 338 insertions(+) create mode 100644 tests/zarr-sample-data/example_field_0.zarr17 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/.zgroup create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/.zmetadata create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat_bnds/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat_bnds/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat_bnds/0.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lat_bnds/1.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/0.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/1.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/0.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/0.1 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/1.0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/q/1.1 create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/time/.zarray create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/time/.zattrs create mode 100644 tests/zarr-sample-data/example_field_0.zarr2/time/0 diff --git a/tests/zarr-sample-data/example_field_0.zarr17 b/tests/zarr-sample-data/example_field_0.zarr17 new file mode 100644 index 0000000..9abbe8a --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr17 @@ -0,0 +1 @@ +This is not a Zarr file. Go grab lunch! diff --git a/tests/zarr-sample-data/example_field_0.zarr2/.zattrs b/tests/zarr-sample-data/example_field_0.zarr2/.zattrs new file mode 100644 index 0000000..bb815de --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr2/.zattrs @@ -0,0 +1,3 @@ +{ + "Conventions": "CF-1.12" +} diff --git a/tests/zarr-sample-data/example_field_0.zarr2/.zgroup b/tests/zarr-sample-data/example_field_0.zarr2/.zgroup new file mode 100644 index 0000000..3f3fad2 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr2/.zgroup @@ -0,0 +1,3 @@ +{ + "zarr_format": 2 +} diff --git a/tests/zarr-sample-data/example_field_0.zarr2/.zmetadata b/tests/zarr-sample-data/example_field_0.zarr2/.zmetadata new file mode 100644 index 0000000..ab417b3 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr2/.zmetadata @@ -0,0 +1,171 @@ +{ + "metadata": { + ".zattrs": { + "Conventions": "CF-1.12" + }, + ".zgroup": { + "zarr_format": 2 + }, + "lat/.zarray": { + "chunks": [ + 5 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "u#KAaAN@}cyD1PC8SH>5cL02`bMZ2$lO literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/1.0 b/tests/zarr-sample-data/example_field_0.zarr2/lon_bnds/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..4461e510e92cd3bb68c020a0b129a8a1fd18cf71 GIT binary patch literal 80 rcmZQ#H0E$%U|;~@03Zf~hBOB-t&oFCKPZ6kVf2P#2p>i_lsNzZV$2HZ literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr2/q/.zarray b/tests/zarr-sample-data/example_field_0.zarr2/q/.zarray new file mode 100644 index 0000000..d03af81 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr2/q/.zarray @@ -0,0 +1,22 @@ +{ + "chunks": [ + 3, + 4 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dtype": "+iS-3ANs1X$Nop~(PRE3&8|;Oe51d-)y3Ky#+Zub*xLx)q%l6He(qC)8%aeym IyS>dG0J;D#egFUf literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr2/q/1.0 b/tests/zarr-sample-data/example_field_0.zarr2/q/1.0 new file mode 100644 index 0000000000000000000000000000000000000000..b71f5dc59971cee259c58961f1f3f0e0ad83f4f1 GIT binary patch literal 112 zcmZQ#H0DTPU|;~@0w9)T3FFh+6m7zv{B~ hluWL$x4!S+J3Via{q3J%0jT^!d!P|O@S`3V4FI+jCtd&m literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr2/q/1.1 b/tests/zarr-sample-data/example_field_0.zarr2/q/1.1 new file mode 100644 index 0000000000000000000000000000000000000000..377eb07cb782bc18c866ac4ad785aaa5ad77e488 GIT binary patch literal 112 zcmZQ#H0DTPU|;~@0w9)T3FFh Date: Fri, 8 Aug 2025 15:43:12 +0100 Subject: [PATCH 04/13] add test --- tests/integration/test_zarr_to_iris.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/integration/test_zarr_to_iris.py b/tests/integration/test_zarr_to_iris.py index 0b18dca..d4b11f8 100644 --- a/tests/integration/test_zarr_to_iris.py +++ b/tests/integration/test_zarr_to_iris.py @@ -1,4 +1,7 @@ """Test conversion of remote and local Zarr store to iris Cube.""" +from importlib.resources import files as importlib_files +from pathlib import Path + import iris import xarray as xr import ncdata @@ -6,6 +9,39 @@ import zarr +def test_load_zarr2_local(): + """Test loading a Zarr2 store from local FS.""" + zarr_path = ( + Path(importlib_files("tests")) + / "zarr-sample-data" + / "example_field_0.zarr2" + ) + + time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) + zarr_xr = xr.open_dataset( + zarr_path, + consolidated=True, + decode_times=time_coder, + engine="zarr", + chunks={}, + backend_kwargs={}, + ) + zarr_xr.unify_chunks() + + conversion_func = ncdata.iris_xarray.cubes_from_xarray + cubes = conversion_func(zarr_xr) + + assert len(cubes) == 1 + cube = cubes[0] + assert cube.var_name == "q" + assert cube.standard_name == "specific_humidity" + assert cube.long_name is None + coords = cube.coords() + coord_names = [coord.standard_name for coord in coords] + assert "longitude" in coord_names + assert "latitude" in coord_names + + def test_load_remote_zarr(): """Test loading a remote Zarr store. From edd6c365cecdee3429127877467e3b7a175663c4 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 15:43:26 +0100 Subject: [PATCH 05/13] make warning more robust --- lib/ncdata/xarray.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py index 5178ff4..9f05c23 100644 --- a/lib/ncdata/xarray.py +++ b/lib/ncdata/xarray.py @@ -22,6 +22,16 @@ from . import NcAttribute, NcData, NcDimension, NcVariable +def _raise_warning(var): + """Raise a warnings.warning if variable data not lazy.""" + warn_msg = ( + f"Variable {var} has fully realized " + "data, if you need lazy data, then add " + "chunks={} as argument to Xarray open_dataset." + ) + warnings.warn(warn_msg, UserWarning, stacklevel=2) + + class _XarrayNcDataStore(NetCDF4DataStore): """ An adapter class presenting ncdata as an xarray datastore. @@ -97,14 +107,15 @@ def store( # Install variables, creating dimensions as we go. for varname, var in new_variables.items(): - if isinstance(var.data, np.ndarray) and \ - var.attrs["axis"] not in ["X", "Y", "Z", "T"]: - warn_msg = ( - f"Variable {var} has fully realized " - "data, if you need lazy data, then add " - "chunks={} as argument to Xarray open_dataset." - ) - warnings.warn(warn_msg, UserWarning, stacklevel=2) + if "axis" not in var.attrs: + std_axes = ["latitude", "longitude", "time"] + if isinstance(var.data, np.ndarray) and \ + var.attrs["standard_name"] not in std_axes: + _raise_warning(var) + else: + if isinstance(var.data, np.ndarray) and \ + var.attrs["axis"] not in ["X", "Y", "Z", "T"]: + _raise_warning(var) if varname in self.ncdata.variables: raise ValueError(f'duplicate variable : "{varname}"') From 105031f4e2870a1866511a2f5a3a2e7607f7258f Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:16:38 +0100 Subject: [PATCH 06/13] add Zarr3 test data --- .../example_field_0.zarr3/lat/c/0 | Bin 0 -> 41 bytes .../example_field_0.zarr3/lat/zarr.json | 47 +++ .../example_field_0.zarr3/lat_bnds/c/0/0 | Bin 0 -> 39 bytes .../example_field_0.zarr3/lat_bnds/zarr.json | 47 +++ .../example_field_0.zarr3/lon/c/0 | Bin 0 -> 50 bytes .../example_field_0.zarr3/lon/zarr.json | 47 +++ .../example_field_0.zarr3/lon_bnds/c/0/0 | Bin 0 -> 50 bytes .../example_field_0.zarr3/lon_bnds/zarr.json | 47 +++ .../example_field_0.zarr3/q/c/0/0 | Bin 0 -> 235 bytes .../example_field_0.zarr3/q/zarr.json | 52 ++++ .../example_field_0.zarr3/time/c | Bin 0 -> 17 bytes .../example_field_0.zarr3/time/zarr.json | 39 +++ .../example_field_0.zarr3/zarr.json | 292 ++++++++++++++++++ 13 files changed, 571 insertions(+) create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lat/c/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lat/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lat_bnds/c/0/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lat_bnds/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lon/c/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lon/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/c/0/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/q/c/0/0 create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/q/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/time/c create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/time/zarr.json create mode 100644 tests/zarr-sample-data/example_field_0.zarr3/zarr.json diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lat/c/0 b/tests/zarr-sample-data/example_field_0.zarr3/lat/c/0 new file mode 100644 index 0000000000000000000000000000000000000000..7ef1fc24eaf0f39d9d6c29cc892774561768449f GIT binary patch literal 41 pcmdPcs{dC(gO!nC0|N*g2s*&f;MU;g0OdI_Gc+_zJ;-Rm1OVQa3XA{% literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lat/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/lat/zarr.json new file mode 100644 index 0000000..42da08d --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/lat/zarr.json @@ -0,0 +1,47 @@ +{ + "shape": [ + 5 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_north", + "standard_name": "latitude", + "bounds": "lat_bnds", + }, + "dimension_names": [ + "lat" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lat_bnds/c/0/0 b/tests/zarr-sample-data/example_field_0.zarr3/lat_bnds/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..a2317cbf52c38ba0b3a4d13bd977420f72f866d2 GIT binary patch literal 39 scmdPcs{dCZ;41?|0|N*&gdJe;vvcrk2yN0ra>71 DamfvK literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lon/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/lon/zarr.json new file mode 100644 index 0000000..42f9657 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/lon/zarr.json @@ -0,0 +1,47 @@ +{ + "shape": [ + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_east", + "standard_name": "longitude", + "bounds": "lon_bnds", + }, + "dimension_names": [ + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/c/0/0 b/tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..3a05139fabd77d1a74d4826bad22504dba20c39a GIT binary patch literal 50 zcmdPcs{dD^!IzO?0RuyWn*&4GgM@}Og`5Wk8;Tps95@*qjzOWDLZz$jh9e9S0MM2a A%>V!Z literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/zarr.json new file mode 100644 index 0000000..ed74f33 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/lon_bnds/zarr.json @@ -0,0 +1,47 @@ +{ + "shape": [ + 8, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lon", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/tests/zarr-sample-data/example_field_0.zarr3/q/c/0/0 b/tests/zarr-sample-data/example_field_0.zarr3/q/c/0/0 new file mode 100644 index 0000000000000000000000000000000000000000..924dc5043c1f5bd21df06523b786c973e1bb19a3 GIT binary patch literal 235 zcmVFAwm_Hc=Rt_z)oj+P+qCX1Pqi{V6r$1OZw8(b8uctrbrJKYcu(EhROgD)^ zVCAqsDO``j^(eeQ_*1~Y!aov&z{;UTwm*^Wd_Of_x`}NLh3m1cvVxI#+xek6w8(b8pNc;f04qztz|I@O1(FFIn!+H<+~iwj lHZoZpDwk-xSs49O<=ZNmqV^6U%R^;YDs53M9m*{hn`0yfZNLBk literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr3/q/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/q/zarr.json new file mode 100644 index 0000000..7b895a0 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/q/zarr.json @@ -0,0 +1,52 @@ +{ + "shape": [ + 5, + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "project": "research", + "standard_name": "specific_humidity", + "units": "1", + "cell_methods": "area: mean", + "coordinates": "time", + }, + "dimension_names": [ + "lat", + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/tests/zarr-sample-data/example_field_0.zarr3/time/c b/tests/zarr-sample-data/example_field_0.zarr3/time/c new file mode 100644 index 0000000000000000000000000000000000000000..16e658b14d90c05cf1c523593ca4c6f8efd0b32d GIT binary patch literal 17 ScmdPcs{dDk!;t|B>>U6kDFX5U literal 0 HcmV?d00001 diff --git a/tests/zarr-sample-data/example_field_0.zarr3/time/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/time/zarr.json new file mode 100644 index 0000000..32ebdf5 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/time/zarr.json @@ -0,0 +1,39 @@ +{ + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "standard_name": "time", + "units": "days since 2018-12-01" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] +} diff --git a/tests/zarr-sample-data/example_field_0.zarr3/zarr.json b/tests/zarr-sample-data/example_field_0.zarr3/zarr.json new file mode 100644 index 0000000..54ac972 --- /dev/null +++ b/tests/zarr-sample-data/example_field_0.zarr3/zarr.json @@ -0,0 +1,292 @@ +{ + "attributes": { + "Conventions": "CF-1.12" + }, + "zarr_format": 3, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "lon": { + "shape": [ + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_east", + "standard_name": "longitude", + "bounds": "lon_bnds" + }, + "dimension_names": [ + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lat_bnds": { + "shape": [ + 5, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lat", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lat": { + "shape": [ + 5 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "units": "degrees_north", + "standard_name": "latitude", + "bounds": "lat_bnds" + }, + "dimension_names": [ + "lat" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "time": { + "shape": [], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "standard_name": "time", + "units": "days since 2018-12-01" + }, + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "lon_bnds": { + "shape": [ + 8, + 2 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 8, + 2 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + }, + "dimension_names": [ + "lon", + "bounds2" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + }, + "q": { + "shape": [ + 5, + 8 + ], + "data_type": "float64", + "chunk_grid": { + "name": "regular", + "configuration": { + "chunk_shape": [ + 5, + 8 + ] + } + }, + "chunk_key_encoding": { + "name": "default", + "configuration": { + "separator": "/" + } + }, + "fill_value": 0.0, + "codecs": [ + { + "name": "bytes", + "configuration": { + "endian": "little" + } + }, + { + "name": "zstd", + "configuration": { + "level": 0, + "checksum": false + } + } + ], + "attributes": { + "project": "research", + "standard_name": "specific_humidity", + "units": "1", + "cell_methods": "area: mean", + "coordinates": "time" + }, + "dimension_names": [ + "lat", + "lon" + ], + "zarr_format": 3, + "node_type": "array", + "storage_transformers": [] + } + } + }, + "node_type": "group" +} From 16fb58811ea3aa5d931af9d2ea4b3d0edc462497 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:17:40 +0100 Subject: [PATCH 07/13] rm erroneous file --- tests/zarr-sample-data/example_field_0.zarr17 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 tests/zarr-sample-data/example_field_0.zarr17 diff --git a/tests/zarr-sample-data/example_field_0.zarr17 b/tests/zarr-sample-data/example_field_0.zarr17 deleted file mode 100644 index 9abbe8a..0000000 --- a/tests/zarr-sample-data/example_field_0.zarr17 +++ /dev/null @@ -1 +0,0 @@ -This is not a Zarr file. Go grab lunch! From f6c2766cd9aa9933fec47119951f56a57a861228 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:17:54 +0100 Subject: [PATCH 08/13] add zarr3 test --- tests/integration/test_zarr_to_iris.py | 54 +++++++++++++++++--------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/tests/integration/test_zarr_to_iris.py b/tests/integration/test_zarr_to_iris.py index d4b11f8..e356965 100644 --- a/tests/integration/test_zarr_to_iris.py +++ b/tests/integration/test_zarr_to_iris.py @@ -9,6 +9,16 @@ import zarr +time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) +xr_kwargs = { + "consolidated": True, + "decode_times": time_coder, + "engine": "zarr", + "chunks": {}, + "backend_kwargs": {}, +} + + def test_load_zarr2_local(): """Test loading a Zarr2 store from local FS.""" zarr_path = ( @@ -17,15 +27,7 @@ def test_load_zarr2_local(): / "example_field_0.zarr2" ) - time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) - zarr_xr = xr.open_dataset( - zarr_path, - consolidated=True, - decode_times=time_coder, - engine="zarr", - chunks={}, - backend_kwargs={}, - ) + zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) zarr_xr.unify_chunks() conversion_func = ncdata.iris_xarray.cubes_from_xarray @@ -42,6 +44,30 @@ def test_load_zarr2_local(): assert "latitude" in coord_names +def test_load_zarr3_local(): + """Test loading a Zarr3 store from local FS.""" + zarr_path = ( + Path(importlib_files("tests")) + / "zarr-sample-data" + / "example_field_0.zarr3" + ) + + zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) + zarr_xr.unify_chunks() + + conversion_func = ncdata.iris_xarray.cubes_from_xarray + cubes = conversion_func(zarr_xr) + + assert len(cubes) == 1 + cube = cubes[0] + assert cube.var_name == "q" + assert cube.standard_name == "specific_humidity" + assert cube.long_name is None + coords = cube.coords() + coord_names = [coord.standard_name for coord in coords] + assert "longitude" in coord_names + assert "latitude" in coord_names + def test_load_remote_zarr(): """Test loading a remote Zarr store. @@ -54,15 +80,7 @@ def test_load_remote_zarr(): "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3" ) - time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) - zarr_xr = xr.open_dataset( - zarr_path, - consolidated=True, - decode_times=time_coder, - engine="zarr", - chunks={}, - backend_kwargs={}, - ) + zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) zarr_xr.unify_chunks() conversion_func = ncdata.iris_xarray.cubes_from_xarray From 7846f776ff17c07d75f062c1e69259511233be4a Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:18:06 +0100 Subject: [PATCH 09/13] make the warning better --- lib/ncdata/xarray.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py index 9f05c23..b5d3352 100644 --- a/lib/ncdata/xarray.py +++ b/lib/ncdata/xarray.py @@ -107,15 +107,16 @@ def store( # Install variables, creating dimensions as we go. for varname, var in new_variables.items(): - if "axis" not in var.attrs: - std_axes = ["latitude", "longitude", "time"] - if isinstance(var.data, np.ndarray) and \ - var.attrs["standard_name"] not in std_axes: - _raise_warning(var) - else: - if isinstance(var.data, np.ndarray) and \ - var.attrs["axis"] not in ["X", "Y", "Z", "T"]: - _raise_warning(var) + if isinstance(var.data, np.ndarray): + # Zarr2 metadata + if "axis" not in var.attrs: + std_axes = ["latitude", "longitude", "time"] + if var.attrs["standard_name"] not in std_axes: + _raise_warning(var) + # Zarr3 metadata + else: + if var.attrs["axis"] not in ["X", "Y", "Z", "T"]: + _raise_warning(var) if varname in self.ncdata.variables: raise ValueError(f'duplicate variable : "{varname}"') From 2d2c661dc0fe385b974d910ca4af24fd7e51c021 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:55:51 +0100 Subject: [PATCH 10/13] full test suite --- tests/integration/test_zarr_to_iris.py | 71 ++++++++++++++++++-------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_zarr_to_iris.py b/tests/integration/test_zarr_to_iris.py index e356965..3580ce6 100644 --- a/tests/integration/test_zarr_to_iris.py +++ b/tests/integration/test_zarr_to_iris.py @@ -3,20 +3,35 @@ from pathlib import Path import iris +import pytest import xarray as xr import ncdata import ncdata.iris_xarray import zarr -time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) -xr_kwargs = { - "consolidated": True, - "decode_times": time_coder, - "engine": "zarr", - "chunks": {}, - "backend_kwargs": {}, -} +def _return_kwargs(): + time_coder = xr.coders.CFDatetimeCoder(use_cftime=True) + xr_kwargs = { + "consolidated": True, + "decode_times": time_coder, + "engine": "zarr", + "chunks": {}, + "backend_kwargs": {}, + } + + return xr_kwargs + + +def _run_checks(cube): + """Run some standard checks.""" + assert cube.var_name == "q" + assert cube.standard_name == "specific_humidity" + assert cube.long_name is None + coords = cube.coords() + coord_names = [coord.standard_name for coord in coords] + assert "longitude" in coord_names + assert "latitude" in coord_names def test_load_zarr2_local(): @@ -27,6 +42,7 @@ def test_load_zarr2_local(): / "example_field_0.zarr2" ) + xr_kwargs = _return_kwargs() zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) zarr_xr.unify_chunks() @@ -35,13 +51,7 @@ def test_load_zarr2_local(): assert len(cubes) == 1 cube = cubes[0] - assert cube.var_name == "q" - assert cube.standard_name == "specific_humidity" - assert cube.long_name is None - coords = cube.coords() - coord_names = [coord.standard_name for coord in coords] - assert "longitude" in coord_names - assert "latitude" in coord_names + _run_checks(cube) def test_load_zarr3_local(): @@ -52,6 +62,7 @@ def test_load_zarr3_local(): / "example_field_0.zarr3" ) + xr_kwargs = _return_kwargs() zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) zarr_xr.unify_chunks() @@ -60,13 +71,8 @@ def test_load_zarr3_local(): assert len(cubes) == 1 cube = cubes[0] - assert cube.var_name == "q" - assert cube.standard_name == "specific_humidity" - assert cube.long_name is None - coords = cube.coords() - coord_names = [coord.standard_name for coord in coords] - assert "longitude" in coord_names - assert "latitude" in coord_names + _run_checks(cube) + def test_load_remote_zarr(): """Test loading a remote Zarr store. @@ -80,6 +86,7 @@ def test_load_remote_zarr(): "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3" ) + xr_kwargs = _return_kwargs() zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) zarr_xr.unify_chunks() @@ -89,3 +96,23 @@ def test_load_remote_zarr(): assert isinstance(cubes, iris.cube.CubeList) assert len(cubes) == 1 assert cubes[0].has_lazy_data() + + +def test_load_remote_zarr_realized_data(): + """Test with the same remote Zarr store but chunks=None.""" + zarr_path = ( + "https://uor-aces-o.s3-ext.jc.rl.ac.uk/" + "esmvaltool-zarr/pr_Amon_CNRM-ESM2-1_02Kpd-11_r1i1p2f2_gr_200601-220112.zarr3" + ) + + xr_kwargs = _return_kwargs() + xr_kwargs["chunks"] = None + zarr_xr = xr.open_dataset(zarr_path, **xr_kwargs) + + conversion_func = ncdata.iris_xarray.cubes_from_xarray + msg = ( + "has fully realized data, if you need lazy data, " + "then add chunks={} as argument to Xarray open_dataset." + ) + with pytest.warns(UserWarning, match=msg) as w: + cubes = conversion_func(zarr_xr) From 179610005e8f8e0239b7dad493f70b4da316acb0 Mon Sep 17 00:00:00 2001 From: Valeriu Predoi Date: Fri, 8 Aug 2025 16:57:11 +0100 Subject: [PATCH 11/13] more general search meth --- lib/ncdata/xarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py index b5d3352..e746a44 100644 --- a/lib/ncdata/xarray.py +++ b/lib/ncdata/xarray.py @@ -111,7 +111,7 @@ def store( # Zarr2 metadata if "axis" not in var.attrs: std_axes = ["latitude", "longitude", "time"] - if var.attrs["standard_name"] not in std_axes: + if not list(set(var.attrs.values()) & set(std_axes)): _raise_warning(var) # Zarr3 metadata else: From 3bcf54f7922b62645e67ca83b59c2dafb7c2ced8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 11 Aug 2025 08:32:29 +0100 Subject: [PATCH 12/13] Add zarr to test dependencies. --- .github/workflows/ci-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index b0eef1a..7609e93 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -49,7 +49,7 @@ jobs: - name: "Install dependencies" run: | - conda install --yes numpy pytest pytest-mock iris xarray filelock requests + conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr - name: "Install *latest* Iris" run: | From 2ea08de2100a7c81efbe94d0324cd03ed2b0af80 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 11 Aug 2025 11:44:55 +0100 Subject: [PATCH 13/13] Add aiohttp to test deps. --- .github/workflows/ci-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 7609e93..8f54e30 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -49,7 +49,7 @@ jobs: - name: "Install dependencies" run: | - conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr + conda install --yes numpy pytest pytest-mock iris xarray filelock requests zarr aiohttp - name: "Install *latest* Iris" run: |