From 0ac8ed2def6f62f9bb01e2fd75d97c79287e2f4e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 28 May 2025 13:49:16 +0100 Subject: [PATCH 1/6] Fixes to use main iris loadchain. --- lib/ncdata/dataset_like.py | 4 +++ lib/ncdata/iris.py | 16 ++++++----- tests/unit/iris/test_to_iris.py | 48 ++++++++++++++++++++++++++++++++- 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/lib/ncdata/dataset_like.py b/lib/ncdata/dataset_like.py index 55af241..df034c7 100644 --- a/lib/ncdata/dataset_like.py +++ b/lib/ncdata/dataset_like.py @@ -193,6 +193,10 @@ def filepath() -> str: # noqa: D102 # return self.ncdata.encoding.get("source", "") return "" + def __lt__(self, other): + # Support "trivial" dataset comparison, so Iris can load from a list of them + return False + class Nc4VariableLike(_Nc4DatalikeWithNcattrs): """ diff --git a/lib/ncdata/iris.py b/lib/ncdata/iris.py index cb94a67..0215855 100644 --- a/lib/ncdata/iris.py +++ b/lib/ncdata/iris.py @@ -13,10 +13,9 @@ # though we do specifically target the netcdf format interface. # -from typing import Any, AnyStr, Dict, Iterable, Union +from typing import Any, AnyStr, Dict, Iterable, Union, List import iris -import iris.fileformats.netcdf as ifn from iris.cube import Cube, CubeList from . import NcData @@ -25,7 +24,7 @@ __all__ = ["from_iris", "to_iris"] -def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList: +def to_iris(ncdata: NcData | List[NcData], **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList: """ Read Iris cubes from an :class:`~ncdata.NcData`. @@ -33,8 +32,8 @@ def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList: Parameters ---------- - ncdata : NcData - object to be loaded, treated as equivalent to a netCDF4 dataset. + ncdata : NcData of list(NcData) + object(s) to be loaded, treated as equivalent netCDF4 datasets. 
iris_load_kwargs : dict extra keywords, passed to :func:`iris.fileformats.netcdf.load_cubes` @@ -44,8 +43,11 @@ def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList: cubes : iris.cube.CubeList loaded results """ - dslike = Nc4DatasetLike(ncdata) - cubes = CubeList(ifn.load_cubes(dslike, **iris_load_kwargs)) + if isinstance(ncdata, Iterable): + dslikes = [Nc4DatasetLike(data) for data in ncdata] + else: + dslikes = Nc4DatasetLike(ncdata) + cubes = CubeList(iris.load(dslikes, **iris_load_kwargs)) return cubes diff --git a/tests/unit/iris/test_to_iris.py b/tests/unit/iris/test_to_iris.py index bf90999..ec466dd 100644 --- a/tests/unit/iris/test_to_iris.py +++ b/tests/unit/iris/test_to_iris.py @@ -10,8 +10,10 @@ """ import dask.array as da import numpy as np -from iris import NameConstraint +import iris +from iris._constraints import NameConstraint from iris.cube import CubeList +import pytest from ncdata import NcData, NcDimension, NcVariable from ncdata.iris import to_iris @@ -116,3 +118,47 @@ def test_kwargs__load_by_name(): assert isinstance(cubes, CubeList) assert len(cubes) == 1 assert cubes[0].name() == "var2" + + +def test_iris_loadchain(): + """Check that standard iris load-chain processing is applied, including a merge.""" + + # Create a pair of datasets with mergeable variables. + # Start by making one, with a scalar 'z' coord. + ncdata = NcData( + dimensions=[NcDimension("x", 3)], + variables=[ + NcVariable( + name="v_data", + dimensions=["x"], + data=[1, 2, 3], + attributes={"long_name": "data", "coordinates": "v_z"} + ), + NcVariable( + name="v_z", + dimensions=[], + data=[1], + attributes={"long_name": "z"} + ), + ] + ) + + # Duplicate to get a second dataset, then change the z value. + # N.B. we need 2 datasets, as Iris won't match coords with different var-names. + ncdata2 = ncdata.copy() + + # N.B. must **copy data array** before modifying, as var.copy() doesn't do so. 
+ data = ncdata2.variables["v_z"].data + data = data.copy() + data[0] = 2 + ncdata2.variables["v_z"].data = data + + # Loading should now merge these 2 into one cube. + cubes = to_iris([ncdata, ncdata2]) + + assert isinstance(cubes, CubeList) + assert len(cubes) == 1 + (cube,) = cubes + assert cube.long_name == "data" + assert cube.shape == (2, 3) + assert cube.coords("z", dim_coords=True) From 23d8342b4f4c84b0d243f3e48169f2a24da75aa8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 28 May 2025 14:00:15 +0100 Subject: [PATCH 2/6] Small adjustments. --- lib/ncdata/dataset_like.py | 2 +- lib/ncdata/iris.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/ncdata/dataset_like.py b/lib/ncdata/dataset_like.py index df034c7..acc1396 100644 --- a/lib/ncdata/dataset_like.py +++ b/lib/ncdata/dataset_like.py @@ -194,7 +194,7 @@ def filepath() -> str: # noqa: D102 return "" def __lt__(self, other): - # Support "trivial" dataset comparison, so Iris can load from a list of them + # Support a trivial "comparison", just so that Iris can load a list of them. return False diff --git a/lib/ncdata/iris.py b/lib/ncdata/iris.py index 0215855..1227180 100644 --- a/lib/ncdata/iris.py +++ b/lib/ncdata/iris.py @@ -32,8 +32,8 @@ def to_iris(ncdata: NcData | List[NcData], **iris_load_kwargs: Dict[AnyStr, Any] Parameters ---------- - ncdata : NcData of list(NcData) - object(s) to be loaded, treated as equivalent netCDF4 datasets. + ncdata : NcData or list(NcData) + object(s) to be loaded into Iris, treated as equivalent to netCDF4 datasets. iris_load_kwargs : dict extra keywords, passed to :func:`iris.fileformats.netcdf.load_cubes` From 11653fcd0e738f86ed8a2e9340e429b8cf588f6e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 28 May 2025 14:19:18 +0100 Subject: [PATCH 3/6] Fix usage of iris netcdf save. 
--- lib/ncdata/iris.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/ncdata/iris.py b/lib/ncdata/iris.py index 1227180..8f107e5 100644 --- a/lib/ncdata/iris.py +++ b/lib/ncdata/iris.py @@ -12,11 +12,12 @@ # This means that, hopefully, all we need to know of Iris itself is the load and save, # though we do specifically target the netcdf format interface. # - +import dask from typing import Any, AnyStr, Dict, Iterable, Union, List import iris from iris.cube import Cube, CubeList +import iris.fileformats.netcdf as ifn from . import NcData from .dataset_like import Nc4DatasetLike From a7e7221686c992bc32e3d3490cdcaedd7f0a9186 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 27 Jul 2025 18:17:49 +0100 Subject: [PATCH 4/6] Fix for new Iris MeshCoord behaviour. --- tests/integration/equivalence_testing_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/integration/equivalence_testing_utils.py b/tests/integration/equivalence_testing_utils.py index 4ac1ed5..8693bc6 100644 --- a/tests/integration/equivalence_testing_utils.py +++ b/tests/integration/equivalence_testing_utils.py @@ -8,6 +8,8 @@ import numpy as np import pytest +import iris.mesh + def cubes_equal__corrected(c1, c2): """ @@ -84,10 +86,12 @@ def fix_arrays(a1, a2): ) # Fix matching of all coords points + bounds for co1, co2 in zip(c1.coords(), c2.coords()): + if isinstance(co1, iris.mesh.MeshCoord): + # Can't write MeshCoords + continue co1.points, co2.points = fix_arrays( *(co.core_points() for co in (co1, co2)) ) - for co1, co2 in zip(c1.coords(), c2.coords()): co1.bounds, co2.bounds = fix_arrays( *(co.core_bounds() for co in (co1, co2)) ) @@ -162,6 +166,9 @@ def nanmask_cube(cube): """Replace all NaNs with masked points, in cube data and coords.""" cube.data = nanmask_array(cube.core_data()) for coord in cube.coords(): + if isinstance(coord, iris.mesh.MeshCoord): + # Can't write MeshCoords + continue coord.points = 
nanmask_array(coord.core_points()) coord.bounds = nanmask_array(coord.core_bounds()) return cube From 8b51595a8a21edb45f4fe71580f59c6e769a7dae Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 27 Jul 2025 19:18:12 +0100 Subject: [PATCH 5/6] Fix xr variable data test for latest xarray. --- lib/ncdata/iris.py | 11 ++++++----- tests/unit/iris/test_to_iris.py | 8 +++----- tests/unit/xarray/test_to_xarray.py | 11 ----------- 3 files changed, 9 insertions(+), 21 deletions(-) diff --git a/lib/ncdata/iris.py b/lib/ncdata/iris.py index 8f107e5..5651a4b 100644 --- a/lib/ncdata/iris.py +++ b/lib/ncdata/iris.py @@ -4,6 +4,8 @@ Convert :class:`~ncdata.NcData`\s to and from Iris :class:`~iris.cube.Cube`\s. """ +from typing import Any, AnyStr, Dict, Iterable, List, Union + # # NOTE: This uses the :mod:`ncdata.dataset_like` interface ability to mimic a # :class:`netCDF4.Dataset` object, which can then be loaded like a file into Iris. @@ -12,12 +14,9 @@ # This means that, hopefully, all we need to know of Iris itself is the load and save, # though we do specifically target the netcdf format interface. # -import dask -from typing import Any, AnyStr, Dict, Iterable, Union, List - import iris -from iris.cube import Cube, CubeList import iris.fileformats.netcdf as ifn +from iris.cube import Cube, CubeList from . import NcData from .dataset_like import Nc4DatasetLike @@ -25,7 +24,9 @@ __all__ = ["from_iris", "to_iris"] -def to_iris(ncdata: NcData | List[NcData], **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList: +def to_iris( + ncdata: NcData | List[NcData], **iris_load_kwargs: Dict[AnyStr, Any] +) -> CubeList: """ Read Iris cubes from an :class:`~ncdata.NcData`. 
diff --git a/tests/unit/iris/test_to_iris.py b/tests/unit/iris/test_to_iris.py index ec466dd..8170e80 100644 --- a/tests/unit/iris/test_to_iris.py +++ b/tests/unit/iris/test_to_iris.py @@ -10,10 +10,8 @@ """ import dask.array as da import numpy as np -import iris from iris._constraints import NameConstraint from iris.cube import CubeList -import pytest from ncdata import NcData, NcDimension, NcVariable from ncdata.iris import to_iris @@ -132,15 +130,15 @@ def test_iris_loadchain(): name="v_data", dimensions=["x"], data=[1, 2, 3], - attributes={"long_name": "data", "coordinates": "v_z"} + attributes={"long_name": "data", "coordinates": "v_z"}, ), NcVariable( name="v_z", dimensions=[], data=[1], - attributes={"long_name": "z"} + attributes={"long_name": "z"}, ), - ] + ], ) # Duplicate to get a second dataset, then change the z value. diff --git a/tests/unit/xarray/test_to_xarray.py b/tests/unit/xarray/test_to_xarray.py index b1ea919..f2e777f 100644 --- a/tests/unit/xarray/test_to_xarray.py +++ b/tests/unit/xarray/test_to_xarray.py @@ -59,18 +59,7 @@ def test_real_nocopy(): xrds = to_xarray(ncdata) # Check that the data content is the *SAME ARRAY* - - # This exists within the Xarray variable, but wrapped in a slightly cunning way... - # We will embed some rather specific Xarray knowledge here, and hope that it does - # not break too often. xr_data = xrds.variables["var_x"]._data - from xarray.core.indexing import LazilyIndexedArray, NumpyIndexingAdapter - - assert isinstance(xr_data, LazilyIndexedArray) - xr_data = xr_data.array - assert isinstance(xr_data, NumpyIndexingAdapter) - xr_data = xr_data.array - assert xr_data is real_numpy_data From 1e9870fa86ba5eeb409d5ab9b7795009bb4371a8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 28 Jul 2025 16:01:17 +0100 Subject: [PATCH 6/6] Workaround for datetimes problem visible in later xarray versions. 
--- lib/ncdata/xarray.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py index ecdd9d5..cf92ce7 100644 --- a/lib/ncdata/xarray.py +++ b/lib/ncdata/xarray.py @@ -13,6 +13,8 @@ from pathlib import Path from typing import AnyStr, Union +import dask.array as da +import numpy as np import xarray as xr from xarray.backends import NetCDF4DataStore @@ -86,14 +88,14 @@ def store( unlimited_dims = unlimited_dims or [] # Encode the xarray data as-if-for netcdf4 output, so we convert internal forms # (such as strings and timedates) to file-relevant forms. - variables, attributes = self.encode(variables, attributes) + new_variables, attributes = self.encode(variables, attributes) # Install (global) attributes into self. for attrname, v in attributes.items(): self.ncdata.attributes[attrname] = NcAttribute(attrname, v) # Install variables, creating dimensions as we go. - for varname, var in variables.items(): + for varname, var in new_variables.items(): if varname in self.ncdata.variables: raise ValueError(f'duplicate variable : "{varname}"') @@ -117,6 +119,20 @@ def store( } data = var.data + + if hasattr(var.dtype, "kind") and var.dtype.kind == "f": + # Time variables may in the original be datetime objects or numpy + # datetimes, which in decoding get converted to floats. When computed, + # however, in both cases the wrapped function may in fact return ints. + # This is, effectively, an xarray bug, but a very subtle one since it + # doesn't affect what gets written to an actual file. + # Get the original, unencoded version of the variable. + oldvar = variables[varname] + if oldvar.data.dtype != var.dtype: + # If the result type is float, but changes in decoding, then cast + # result to the 'expected' float type, to avoid problems. + data = da.map_blocks(np.astype, data, var.dtype) + nc_var = NcVariable( name=varname, dimensions=var.dims,