diff --git a/lib/ncdata/dataset_like.py b/lib/ncdata/dataset_like.py
index 55af241..acc1396 100644
--- a/lib/ncdata/dataset_like.py
+++ b/lib/ncdata/dataset_like.py
@@ -193,6 +193,10 @@ def filepath() -> str: # noqa: D102
         # return self.ncdata.encoding.get("source", "")
         return ""
 
+    def __lt__(self, other):
+        # Support a trivial "comparison", just so that Iris can load a list of them.
+        return False
+
 
 class Nc4VariableLike(_Nc4DatalikeWithNcattrs):
     """
diff --git a/lib/ncdata/iris.py b/lib/ncdata/iris.py
index cb94a67..5651a4b 100644
--- a/lib/ncdata/iris.py
+++ b/lib/ncdata/iris.py
@@ -4,6 +4,8 @@
 Convert :class:`~ncdata.NcData`\s to and from Iris :class:`~iris.cube.Cube`\s.
 
 """
+from typing import Any, AnyStr, Dict, Iterable, List, Union
+
 #
 # NOTE: This uses the :mod:`ncdata.dataset_like` interface ability to mimic a
 # :class:`netCDF4.Dataset` object, which can then be loaded like a file into Iris.
@@ -12,9 +14,6 @@
 # This means that, hopefully, all we need to know of Iris itself is the load and save,
 # though we do specifically target the netcdf format interface.
 #
-
-from typing import Any, AnyStr, Dict, Iterable, Union
-
 import iris
 import iris.fileformats.netcdf as ifn
 from iris.cube import Cube, CubeList
@@ -25,7 +24,9 @@
 __all__ = ["from_iris", "to_iris"]
 
 
-def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList:
+def to_iris(
+    ncdata: Union[NcData, List[NcData]], **iris_load_kwargs: Dict[AnyStr, Any]
+) -> CubeList:
     """
     Read Iris cubes from an :class:`~ncdata.NcData`.
 
@@ -33,8 +34,8 @@ def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList:
 
     Parameters
     ----------
-    ncdata : NcData
-        object to be loaded, treated as equivalent to a netCDF4 dataset.
+    ncdata : NcData or list(NcData)
+        object(s) to be loaded into Iris, treated as equivalent to netCDF4 datasets.
     iris_load_kwargs : dict
-        extra keywords, passed to :func:`iris.fileformats.netcdf.load_cubes`
+        extra keywords, passed to :func:`iris.load`
 
@@ -44,8 +45,11 @@ def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList:
     cubes : iris.cube.CubeList
         loaded results
     """
-    dslike = Nc4DatasetLike(ncdata)
-    cubes = CubeList(ifn.load_cubes(dslike, **iris_load_kwargs))
+    if isinstance(ncdata, Iterable):
+        dslikes = [Nc4DatasetLike(data) for data in ncdata]
+    else:
+        dslikes = Nc4DatasetLike(ncdata)
+    cubes = CubeList(iris.load(dslikes, **iris_load_kwargs))
     return cubes
 
 
diff --git a/lib/ncdata/xarray.py b/lib/ncdata/xarray.py
index ecdd9d5..cf92ce7 100644
--- a/lib/ncdata/xarray.py
+++ b/lib/ncdata/xarray.py
@@ -13,6 +13,8 @@
 from pathlib import Path
 from typing import AnyStr, Union
 
+import dask.array as da
+import numpy as np
 import xarray as xr
 from xarray.backends import NetCDF4DataStore
 
@@ -86,14 +88,14 @@ def store(
         unlimited_dims = unlimited_dims or []
         # Encode the xarray data as-if-for netcdf4 output, so we convert internal forms
         # (such as strings and timedates) to file-relevant forms.
-        variables, attributes = self.encode(variables, attributes)
+        new_variables, attributes = self.encode(variables, attributes)
 
         # Install (global) attributes into self.
         for attrname, v in attributes.items():
             self.ncdata.attributes[attrname] = NcAttribute(attrname, v)
 
         # Install variables, creating dimensions as we go.
-        for varname, var in variables.items():
+        for varname, var in new_variables.items():
             if varname in self.ncdata.variables:
                 raise ValueError(f'duplicate variable : "{varname}"')
 
@@ -117,6 +119,20 @@ def store(
             }
 
             data = var.data
+
+            if hasattr(var.dtype, "kind") and var.dtype.kind == "f":
+                # Time variables may in the original be datetime objects or numpy
+                # datetimes, which in encoding get converted to floats. When computed,
+                # however, in both cases the wrapped function may in fact return ints.
+                # This is, effectively, an xarray bug, but a very subtle one since it
+                # doesn't affect what gets written to an actual file.
+                # Get the original, unencoded version of the variable.
+                oldvar = variables[varname]
+                if oldvar.data.dtype != var.dtype:
+                    # The dtype was changed by encoding : cast every block to the
+                    # 'expected' float type (N.B. "np.astype" needs NumPy >= 2).
+                    data = da.map_blocks(np.asanyarray, data, var.dtype)
+
             nc_var = NcVariable(
                 name=varname,
                 dimensions=var.dims,
diff --git a/tests/integration/equivalence_testing_utils.py b/tests/integration/equivalence_testing_utils.py
index 4ac1ed5..8693bc6 100644
--- a/tests/integration/equivalence_testing_utils.py
+++ b/tests/integration/equivalence_testing_utils.py
@@ -8,6 +8,8 @@
 import numpy as np
 import pytest
 
+import iris.mesh
+
 
 def cubes_equal__corrected(c1, c2):
     """
@@ -84,10 +86,12 @@ def fix_arrays(a1, a2):
     )
     # Fix matching of all coords points + bounds
     for co1, co2 in zip(c1.coords(), c2.coords()):
+        if isinstance(co1, iris.mesh.MeshCoord):
+            # Can't write MeshCoords
+            continue
         co1.points, co2.points = fix_arrays(
             *(co.core_points() for co in (co1, co2))
         )
-    for co1, co2 in zip(c1.coords(), c2.coords()):
         co1.bounds, co2.bounds = fix_arrays(
             *(co.core_bounds() for co in (co1, co2))
         )
@@ -162,6 +166,9 @@ def nanmask_cube(cube):
     """Replace all NaNs with masked points, in cube data and coords."""
     cube.data = nanmask_array(cube.core_data())
     for coord in cube.coords():
+        if isinstance(coord, iris.mesh.MeshCoord):
+            # Can't write MeshCoords
+            continue
         coord.points = nanmask_array(coord.core_points())
         coord.bounds = nanmask_array(coord.core_bounds())
     return cube
diff --git a/tests/unit/iris/test_to_iris.py b/tests/unit/iris/test_to_iris.py
index bf90999..8170e80 100644
--- a/tests/unit/iris/test_to_iris.py
+++ b/tests/unit/iris/test_to_iris.py
@@ -10,7 +10,7 @@
 """
 import dask.array as da
 import numpy as np
-from iris import NameConstraint
+from iris._constraints import NameConstraint
 from iris.cube import CubeList
 
 from ncdata import NcData, NcDimension, NcVariable
@@ -116,3 +116,47 @@ def test_kwargs__load_by_name():
     assert isinstance(cubes, CubeList)
     assert len(cubes) == 1
     assert cubes[0].name() == "var2"
+
+
+def test_iris_loadchain():
+    """Check that standard iris load-chain processing is applied, including a merge."""
+
+    # Create a pair of datasets with mergeable variables.
+    # Start by making one, with a scalar 'z' coord.
+    ncdata = NcData(
+        dimensions=[NcDimension("x", 3)],
+        variables=[
+            NcVariable(
+                name="v_data",
+                dimensions=["x"],
+                data=[1, 2, 3],
+                attributes={"long_name": "data", "coordinates": "v_z"},
+            ),
+            NcVariable(
+                name="v_z",
+                dimensions=[],
+                data=[1],
+                attributes={"long_name": "z"},
+            ),
+        ],
+    )
+
+    # Duplicate to get a second dataset, then change the z value.
+    # N.B. we need 2 datasets, as Iris won't match coords with different var-names.
+    ncdata2 = ncdata.copy()
+
+    # N.B. must **copy data array** before modifying, as var.copy() doesn't do so.
+    data = ncdata2.variables["v_z"].data
+    data = data.copy()
+    data[0] = 2
+    ncdata2.variables["v_z"].data = data
+
+    # Loading should now merge these 2 into one cube.
+    cubes = to_iris([ncdata, ncdata2])
+
+    assert isinstance(cubes, CubeList)
+    assert len(cubes) == 1
+    (cube,) = cubes
+    assert cube.long_name == "data"
+    assert cube.shape == (2, 3)
+    assert cube.coords("z", dim_coords=True)
diff --git a/tests/unit/xarray/test_to_xarray.py b/tests/unit/xarray/test_to_xarray.py
index b1ea919..f2e777f 100644
--- a/tests/unit/xarray/test_to_xarray.py
+++ b/tests/unit/xarray/test_to_xarray.py
@@ -59,18 +59,7 @@ def test_real_nocopy():
     xrds = to_xarray(ncdata)
 
     # Check that the data content is the *SAME ARRAY*
-
-    # This exists within the Xarray variable, but wrapped in a slightly cunning way...
-    # We will embed some rather specific Xarray knowledge here, and hope that it does
-    # not break too often.
     xr_data = xrds.variables["var_x"]._data
-    from xarray.core.indexing import LazilyIndexedArray, NumpyIndexingAdapter
-
-    assert isinstance(xr_data, LazilyIndexedArray)
-    xr_data = xr_data.array
-    assert isinstance(xr_data, NumpyIndexingAdapter)
-    xr_data = xr_data.array
-
     assert xr_data is real_numpy_data
 
 