Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions lib/ncdata/dataset_like.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,10 @@ def filepath() -> str: # noqa: D102
# return self.ncdata.encoding.get("source", "")
return "<Nc4DatasetLike>"

def __lt__(self, other):
# Support a trivial "comparison", just so that Iris can load a list of them.
return False


class Nc4VariableLike(_Nc4DatalikeWithNcattrs):
"""
Expand Down
20 changes: 12 additions & 8 deletions lib/ncdata/iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
Convert :class:`~ncdata.NcData`\s to and from Iris :class:`~iris.cube.Cube`\s.

"""
from typing import Any, AnyStr, Dict, Iterable, List, Union

#
# NOTE: This uses the :mod:`ncdata.dataset_like` interface ability to mimic a
# :class:`netCDF4.Dataset` object, which can then be loaded like a file into Iris.
Expand All @@ -12,9 +14,6 @@
# This means that, hopefully, all we need to know of Iris itself is the load and save,
# though we do specifically target the netcdf format interface.
#

from typing import Any, AnyStr, Dict, Iterable, Union

import iris
import iris.fileformats.netcdf as ifn
from iris.cube import Cube, CubeList
Expand All @@ -25,16 +24,18 @@
__all__ = ["from_iris", "to_iris"]


def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList:
def to_iris(
ncdata: NcData | List[NcData], **iris_load_kwargs: Dict[AnyStr, Any]
) -> CubeList:
"""
Read Iris cubes from an :class:`~ncdata.NcData`.

Behaves like an Iris 'load' operation.

Parameters
----------
ncdata : NcData
object to be loaded, treated as equivalent to a netCDF4 dataset.
ncdata : NcData or list(NcData)
object(s) to be loaded into Iris, treated as equivalent to netCDF4 datasets.

iris_load_kwargs : dict
extra keywords, passed to :func:`iris.fileformats.netcdf.load_cubes`
Expand All @@ -44,8 +45,11 @@ def to_iris(ncdata: NcData, **iris_load_kwargs: Dict[AnyStr, Any]) -> CubeList:
cubes : iris.cube.CubeList
loaded results
"""
dslike = Nc4DatasetLike(ncdata)
cubes = CubeList(ifn.load_cubes(dslike, **iris_load_kwargs))
if isinstance(ncdata, Iterable):
dslikes = [Nc4DatasetLike(data) for data in ncdata]
else:
dslikes = Nc4DatasetLike(ncdata)
cubes = CubeList(iris.load(dslikes, **iris_load_kwargs))
return cubes


Expand Down
20 changes: 18 additions & 2 deletions lib/ncdata/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
from pathlib import Path
from typing import AnyStr, Union

import dask.array as da
import numpy as np
import xarray as xr
from xarray.backends import NetCDF4DataStore

Expand Down Expand Up @@ -86,14 +88,14 @@ def store(
unlimited_dims = unlimited_dims or []
# Encode the xarray data as-if-for netcdf4 output, so we convert internal forms
# (such as strings and timedates) to file-relevant forms.
variables, attributes = self.encode(variables, attributes)
new_variables, attributes = self.encode(variables, attributes)

# Install (global) attributes into self.
for attrname, v in attributes.items():
self.ncdata.attributes[attrname] = NcAttribute(attrname, v)

# Install variables, creating dimensions as we go.
for varname, var in variables.items():
for varname, var in new_variables.items():
if varname in self.ncdata.variables:
raise ValueError(f'duplicate variable : "{varname}"')

Expand All @@ -117,6 +119,20 @@ def store(
}

data = var.data

if hasattr(var.dtype, "kind") and var.dtype.kind == "f":
# Time variables may in the original be datetime objects or numpy
# datetimes, which in decoding get converted to floats. When computed,
# however, in both cases the wrapped function may in fact return ints.
# This is, effectively, an xarray bug, but a very subtle one since it
# doesn't affect what gets written to an actual file.
# Get the original, unencoded version of the variable.
oldvar = variables[varname]
if oldvar.data.dtype != var.dtype:
# If the result type is float, but changes in decoding, then cast
# result to the 'expected' float type, to avoid problems.
data = da.map_blocks(np.astype, data, var.dtype)

nc_var = NcVariable(
name=varname,
dimensions=var.dims,
Expand Down
9 changes: 8 additions & 1 deletion tests/integration/equivalence_testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import pytest

import iris.mesh


def cubes_equal__corrected(c1, c2):
"""
Expand Down Expand Up @@ -84,10 +86,12 @@ def fix_arrays(a1, a2):
)
# Fix matching of all coords points + bounds
for co1, co2 in zip(c1.coords(), c2.coords()):
if isinstance(co1, iris.mesh.MeshCoord):
# Can't write MeshCoords
continue
co1.points, co2.points = fix_arrays(
*(co.core_points() for co in (co1, co2))
)
for co1, co2 in zip(c1.coords(), c2.coords()):
co1.bounds, co2.bounds = fix_arrays(
*(co.core_bounds() for co in (co1, co2))
)
Expand Down Expand Up @@ -162,6 +166,9 @@ def nanmask_cube(cube):
"""Replace all NaNs with masked points, in cube data and coords."""
cube.data = nanmask_array(cube.core_data())
for coord in cube.coords():
if isinstance(coord, iris.mesh.MeshCoord):
# Can't write MeshCoords
continue
coord.points = nanmask_array(coord.core_points())
coord.bounds = nanmask_array(coord.core_bounds())
return cube
Expand Down
46 changes: 45 additions & 1 deletion tests/unit/iris/test_to_iris.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
"""
import dask.array as da
import numpy as np
from iris import NameConstraint
from iris._constraints import NameConstraint
from iris.cube import CubeList

from ncdata import NcData, NcDimension, NcVariable
Expand Down Expand Up @@ -116,3 +116,47 @@ def test_kwargs__load_by_name():
assert isinstance(cubes, CubeList)
assert len(cubes) == 1
assert cubes[0].name() == "var2"


def test_iris_loadchain():
"""Check that standard iris load-chain processing is applied, including a merge."""

# Create a pair of datasets with mergeable variables.
# Start by making one, with a scalar 'z' coord.
ncdata = NcData(
dimensions=[NcDimension("x", 3)],
variables=[
NcVariable(
name="v_data",
dimensions=["x"],
data=[1, 2, 3],
attributes={"long_name": "data", "coordinates": "v_z"},
),
NcVariable(
name="v_z",
dimensions=[],
data=[1],
attributes={"long_name": "z"},
),
],
)

# Duplicate to get a second dataset, then change the z value.
# N.B. we need 2 datasets, as Iris won't match coords with different var-names.
ncdata2 = ncdata.copy()

# N.B. must **copy data array** before modifying, as var.copy() doesn't do so.
data = ncdata2.variables["v_z"].data
data = data.copy()
data[0] = 2
ncdata2.variables["v_z"].data = data

# Loading should now merge these 2 into one cube.
cubes = to_iris([ncdata, ncdata2])

assert isinstance(cubes, CubeList)
assert len(cubes) == 1
(cube,) = cubes
assert cube.long_name == "data"
assert cube.shape == (2, 3)
assert cube.coords("z", dim_coords=True)
11 changes: 0 additions & 11 deletions tests/unit/xarray/test_to_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,7 @@ def test_real_nocopy():
xrds = to_xarray(ncdata)

# Check that the data content is the *SAME ARRAY*

# This exists within the Xarray variable, but wrapped in a slightly cunning way...
# We will embed some rather specific Xarray knowledge here, and hope that it does
# not break too often.
xr_data = xrds.variables["var_x"]._data
from xarray.core.indexing import LazilyIndexedArray, NumpyIndexingAdapter

assert isinstance(xr_data, LazilyIndexedArray)
xr_data = xr_data.array
assert isinstance(xr_data, NumpyIndexingAdapter)
xr_data = xr_data.array

assert xr_data is real_numpy_data


Expand Down
Loading