Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions lib/iris/experimental/ncdata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An abstract representation of Netcdf structured data, according to the
"Common Data Model" : https://docs.unidata.ucar.edu/netcdf-java/5.3/userguide/common_data_model_overview.html

TODO:
* add consistency checking
* add "direct" netcdf interfacing, i.e. to_nc4/from_nc4

"""
from ._core import NcAttribute, NcData, NcDimension, NcVariable

__all__ = ["NcAttribute", "NcData", "NcDimension", "NcVariable"]
95 changes: 95 additions & 0 deletions lib/iris/experimental/ncdata/_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An abstract representation of Netcdf data with groups, variables + attributes

This is also provided with a read/write conversion interface to Xarray.

TODO: add direct netcdf file interface (easy, but not yet).

"""
from typing import Dict, Optional, Tuple

import numpy as np

#
# A totally basic and naive representation of netCDF data.
# The structure supports groups, variables, attributes.
# The sole limitation here is that data and attributes appear as numpy-compatible
# array-like values (though this may include dask.array.Array), and hence their types
# are modelled as np.dtype's.
#


class NcData:
    """
    An in-memory representation of a netCDF dataset or group.

    Holds dimensions, variables, attributes and sub-groups, each stored in a
    dict keyed by name, mirroring the netCDF "Common Data Model" structure.
    All constructor arguments are optional; missing collections default to
    fresh empty dicts, so each instance owns its own mutable containers.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        dimensions: Optional[Dict[str, "NcDimension"]] = None,
        variables: Optional[Dict[str, "NcVariable"]] = None,
        attributes: Optional[Dict[str, "NcAttribute"]] = None,
        groups: Optional[Dict[str, "NcData"]] = None,
    ):
        # Name of the dataset/group (None for an anonymous root dataset).
        self.name: Optional[str] = name
        self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
        self.variables: Dict[str, "NcVariable"] = variables or {}
        self.attributes: Dict[str, "NcAttribute"] = attributes or {}
        self.groups: Dict[str, "NcData"] = groups or {}


class NcDimension:
    """A netCDF dimension: a name plus an extent (size)."""

    def __init__(self, name: str, size: int = 0):
        self.name: str = name
        # Following the netCDF convention, a size of zero denotes an
        # "unlimited" dimension.
        self.size: int = size


class NcVariable:
    """
    A netCDF variable: named, dimensioned array data plus attributes.

    The data payload may be any numpy-compatible array object (which may
    include a lazy dask array).  Its dtype is recorded separately, so that a
    variable can be declared (with a known dtype) before any actual data
    array is attached.
    """

    def __init__(
        self,
        name: str,
        dimensions: Optional[Tuple[str, ...]] = None,
        data: Optional[np.ndarray] = None,
        dtype: Optional[np.dtype] = None,
        attributes: Optional[Dict[str, "NcAttribute"]] = None,
        group: Optional["NcData"] = None,
    ):
        self.name = name
        self.dimensions = tuple(dimensions or ())
        if data is not None:
            # Wrap plain sequences as arrays; the data's own dtype then
            # overrides any explicitly-passed 'dtype'.
            if not hasattr(data, "dtype"):
                data = np.asanyarray(data)
            dtype = data.dtype
        self.dtype = dtype
        self.data = data  # Supports lazy, and normally provides a dtype
        self.attributes = attributes or {}
        # The containing NcData group/dataset, if any.
        self.group = group


class NcAttribute:
    """
    A netCDF attribute: a name plus an array-like value.

    Values are always stored as numpy arrays (so they have shape, dtype
    etc.); `_as_python_value` recovers a plain python string for character
    data.
    """

    def __init__(self, name: str, value):
        self.name: str = name
        # Attribute values are arraylike, have dtype
        # TODO: may need to regularise string representations?
        if not hasattr(value, "dtype"):
            value = np.asanyarray(value)
        self.value: np.ndarray = value

    def _as_python_value(self):
        """Return the value, converting character data to a python str."""
        result = self.value
        if result.dtype.kind in ("U", "S"):
            # BUGFIX: extract the python scalar with .item() rather than
            # str(), so that byte ("S" kind) values can actually be decoded
            # below (str() would yield the useless repr "b'...'").
            result = result.item() if result.ndim == 0 else str(result)
        if isinstance(result, bytes):
            result = result.decode()
        return result
24 changes: 24 additions & 0 deletions lib/iris/experimental/ncdata/_nc4_interface_exercise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from iris.experimental.ncdata.netcdf4 import from_nc4, to_nc4
import iris.tests as itsts


def example_nc4_roundtrip():
    """Exercise a netcdf file -> ncdata -> netcdf file round-trip."""
    source_path = itsts.get_data_path(
        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
    )
    output_path = "./temp_nc_output.nc"
    to_nc4(from_nc4(source_path), output_path)

    # Check by converting both files to Iris cubes and comparing
    # (a bit of a cheat, but OK for now?)
    import iris

    original_cube = iris.load_cube(source_path)
    roundtrip_cube = iris.load_cube(output_path)
    print("Round-tripped result, as iris cube:")
    print(roundtrip_cube)
    print("\nold-file-cube == new-file-cube ? ", original_cube == roundtrip_cube)


if __name__ == "__main__":
    # Run the round-trip exercise when invoked directly as a script.
    example_nc4_roundtrip()
237 changes: 237 additions & 0 deletions lib/iris/experimental/ncdata/dataset_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An adaptor layer allowing an NcData to masquerade as a netCDF4.Dataset object.

This is provided primarily to support a re-use of the iris.fileformats.netcdf file
format load + save, to convert cubes to+from ncdata objects, and hence convert Iris
cubes to+from an xarray.Dataset.

These classes contain NcData and NcVariables, but emulating the access APIs of a
netCDF4.Dataset.

Note: currently only supports what is required for Iris load/save capability.
It could conceivably be used for data exchange by *other* code that reads or writes
netcdf files, but that may require API support to be extended, depending on what
additional methods might be used.

"""
import numpy as np

from ._core import NcAttribute, NcData, NcDimension, NcVariable


class _Nc4DatalikeWithNcattrs:
# A mixin, shared by Nc4DatasetLike and Nc4VariableLike, which adds netcdf-like
# attribute operations 'ncattrs / setncattr / getncattr', *AND* extends the local
# objects attribute to those things also
# N.B. "self._ncdata" is the underlying NcData object : either an NcData or
# NcVariable object.
def ncattrs(self):
return list(self._ncdata.attributes.keys())

def getncattr(self, attr):
attrs = self._ncdata.attributes
if attr in attrs:
result = attrs[attr]._as_python_value()
else:
# Don't allow it to issue a KeyError, as this upsets 'getattr' usage.
# Raise an AttributeError instead.
raise AttributeError(attr)
return result

def setncattr(self, attr, value):
# TODO: are we sure we need this translation ??
if isinstance(value, bytes):
value = value.decode("utf-8")
# N.B. using the NcAttribute class for storage also ensures/requires that all
# attributes are cast as numpy arrays (so have shape, dtype etc).
self._ncdata.attributes[attr] = NcAttribute(attr, value)

def __getattr__(self, attr):
# Extend local object attribute access to the ncattrs of the stored data item
# (Yuck, but I think the Iris load code requires it).
return self.getncattr(attr)

def __setattr__(self, attr, value):
if attr in self._local_instance_props:
# N.B. use _local_instance_props to define standard instance attributes, to avoid a
# possible endless loop here.
super().__setattr__(attr, value)
else:
# # if not hasattr(self, '_allsetattrs'):
# # self._allsetattrs = set()
# self._allsetattrs.add(attr)
self.setncattr(attr, value)


class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
    """An NcData wrapper emulating the netCDF4.Dataset API (as far as Iris needs)."""

    _local_instance_props = ("_ncdata", "variables")

    def __init__(self, ncdata: NcData = None):
        if ncdata is None:
            ncdata = NcData()  # start with an empty dataset
        self._ncdata = ncdata
        # N.B. we must create + hold our OWN variables dict, since these are
        # wrappers around the underlying NcVariable objects, with different
        # properties.
        self.variables = {
            name: Nc4VariableLike._from_ncvariable(var)
            for name, var in self._ncdata.variables.items()
        }

    @property
    def dimensions(self):
        # Expose the dimensions as a simple {name: size} mapping.
        return {
            name: dim.size for name, dim in self._ncdata.dimensions.items()
        }

    @property
    def groups(self):
        return None  # not supported

    def createDimension(self, dimname, size):
        if dimname in self.dimensions:
            msg = f'creating duplicate dimension "{dimname}".'
            raise ValueError(msg)
        self._ncdata.dimensions[dimname] = NcDimension(dimname, size)
        return size

    def createVariable(self, varname, datatype, dimensions=(), **encoding):
        if varname in self.variables:
            msg = f'creating duplicate variable "{varname}".'
            raise ValueError(msg)
        # Register a new variable in the underlying NcData object.
        # Note: created with no data (or attributes), since this is how
        # netCDF4 expects to do it.
        ncvar = NcVariable(
            name=varname,
            dimensions=dimensions,
            group=self._ncdata,
        )
        self._ncdata.variables[varname] = ncvar
        # Wrap it as a netCDF4-like variable + install that here too.
        nc4var = Nc4VariableLike._from_ncvariable(ncvar, dtype=datatype)
        self.variables[varname] = nc4var
        return nc4var

    def sync(self):
        pass

    def close(self):
        self.sync()

    @staticmethod
    def filepath():
        #
        # Note: for now, let's just not care about this.
        # we *might* need this to be an optional defined item on an NcData ??
        # .. or, we might need to store an xarray "encoding" somewhere ?
        # TODO: more thought here ?
        # return self.ncdata.encoding.get("source", "")
        return "<Nc4DatasetLike>"


class Nc4VariableLike(_Nc4DatalikeWithNcattrs):
    """
    An NcVariable wrapper emulating the netCDF4.Variable API.

    Supports the attribute, dtype/shape and data-access operations which the
    Iris netcdf load/save code uses.
    """

    # Names set as ordinary instance attributes by the mixin's __setattr__
    # (anything else becomes a netCDF attribute).  N.B. '_raw_array' is
    # listed here so that assigning it routes via super().__setattr__, which
    # invokes the '_raw_array' property setter defined below.
    _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")

    def __init__(self, ncvar: NcVariable, datatype: np.dtype):
        self._ncdata = ncvar
        self.name = ncvar.name
        # Note: datatype must be known at creation, which may be before an actual data
        # array is assigned on the ncvar.
        self.datatype = np.dtype(datatype)
        if ncvar.data is None:
            # temporary empty data (to support never-written scalar values)
            # NOTE: significantly, does *not* allocate an actual full array in memory
            # (NOTE(review): np.zeros is lazily-backed on most platforms, but
            # this is implementation behaviour — confirm)
            array = np.zeros(self.shape, self.datatype)
            ncvar.data = array
        # Routed through the '_raw_array' property setter (see above note),
        # which stores onto ncvar.data and syncs self.datatype.
        self._raw_array = ncvar.data

    @classmethod
    def _from_ncvariable(cls, ncvar: NcVariable, dtype: np.dtype = None):
        # Alternate constructor: wrap an existing NcVariable, taking the
        # dtype from the variable itself unless explicitly overridden.
        if dtype is None:
            dtype = ncvar.dtype
        self = cls(
            ncvar=ncvar,
            datatype=dtype,
        )
        return self

    # Label this as an 'emulated' netCDF4.Variable, containing an actual (possibly
    # lazy) array, which can be directly read/written.
    @property
    def _raw_array(self):
        return self._ncdata.data

    @_raw_array.setter
    def _raw_array(self, data):
        # Assigning a new array also updates the recorded dtype to match it.
        self._ncdata.data = data
        self.datatype = data.dtype

    @property
    def group(self):
        # The containing NcData dataset/group of the wrapped variable.
        return self._ncdata.group

    @property
    def dimensions(self):
        # Dimension *names* of the wrapped variable (a tuple of str).
        return self._ncdata.dimensions

    #
    # "Normal" data access is via indexing.
    # N.B. we do still need to support this, e.g. for DimCoords ?
    #
    def __getitem__(self, keys):
        # Only whole-array access ([:]) is supported.
        if keys != slice(None):
            raise IndexError(keys)
        if self.ndim == 0:
            # Scalar variables: return the underlying data object directly.
            return self._ncdata.data
        return self._ncdata.data[keys]

    # The __setitem__ is not required for normal saving.
    # The saver will assign ._raw_array instead
    # TODO: might need to support this for future non-Iris usage ?
    #
    # def __setitem__(self, keys, data):
    #     if keys != slice(None):
    #         raise IndexError(keys)
    #     if not hasattr(data, "dtype"):
    #         raise ValueError(f"nonarray assigned as data : {data}")
    #     if not data.shape == self.shape:
    #         msg = (
    #             f"assigned data has wrong shape : "
    #             f"{data.shape} instead of {self.shape}"
    #         )
    #         raise ValueError(msg)
    #     self._ncdata.data = data
    #     self.datatype = data.dtype

    @property
    def dtype(self):
        return self.datatype

    @property
    def dims(self):
        # netCDF4-compatible alias for 'dimensions'.
        return self.dimensions

    @property
    def ndim(self):
        return len(self.dimensions)

    @property
    def shape(self):
        # Sizes are looked up, by name, from the owning group's dimensions.
        dims = self.group.dimensions
        return tuple(dims[n].size for n in self.dimensions)

    @property
    def size(self):
        # Total number of elements (1 for a scalar, since np.prod(()) == 1).
        return np.prod(self.shape)

    def chunking(self):
        # No chunking control is supported in this emulation.
        return None
Loading