From b4838bbe7b8648eb0fa4c4c6b40264fde79ccb07 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Mon, 5 Dec 2022 16:21:45 +0000
Subject: [PATCH 1/6] First working copy-free xr-iris-bridge.

---
 lib/iris/experimental/ncxr.py  | 576 +++++++++++++++++++++++++++++++++
 lib/iris/fileformats/netcdf.py |  63 ++--
 2 files changed, 620 insertions(+), 19 deletions(-)
 create mode 100644 lib/iris/experimental/ncxr.py

diff --git a/lib/iris/experimental/ncxr.py b/lib/iris/experimental/ncxr.py
new file mode 100644
index 0000000000..7b6c2cdaa8
--- /dev/null
+++ b/lib/iris/experimental/ncxr.py
@@ -0,0 +1,576 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Temporary code layer supporting interoperation between Iris and Xarray.
+
+TODO: replace this with various changes:
+  * move Iris-agnostic code outside Iris
+      - into its own repo (where it can be better tested)
+      - leaving **only** the 'to_xarray' and 'from_xarray' functions.
+  * add consistency checking
+  * add "direct" netcdf interfacing, i.e. NcDataset.to_nc/from_nc
+
+"""
+from functools import wraps
+from pathlib import Path  # noqa
+from typing import AnyStr, Dict, Optional, Tuple, Union
+
+import dask.array as da
+import netCDF4
+import numpy as np
+import xarray as xr
+
+import iris
+from iris.cube import CubeList
+import iris.fileformats.netcdf
+import iris.tests as itsts
+
+#
+# A totally basic and naive representation of netCDF data.
+# The structure supports groups, variables, attributes.
+# The sole limitation here is that data and attributes appear as numpy-compatible
+# array-like values (though this may include dask.array.Array), and hence their types
+# are modelled as np.dtype's.
+#
+
+
+class NcGroup:
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        dimensions: Dict[str, "NcDimension"] = None,
+        variables: Dict[str, "NcVariable"] = None,
+        attributes: Dict[str, "NcAttribute"] = None,
+        groups: Dict[str, "NcGroup"] = None,
+    ):
+        self.name: str = name
+        self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
+        self.variables: Dict[str, "NcVariable"] = variables or {}
+        self.attributes: Dict[str, "NcAttribute"] = attributes or {}
+        self.groups: Dict[str, "NcGroup"] = groups or {}
+
+
+class NcDimension:
+    def __init__(self, name: str, size: int = 0):
+        self.name: str = name
+        self.size: int = size  # N.B. we retain the 'zero size means unlimited'
+
+
+class NcVariable:
+    def __init__(
+        self,
+        name: str,
+        dimensions: Tuple[str] = None,
+        data: np.ndarray = None,
+        dtype: np.dtype = None,
+        attributes: Dict[str, "NcAttribute"] = None,
+        group: "NcGroup" = None,
+    ):
+        self.name = name
+        self.dimensions = tuple(dimensions or ())
+        if data is not None:
+            if not hasattr(data, "dtype"):
+                data = np.asanyarray(data)
+            dtype = data.dtype
+        self.dtype = dtype
+        self.data = data  # Supports lazy, and normally provides a dtype
+        self.attributes = attributes or {}
+        self.group = group
+
+    # # Provide some array-like readonly properties reflected from the data.
+    # @property
+    # def dtype(self):
+    #     return self.data.dtype
+    #
+    # @property
+    # def shape(self):
+    #     return self.data.shape
+
+
+class NcAttribute:
+    def __init__(self, name: str, value):
+        self.name: str = name
+        # Attribute values are arraylike, have dtype
+        # TODO: may need to regularise string representations?
+        if not hasattr(value, "dtype"):
+            value = np.asanyarray(value)
+        self.value: np.ndarray = value
+
+    def _as_python_value(self):
+        result = self.value
+        if result.dtype.kind in ("U", "S"):
+            result = str(result)
+            if isinstance(result, bytes):
+                result = result.decode()
+        return result
+
+
+class NcDataset(NcGroup):
+    # An interface class providing an NcGroup which can be converted to/from an
+    # xr.Dataset.  This is basically done by adding a small API enabling it to function
+    # as an Xarray "AbstractDataStore".
+    # This implies some embedded knowledge of Xarray, but it is very small.
+    #
+    # This code pinched from @TomekTrzeciak
+    # see https://gist.github.com/TomekTrzeciak/b00ff6c9dc301ed6f684990e400d1435
+
+    def load(self):
+        variables = {}
+        for k, v in self.variables.items():
+            attrs = {
+                name: attr._as_python_value()
+                for name, attr in v.attributes.items()
+            }
+            xr_var = xr.Variable(
+                v.dimensions, v.data, attrs, getattr(v, "encoding", {})
+            )
+            # TODO: ?possibly? need to apply usual Xarray "encodings" to convert raw
+            #  cf-encoded data into 'normal', interpreted xr.Variables.
+            if k == "time":
+                t_bdg = 0
+            xr_var = xr.conventions.decode_cf_variable(k, xr_var)
+            variables[k] = xr_var
+        attributes = {
+            name: attr._as_python_value()
+            for name, attr in self.attributes.items()
+        }
+        return variables, attributes
+
+    def store(
+        self,
+        variables,
+        attributes,
+        check_encoding_set=frozenset(),
+        writer=None,
+        unlimited_dims=None,
+    ):
+        for k, v in attributes.items():
+            if k in self.attributes:  # and self.attributes[k] != v:
+                msg = (
+                    f're-setting of attribute "{k}" : '
+                    f"was={self.attributes[k]}, now={v}"
+                )
+                raise ValueError(msg)
+            else:
+                self.attributes[k] = NcAttribute(k, v)
+        for k, v in variables.items():
+            if hasattr(v, "ncattrs"):
+                # An actual netCDF.Variable (?PP, not sure?)
+                data, dtype, dims, attrs, enc = (
+                    v[:],
+                    v.datatype,
+                    v.dimensions,
+                    v.ncattrs(),
+                    getattr(v, "encoding", {}),
+                )
+            else:
+                # An xr.Variable (?PP, not sure?)
+                # remove all the possible Xarray encodings
+                # These are all the ones potentially used by
+                # :func:`xr.conventions.decode_cf_variable`, in the order in which they
+                # would be applied.
+                v = xr.conventions.encode_cf_variable(
+                    v, name=k, needs_copy=False
+                )
+                data, dtype, dims, attrs, enc = (
+                    v.data,
+                    v.dtype,
+                    v.dims,
+                    v.attrs,
+                    v.encoding,
+                )
+
+            for dim_name, size in zip(dims, v.shape):
+                if dim_name in self.dimensions:
+                    if self.dimensions[dim_name].size != size:
+                        raise ValueError(
+                            f"size mismatch for dimension {dim_name!r}: "
+                            f"{self.dimensions[dim_name]} != {size}"
+                        )
+                else:
+                    self.dimensions[dim_name] = NcDimension(
+                        dim_name, size=size
+                    )
+
+            if k in self.variables:
+                raise ValueError(f'duplicate variable : "{k}"')
+            attrs = {
+                name: NcAttribute(name, value) for name, value in attrs.items()
+            }
+            nc_var = NcVariable(
+                name=k,
+                dimensions=dims,
+                attributes=attrs,
+                data=v.data,
+                group=self,
+            )
+            self.variables[k] = nc_var
+
+    def close(self):
+        pass
+
+    #
+    # This interface supports conversion to+from an xarray "Dataset".
+    # N.B. using the "AbstractDataStore" interface preserves variable contents, being
+    # either real or lazy arrays.
+    #
+    @classmethod
+    def from_xarray(
+        cls, dataset_or_file: Union[xr.Dataset, AnyStr, Path], **xr_load_kwargs
+    ):
+        if not isinstance(dataset_or_file, xr.Dataset):
+            # It's a "file" (or pathstring, or Path ?).
+            dataset_or_file = xr.load_dataset(
+                dataset_or_file, **xr_load_kwargs
+            )
+        nc_data = cls()
+        dataset_or_file.dump_to_store(nc_data, **xr_load_kwargs)
+        return nc_data
+
+    def to_xarray(self, **xr_save_kwargs) -> xr.Dataset:
+        ds = xr.Dataset.load_store(self, **xr_save_kwargs)
+        return ds
+
+
+#
+# Classes containing NcDataset and NcVariables, but emulating the access APIs of a
+# netCDF4.Dataset.
+# Notes:
+#   (1) only supports what is required for Iris load/save capability
+#   (2) we are proposing that this remains private, for now? -- due to (1)
+#
+class _Nc4DatalikeWithNcattrs:
+    # A mixin, shared by _Nc4DatasetLike and _Nc4VariableLike, which adds netcdf-like
+    #  attribute operations'ncattrs / setncattr / getncattr', *AND* extends the local
+    #  objects attribute to those things also
+    # N.B. "self._ncdata" is the underlying NcData object : either an NcDataset or
+    #  NcVariable object.
+    def ncattrs(self):
+        return list(self._ncdata.attributes.keys())
+
+    def getncattr(self, attr):
+        attrs = self._ncdata.attributes
+        if attr in attrs:
+            result = attrs[attr]._as_python_value()
+        else:
+            # Don't allow it to issue a KeyError, as this upsets 'getattr' usage.
+            # Raise an AttributeError instead.
+            raise AttributeError(attr)
+        return result
+
+    def setncattr(self, attr, value):
+        # TODO: are we sure we need this translation ??
+        if isinstance(value, bytes):
+            value = value.decode("utf-8")
+        # N.B. using the NcAttribute class for storage also ensures/requires that all
+        #  attributes are cast as numpy arrays (so have shape, dtype etc).
+        self._ncdata.attributes[attr] = NcAttribute(attr, value)
+
+    def __getattr__(self, attr):
+        # Extend local object attribute access to the ncattrs of the stored data item
+        #  (Yuck, but I think the Iris load code requires it).
+        return self.getncattr(attr)
+
+    def __setattr__(self, attr, value):
+        if attr in self._local_instance_props:
+            # N.B. use _local_instance_props to define standard instance attributes, to avoid a
+            #  possible endless loop here.
+            super().__setattr__(attr, value)
+        else:
+            # # if not hasattr(self, '_allsetattrs'):
+            # #     self._allsetattrs = set()
+            # self._allsetattrs.add(attr)
+            self.setncattr(attr, value)
+
+
+class _Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
+    _local_instance_props = ("_ncdata", "variables")
+
+    def __init__(self, ncdata: NcDataset = None):
+        if ncdata is None:
+            ncdata = NcDataset()  # an empty dataset
+        self._ncdata = ncdata
+        # N.B. we need to create + store our OWN variables, as they are wrappers for
+        #  the underlying NcVariable objects, with different properties.
+        self.variables = {
+            name: _Nc4VariableLike._from_ncvariable(ncvar, group=self)
+            for name, ncvar in self._ncdata.variables.items()
+        }
+
+    @property
+    def dimensions(self):
+        return {
+            name: dim.size for name, dim in self._ncdata.dimensions.items()
+        }
+
+    # @property
+    # def attributes(self):
+    #     return {
+    #         name: attr.value
+    #         for name, attr in self.ncdata.attributes.items()
+    #     }
+
+    @property
+    def groups(self):
+        return None  # not supported
+
+    # def ncattrs(self):
+    #     return self.attributes
+    #
+    # def getncattr(self, attr_name):
+    #     if attr_name in self.attributes:
+    #         return self.attributes[attr_name]
+    #     raise AttributeError(attr_name)
+    #
+    # def setncattr(self, attr_name, value):
+    #     if isinstance(value, bytes):
+    #         value = value.decode("utf-8")
+    #     self.ncdata.attributes[attr_name] = NcAttribute(attr_name, value)
+    #
+    # Attributes other than the instance-defining "slots" translate to netcdf
+    #  attributes of the underlying ncdata varable
+    #
+    def createDimension(self, dimname, size):
+        if dimname in self.dimensions:
+            msg = f'creating duplicate dimension "{dimname}".'
+            raise ValueError(msg)
+            # if self.dimensions[name] != size:
+            #     raise ValueError(f"size mismatch for dimension {name!r}: "
+            #                      f"{self.dimensions[name]} != {size}")
+        else:
+            self._ncdata.dimensions[dimname] = NcDimension(dimname, size)
+        return size
+
+    def createVariable(self, varname, datatype, dimensions=(), **encoding):
+        if varname in self.variables:
+            msg = f'creating duplicate variable "{varname}".'
+            raise ValueError(msg)
+        # Add a variable into the underlying NcDataset object.
+        ncvar = NcVariable(
+            name=varname,
+            dimensions=dimensions,
+            group=self._ncdata,
+        )
+        # Note: initially has no data (or attributes), since this is how netCDF4 expects
+        #  to do it.
+        self._ncdata.variables[varname] = ncvar
+        # Create a netCDF4-like "wrapper" variable + install that here.
+        nc4var = _Nc4VariableLike._from_ncvariable(
+            ncvar, group=self, dtype=datatype
+        )
+        self.variables[varname] = nc4var
+        return nc4var
+
+    def sync(self):
+        pass
+        # for k, v in self.variables.items():
+        #     if not hasattr(v, 'data'):
+        #         # coordinate system variables are created but not initialized with data by Iris!
+        #         v.data = np.empty(v.shape, dtype=v.datatype)
+        #         v.data[...] = netCDF4.default_fillvals.get(np.dtype(v.datatype).str[1:])
+
+    def close(self):
+        self.sync()
+
+    def filepath(self):
+        #
+        # Note: for now, let's just not care about this.
+        # we *might* need this to be an optinoal defined item on an NcDataset ??
+        # .. or, we ight need to store an xarray "encoding" somewhere ?
+        #
+        # return self.ncdata.encoding.get("source", "")
+        return "<Nc4DatasetLike>"
+
+
+class _Nc4VariableLike(_Nc4DatalikeWithNcattrs):
+    _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")
+
+    def __init__(self, ncvar: NcVariable, datatype: np.dtype):
+        self._ncdata = ncvar
+        self.name = ncvar.name
+        # Note: datatype must be known at creation, which may be before an actual data
+        #  array is assigned on the ncvar.
+        self.datatype = np.dtype(datatype)
+        if ncvar.data is None:
+            # temporary empty data (to support never-written scalar values)
+            ncvar.data = np.zeros(self.shape, self.datatype)
+        self[:] = ncvar.data
+
+    @classmethod
+    def _from_ncvariable(
+        cls, ncvar: NcVariable, group: NcGroup, dtype: np.dtype = None
+    ):
+        if dtype is None:
+            dtype = ncvar.dtype
+        self = cls(
+            ncvar=ncvar,
+            datatype=dtype,
+        )
+        return self
+
+    # Label this as an 'emulated' netCDF4.Variable, containing an actual (possibly
+    #  lazy) array, which can be directly read/written.
+    @property
+    def _raw_array(self):
+        return self._ncdata.data
+
+    @_raw_array.setter
+    def _raw_array(self, data):
+        self._ncdata.data = data
+        self.datatype = data.dtype
+
+    @property
+    def group(self):
+        return self._ncdata.group
+
+    @property
+    def dimensions(self):
+        return self._ncdata.dimensions
+
+    #
+    # "Normal" data access is via indexing.
+    #
+    def __getitem__(self, keys):
+        if keys != slice(None):
+            raise IndexError(keys)
+        if self.ndim == 0:
+            return self._ncdata.data
+        return self._ncdata.data[keys]
+
+    def __setitem__(self, keys, data):
+        if keys != slice(None):
+            raise IndexError(keys)
+        if not hasattr(data, "dtype"):
+            raise ValueError(f"nonarray assigned as data : {data}")
+        if not data.shape == self.shape:
+            msg = (
+                f"assigned data has wrong shape : "
+                f"{data.shape} instead of {self.shape}"
+            )
+            raise ValueError(msg)
+        self._ncdata.data = data
+        self.datatype = data.dtype
+        # if not self.dimensions and data.ndim != 0:
+        #     # Iris assigns 1-D single element array to 0-D var!
+        #     self.data = np.asarray(data.item())
+        # else:
+        #     shape = tuple(self.group.dimensions[d] for d in self.dimensions)
+        #     if data.shape != shape:
+        #         # Iris passes bounds arrays of wrong shape!
+        #         self.data = data.reshape(shape)
+        #     else:
+        #         self.data = data
+
+    @property
+    def dtype(self):
+        return self.datatype
+
+    @property
+    def dims(self):
+        return self.dimensions
+
+    @property
+    def ndim(self):
+        return len(self.dimensions)
+
+    @property
+    def shape(self):
+        dims = self.group.dimensions
+        return tuple(dims[n].size for n in self.dimensions)
+
+    @property
+    def size(self):
+        return np.prod(self.shape)
+
+    def chunking(self):
+        return None
+
+
+def cubes_from_xrds(xrds: xr.Dataset, **xr_load_kwargs):
+    ncdata = NcDataset.from_xarray(xrds, **xr_load_kwargs)
+    dslike = _Nc4DatasetLike(ncdata)
+    cubes = CubeList(iris.fileformats.netcdf.load_cubes(dslike))
+    return cubes
+
+
+def cubes_to_xrds(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
+    iris_save_kwargs = iris_save_kwargs or {}
+    xr_save_kwargs = xr_save_kwargs or {}
+    nc4like = _Nc4DatasetLike()
+    iris.save(
+        cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
+    )
+    xrds = nc4like._ncdata.to_xarray(**xr_save_kwargs)
+    return xrds
+
+
+def example_from_xr():
+    iris.FUTURE.datum_support = True
+    filepath = itsts.get_data_path(
+        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
+    )
+    xrds = xr.open_dataset(filepath, chunks="auto")
+    print("\nOriginal Xarray dataset:\n", xrds)
+    cubes = cubes_from_xrds(xrds)
+    print("\nxrds['time']:\n", xrds["time"])
+    print("\n\n")
+    print("============ CONVERT xr.Dataset TO cubes ... =========\n")
+    print("Cubes:")
+    print(cubes)
+    cube = cubes[0]
+    print("\nCube:")
+    print(cube)
+    data = cube.core_data()
+    print("\ncube.core_data():")
+    print(data)
+    # match = data is xrds['data'].data
+    # print('\ncube.core_data() is xrds["data"].data:')
+    # print(match)
+    co_auxlons = cube.coord("longitude")
+    print('\ncube.coord("longitude"):')
+    print(co_auxlons)
+    points = co_auxlons.core_points()
+    print('\ncube.coord("longitude").core_points():')
+    print(points)
+    print('\ncube.coord("longitude").points:')
+    print(points.compute())
+
+    print("\n")
+    print("============ CONVERT cubes TO xr.Dataset ... =========")
+    print("")
+    xrds2 = cubes_to_xrds(cubes)
+    print("\nxrds2:\n", xrds2)
+    print("\ntime:\n", xrds2["time"])
+
+    print("\n")
+    print("============ Array identity checks ... =========")
+    print(
+        "xrds2['data'].data   is   cube.core_data() : ",
+        bool(xrds2["data"].data is cube.core_data()),
+    )
+    print(
+        "xrds2['lon'].data   is   cube.coord('longitude').core_points() : ",
+        bool(xrds2["lon"].data is cube.coord("longitude").core_points()),
+    )
+    print(
+        "xrds2['x'].data   is   cube.coord('projection_x_coordinate').core_points() : ",
+        bool(
+            xrds2["x"].data
+            is cube.coord("projection_x_coordinate").core_points()
+        ),
+    )
+    print(
+        "np.all(xrds2['x'].data == cube.coord('projection_x_coordinate').points) : ",
+        bool(
+            np.all(
+                xrds2["x"].data == cube.coord("projection_x_coordinate").points
+            )
+        ),
+    )
+
+
+if __name__ == "__main__":
+    example_from_xr()
diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py
index 4efed43db9..490816ce68 100644
--- a/lib/iris/fileformats/netcdf.py
+++ b/lib/iris/fileformats/netcdf.py
@@ -618,22 +618,32 @@ def _get_cf_var_data(cf_var, filename):
     # Get lazy chunked data out of a cf variable.
     dtype = _get_actual_dtype(cf_var)
 
-    # Create cube with deferred data, but no metadata
-    fill_value = getattr(
-        cf_var.cf_data,
-        "_FillValue",
-        netCDF4.default_fillvals[cf_var.dtype.str[1:]],
-    )
-    proxy = NetCDFDataProxy(
-        cf_var.shape, dtype, filename, cf_var.cf_name, fill_value
-    )
-    # Get the chunking specified for the variable : this is either a shape, or
-    # maybe the string "contiguous".
-    chunks = cf_var.cf_data.chunking()
-    # In the "contiguous" case, pass chunks=None to 'as_lazy_data'.
-    if chunks == "contiguous":
-        chunks = None
-    return as_lazy_data(proxy, chunks=chunks)
+    # Shortcut for 'emulated' netcdf data loading
+    if hasattr(cf_var.cf_data, "_raw_array"):
+        # This is a emulated variable, which simply stores an array (possibly lazy)
+        result = cf_var.cf_data._raw_array
+    else:
+        # A 'real' netCDF4.Variable : create a lazy proxy
+        # Create cube with deferred data, but no metadata
+        fill_value = getattr(
+            cf_var.cf_data,
+            "_FillValue",
+            netCDF4.default_fillvals[cf_var.dtype.str[1:]],
+        )
+        proxy = NetCDFDataProxy(
+            cf_var.shape, dtype, filename, cf_var.cf_name, fill_value
+        )
+
+        # Get the chunking specified for the variable : this is either a shape, or
+        # maybe the string "contiguous".
+        chunks = cf_var.cf_data.chunking()
+        # In the "contiguous" case, pass chunks=None to 'as_lazy_data'.
+        if chunks == "contiguous":
+            chunks = None
+
+        result = as_lazy_data(proxy, chunks=chunks)
+
+    return result
 
 
 class _OrderedAddableList(list):
@@ -3015,8 +3025,22 @@ def _lazy_stream_data(data, fill_value, fill_warn, cf_var):
             #  contains just 1 row, so the cf_var is 1D.
             data = data.squeeze(axis=0)
 
-        if is_lazy_data(data):
-
+        if hasattr(cf_var, "_raw_array"):
+            # The target is not an actual netCDF4.Variable in a file, but an emulation
+            #  object which can store an arraylike (including lazy) directly.
+            # -  transfer the array without any copying (or realisation).
+            def store(data, cf_var, fill_value):
+                # Store the data directly on the Variable-like object.
+                cf_var._raw_array = data
+                # TODO: for now, just ignore any possible masking issues here, because
+                #  it is tricky, at least for lazy data.  In future, we should deal
+                #  with this properly.
+                is_masked, contains_fill_value = False, False
+                return is_masked, contains_fill_value
+
+        elif is_lazy_data(data):
+            # Storing lazy data to an actual netCDF4.Variable in a file.
+            #   - use streaming.
             def store(data, cf_var, fill_value):
                 # Store lazy data and check whether it is masked and contains
                 # the fill value
@@ -3025,7 +3049,8 @@ def store(data, cf_var, fill_value):
                 return target.is_masked, target.contains_value
 
         else:
-
+            # Storing real data to an actual netCDF4.Variable in a file.
+            #   - just write the data.
             def store(data, cf_var, fill_value):
                 cf_var[:] = data
                 is_masked = np.ma.is_masked(data)

From de6924be161af4a48d75e516269fe83d1e7d50bb Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Mon, 5 Dec 2022 17:46:35 +0000
Subject: [PATCH 2/6] Reorganised into separate sourcefiles.

---
 lib/iris/experimental/ncxr.py                 | 576 ------------------
 .../experimental/xarray_bridge/__init__.py    |  45 ++
 .../xarray_bridge/_ncdata_exercise.py         |  84 +++
 lib/iris/experimental/xarray_bridge/ncdata.py | 207 +++++++
 .../xarray_bridge/ncdata_netcdf4_adaptor.py   | 236 +++++++
 .../experimental/xarray_dataset_wrapper.py    | 415 -------------
 6 files changed, 572 insertions(+), 991 deletions(-)
 delete mode 100644 lib/iris/experimental/ncxr.py
 create mode 100644 lib/iris/experimental/xarray_bridge/__init__.py
 create mode 100644 lib/iris/experimental/xarray_bridge/_ncdata_exercise.py
 create mode 100644 lib/iris/experimental/xarray_bridge/ncdata.py
 create mode 100644 lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py
 delete mode 100644 lib/iris/experimental/xarray_dataset_wrapper.py

diff --git a/lib/iris/experimental/ncxr.py b/lib/iris/experimental/ncxr.py
deleted file mode 100644
index 7b6c2cdaa8..0000000000
--- a/lib/iris/experimental/ncxr.py
+++ /dev/null
@@ -1,576 +0,0 @@
-# Copyright Iris contributors
-#
-# This file is part of Iris and is released under the LGPL license.
-# See COPYING and COPYING.LESSER in the root of the repository for full
-# licensing details.
-"""
-Temporary code layer supporting interoperation between Iris and Xarray.
-
-TODO: replace this with various changes:
-  * move Iris-agnostic code outside Iris
-      - into its own repo (where it can be better tested)
-      - leaving **only** the 'to_xarray' and 'from_xarray' functions.
-  * add consistency checking
-  * add "direct" netcdf interfacing, i.e. NcDataset.to_nc/from_nc
-
-"""
-from functools import wraps
-from pathlib import Path  # noqa
-from typing import AnyStr, Dict, Optional, Tuple, Union
-
-import dask.array as da
-import netCDF4
-import numpy as np
-import xarray as xr
-
-import iris
-from iris.cube import CubeList
-import iris.fileformats.netcdf
-import iris.tests as itsts
-
-#
-# A totally basic and naive representation of netCDF data.
-# The structure supports groups, variables, attributes.
-# The sole limitation here is that data and attributes appear as numpy-compatible
-# array-like values (though this may include dask.array.Array), and hence their types
-# are modelled as np.dtype's.
-#
-
-
-class NcGroup:
-    def __init__(
-        self,
-        name: Optional[str] = None,
-        dimensions: Dict[str, "NcDimension"] = None,
-        variables: Dict[str, "NcVariable"] = None,
-        attributes: Dict[str, "NcAttribute"] = None,
-        groups: Dict[str, "NcGroup"] = None,
-    ):
-        self.name: str = name
-        self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
-        self.variables: Dict[str, "NcVariable"] = variables or {}
-        self.attributes: Dict[str, "NcAttribute"] = attributes or {}
-        self.groups: Dict[str, "NcGroup"] = groups or {}
-
-
-class NcDimension:
-    def __init__(self, name: str, size: int = 0):
-        self.name: str = name
-        self.size: int = size  # N.B. we retain the 'zero size means unlimited'
-
-
-class NcVariable:
-    def __init__(
-        self,
-        name: str,
-        dimensions: Tuple[str] = None,
-        data: np.ndarray = None,
-        dtype: np.dtype = None,
-        attributes: Dict[str, "NcAttribute"] = None,
-        group: "NcGroup" = None,
-    ):
-        self.name = name
-        self.dimensions = tuple(dimensions or ())
-        if data is not None:
-            if not hasattr(data, "dtype"):
-                data = np.asanyarray(data)
-            dtype = data.dtype
-        self.dtype = dtype
-        self.data = data  # Supports lazy, and normally provides a dtype
-        self.attributes = attributes or {}
-        self.group = group
-
-    # # Provide some array-like readonly properties reflected from the data.
-    # @property
-    # def dtype(self):
-    #     return self.data.dtype
-    #
-    # @property
-    # def shape(self):
-    #     return self.data.shape
-
-
-class NcAttribute:
-    def __init__(self, name: str, value):
-        self.name: str = name
-        # Attribute values are arraylike, have dtype
-        # TODO: may need to regularise string representations?
-        if not hasattr(value, "dtype"):
-            value = np.asanyarray(value)
-        self.value: np.ndarray = value
-
-    def _as_python_value(self):
-        result = self.value
-        if result.dtype.kind in ("U", "S"):
-            result = str(result)
-            if isinstance(result, bytes):
-                result = result.decode()
-        return result
-
-
-class NcDataset(NcGroup):
-    # An interface class providing an NcGroup which can be converted to/from an
-    # xr.Dataset.  This is basically done by adding a small API enabling it to function
-    # as an Xarray "AbstractDataStore".
-    # This implies some embedded knowledge of Xarray, but it is very small.
-    #
-    # This code pinched from @TomekTrzeciak
-    # see https://gist.github.com/TomekTrzeciak/b00ff6c9dc301ed6f684990e400d1435
-
-    def load(self):
-        variables = {}
-        for k, v in self.variables.items():
-            attrs = {
-                name: attr._as_python_value()
-                for name, attr in v.attributes.items()
-            }
-            xr_var = xr.Variable(
-                v.dimensions, v.data, attrs, getattr(v, "encoding", {})
-            )
-            # TODO: ?possibly? need to apply usual Xarray "encodings" to convert raw
-            #  cf-encoded data into 'normal', interpreted xr.Variables.
-            if k == "time":
-                t_bdg = 0
-            xr_var = xr.conventions.decode_cf_variable(k, xr_var)
-            variables[k] = xr_var
-        attributes = {
-            name: attr._as_python_value()
-            for name, attr in self.attributes.items()
-        }
-        return variables, attributes
-
-    def store(
-        self,
-        variables,
-        attributes,
-        check_encoding_set=frozenset(),
-        writer=None,
-        unlimited_dims=None,
-    ):
-        for k, v in attributes.items():
-            if k in self.attributes:  # and self.attributes[k] != v:
-                msg = (
-                    f're-setting of attribute "{k}" : '
-                    f"was={self.attributes[k]}, now={v}"
-                )
-                raise ValueError(msg)
-            else:
-                self.attributes[k] = NcAttribute(k, v)
-        for k, v in variables.items():
-            if hasattr(v, "ncattrs"):
-                # An actual netCDF.Variable (?PP, not sure?)
-                data, dtype, dims, attrs, enc = (
-                    v[:],
-                    v.datatype,
-                    v.dimensions,
-                    v.ncattrs(),
-                    getattr(v, "encoding", {}),
-                )
-            else:
-                # An xr.Variable (?PP, not sure?)
-                # remove all the possible Xarray encodings
-                # These are all the ones potentially used by
-                # :func:`xr.conventions.decode_cf_variable`, in the order in which they
-                # would be applied.
-                v = xr.conventions.encode_cf_variable(
-                    v, name=k, needs_copy=False
-                )
-                data, dtype, dims, attrs, enc = (
-                    v.data,
-                    v.dtype,
-                    v.dims,
-                    v.attrs,
-                    v.encoding,
-                )
-
-            for dim_name, size in zip(dims, v.shape):
-                if dim_name in self.dimensions:
-                    if self.dimensions[dim_name].size != size:
-                        raise ValueError(
-                            f"size mismatch for dimension {dim_name!r}: "
-                            f"{self.dimensions[dim_name]} != {size}"
-                        )
-                else:
-                    self.dimensions[dim_name] = NcDimension(
-                        dim_name, size=size
-                    )
-
-            if k in self.variables:
-                raise ValueError(f'duplicate variable : "{k}"')
-            attrs = {
-                name: NcAttribute(name, value) for name, value in attrs.items()
-            }
-            nc_var = NcVariable(
-                name=k,
-                dimensions=dims,
-                attributes=attrs,
-                data=v.data,
-                group=self,
-            )
-            self.variables[k] = nc_var
-
-    def close(self):
-        pass
-
-    #
-    # This interface supports conversion to+from an xarray "Dataset".
-    # N.B. using the "AbstractDataStore" interface preserves variable contents, being
-    # either real or lazy arrays.
-    #
-    @classmethod
-    def from_xarray(
-        cls, dataset_or_file: Union[xr.Dataset, AnyStr, Path], **xr_load_kwargs
-    ):
-        if not isinstance(dataset_or_file, xr.Dataset):
-            # It's a "file" (or pathstring, or Path ?).
-            dataset_or_file = xr.load_dataset(
-                dataset_or_file, **xr_load_kwargs
-            )
-        nc_data = cls()
-        dataset_or_file.dump_to_store(nc_data, **xr_load_kwargs)
-        return nc_data
-
-    def to_xarray(self, **xr_save_kwargs) -> xr.Dataset:
-        ds = xr.Dataset.load_store(self, **xr_save_kwargs)
-        return ds
-
-
-#
-# Classes containing NcDataset and NcVariables, but emulating the access APIs of a
-# netCDF4.Dataset.
-# Notes:
-#   (1) only supports what is required for Iris load/save capability
-#   (2) we are proposing that this remains private, for now? -- due to (1)
-#
-class _Nc4DatalikeWithNcattrs:
-    # A mixin, shared by _Nc4DatasetLike and _Nc4VariableLike, which adds netcdf-like
-    #  attribute operations'ncattrs / setncattr / getncattr', *AND* extends the local
-    #  objects attribute to those things also
-    # N.B. "self._ncdata" is the underlying NcData object : either an NcDataset or
-    #  NcVariable object.
-    def ncattrs(self):
-        return list(self._ncdata.attributes.keys())
-
-    def getncattr(self, attr):
-        attrs = self._ncdata.attributes
-        if attr in attrs:
-            result = attrs[attr]._as_python_value()
-        else:
-            # Don't allow it to issue a KeyError, as this upsets 'getattr' usage.
-            # Raise an AttributeError instead.
-            raise AttributeError(attr)
-        return result
-
-    def setncattr(self, attr, value):
-        # TODO: are we sure we need this translation ??
-        if isinstance(value, bytes):
-            value = value.decode("utf-8")
-        # N.B. using the NcAttribute class for storage also ensures/requires that all
-        #  attributes are cast as numpy arrays (so have shape, dtype etc).
-        self._ncdata.attributes[attr] = NcAttribute(attr, value)
-
-    def __getattr__(self, attr):
-        # Extend local object attribute access to the ncattrs of the stored data item
-        #  (Yuck, but I think the Iris load code requires it).
-        return self.getncattr(attr)
-
-    def __setattr__(self, attr, value):
-        if attr in self._local_instance_props:
-            # N.B. use _local_instance_props to define standard instance attributes, to avoid a
-            #  possible endless loop here.
-            super().__setattr__(attr, value)
-        else:
-            # # if not hasattr(self, '_allsetattrs'):
-            # #     self._allsetattrs = set()
-            # self._allsetattrs.add(attr)
-            self.setncattr(attr, value)
-
-
-class _Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
-    _local_instance_props = ("_ncdata", "variables")
-
-    def __init__(self, ncdata: NcDataset = None):
-        if ncdata is None:
-            ncdata = NcDataset()  # an empty dataset
-        self._ncdata = ncdata
-        # N.B. we need to create + store our OWN variables, as they are wrappers for
-        #  the underlying NcVariable objects, with different properties.
-        self.variables = {
-            name: _Nc4VariableLike._from_ncvariable(ncvar, group=self)
-            for name, ncvar in self._ncdata.variables.items()
-        }
-
-    @property
-    def dimensions(self):
-        return {
-            name: dim.size for name, dim in self._ncdata.dimensions.items()
-        }
-
-    # @property
-    # def attributes(self):
-    #     return {
-    #         name: attr.value
-    #         for name, attr in self.ncdata.attributes.items()
-    #     }
-
-    @property
-    def groups(self):
-        return None  # not supported
-
-    # def ncattrs(self):
-    #     return self.attributes
-    #
-    # def getncattr(self, attr_name):
-    #     if attr_name in self.attributes:
-    #         return self.attributes[attr_name]
-    #     raise AttributeError(attr_name)
-    #
-    # def setncattr(self, attr_name, value):
-    #     if isinstance(value, bytes):
-    #         value = value.decode("utf-8")
-    #     self.ncdata.attributes[attr_name] = NcAttribute(attr_name, value)
-    #
-    # Attributes other than the instance-defining "slots" translate to netcdf
-    #  attributes of the underlying ncdata varable
-    #
-    def createDimension(self, dimname, size):
-        if dimname in self.dimensions:
-            msg = f'creating duplicate dimension "{dimname}".'
-            raise ValueError(msg)
-            # if self.dimensions[name] != size:
-            #     raise ValueError(f"size mismatch for dimension {name!r}: "
-            #                      f"{self.dimensions[name]} != {size}")
-        else:
-            self._ncdata.dimensions[dimname] = NcDimension(dimname, size)
-        return size
-
-    def createVariable(self, varname, datatype, dimensions=(), **encoding):
-        if varname in self.variables:
-            msg = f'creating duplicate variable "{varname}".'
-            raise ValueError(msg)
-        # Add a variable into the underlying NcDataset object.
-        ncvar = NcVariable(
-            name=varname,
-            dimensions=dimensions,
-            group=self._ncdata,
-        )
-        # Note: initially has no data (or attributes), since this is how netCDF4 expects
-        #  to do it.
-        self._ncdata.variables[varname] = ncvar
-        # Create a netCDF4-like "wrapper" variable + install that here.
-        nc4var = _Nc4VariableLike._from_ncvariable(
-            ncvar, group=self, dtype=datatype
-        )
-        self.variables[varname] = nc4var
-        return nc4var
-
-    def sync(self):
-        pass
-        # for k, v in self.variables.items():
-        #     if not hasattr(v, 'data'):
-        #         # coordinate system variables are created but not initialized with data by Iris!
-        #         v.data = np.empty(v.shape, dtype=v.datatype)
-        #         v.data[...] = netCDF4.default_fillvals.get(np.dtype(v.datatype).str[1:])
-
-    def close(self):
-        self.sync()
-
-    def filepath(self):
-        #
-        # Note: for now, let's just not care about this.
-        # we *might* need this to be an optinoal defined item on an NcDataset ??
-        # .. or, we ight need to store an xarray "encoding" somewhere ?
-        #
-        # return self.ncdata.encoding.get("source", "")
-        return "<Nc4DatasetLike>"
-
-
-class _Nc4VariableLike(_Nc4DatalikeWithNcattrs):
-    _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")
-
-    def __init__(self, ncvar: NcVariable, datatype: np.dtype):
-        self._ncdata = ncvar
-        self.name = ncvar.name
-        # Note: datatype must be known at creation, which may be before an actual data
-        #  array is assigned on the ncvar.
-        self.datatype = np.dtype(datatype)
-        if ncvar.data is None:
-            # temporary empty data (to support never-written scalar values)
-            ncvar.data = np.zeros(self.shape, self.datatype)
-        self[:] = ncvar.data
-
-    @classmethod
-    def _from_ncvariable(
-        cls, ncvar: NcVariable, group: NcGroup, dtype: np.dtype = None
-    ):
-        if dtype is None:
-            dtype = ncvar.dtype
-        self = cls(
-            ncvar=ncvar,
-            datatype=dtype,
-        )
-        return self
-
-    # Label this as an 'emulated' netCDF4.Variable, containing an actual (possibly
-    #  lazy) array, which can be directly read/written.
-    @property
-    def _raw_array(self):
-        return self._ncdata.data
-
-    @_raw_array.setter
-    def _raw_array(self, data):
-        self._ncdata.data = data
-        self.datatype = data.dtype
-
-    @property
-    def group(self):
-        return self._ncdata.group
-
-    @property
-    def dimensions(self):
-        return self._ncdata.dimensions
-
-    #
-    # "Normal" data access is via indexing.
-    #
-    def __getitem__(self, keys):
-        if keys != slice(None):
-            raise IndexError(keys)
-        if self.ndim == 0:
-            return self._ncdata.data
-        return self._ncdata.data[keys]
-
-    def __setitem__(self, keys, data):
-        if keys != slice(None):
-            raise IndexError(keys)
-        if not hasattr(data, "dtype"):
-            raise ValueError(f"nonarray assigned as data : {data}")
-        if not data.shape == self.shape:
-            msg = (
-                f"assigned data has wrong shape : "
-                f"{data.shape} instead of {self.shape}"
-            )
-            raise ValueError(msg)
-        self._ncdata.data = data
-        self.datatype = data.dtype
-        # if not self.dimensions and data.ndim != 0:
-        #     # Iris assigns 1-D single element array to 0-D var!
-        #     self.data = np.asarray(data.item())
-        # else:
-        #     shape = tuple(self.group.dimensions[d] for d in self.dimensions)
-        #     if data.shape != shape:
-        #         # Iris passes bounds arrays of wrong shape!
-        #         self.data = data.reshape(shape)
-        #     else:
-        #         self.data = data
-
-    @property
-    def dtype(self):
-        return self.datatype
-
-    @property
-    def dims(self):
-        return self.dimensions
-
-    @property
-    def ndim(self):
-        return len(self.dimensions)
-
-    @property
-    def shape(self):
-        dims = self.group.dimensions
-        return tuple(dims[n].size for n in self.dimensions)
-
-    @property
-    def size(self):
-        return np.prod(self.shape)
-
-    def chunking(self):
-        return None
-
-
-def cubes_from_xrds(xrds: xr.Dataset, **xr_load_kwargs):
-    ncdata = NcDataset.from_xarray(xrds, **xr_load_kwargs)
-    dslike = _Nc4DatasetLike(ncdata)
-    cubes = CubeList(iris.fileformats.netcdf.load_cubes(dslike))
-    return cubes
-
-
-def cubes_to_xrds(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
-    iris_save_kwargs = iris_save_kwargs or {}
-    xr_save_kwargs = xr_save_kwargs or {}
-    nc4like = _Nc4DatasetLike()
-    iris.save(
-        cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
-    )
-    xrds = nc4like._ncdata.to_xarray(**xr_save_kwargs)
-    return xrds
-
-
-def example_from_xr():
-    iris.FUTURE.datum_support = True
-    filepath = itsts.get_data_path(
-        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
-    )
-    xrds = xr.open_dataset(filepath, chunks="auto")
-    print("\nOriginal Xarray dataset:\n", xrds)
-    cubes = cubes_from_xrds(xrds)
-    print("\nxrds['time']:\n", xrds["time"])
-    print("\n\n")
-    print("============ CONVERT xr.Dataset TO cubes ... =========\n")
-    print("Cubes:")
-    print(cubes)
-    cube = cubes[0]
-    print("\nCube:")
-    print(cube)
-    data = cube.core_data()
-    print("\ncube.core_data():")
-    print(data)
-    # match = data is xrds['data'].data
-    # print('\ncube.core_data() is xrds["data"].data:')
-    # print(match)
-    co_auxlons = cube.coord("longitude")
-    print('\ncube.coord("longitude"):')
-    print(co_auxlons)
-    points = co_auxlons.core_points()
-    print('\ncube.coord("longitude").core_points():')
-    print(points)
-    print('\ncube.coord("longitude").points:')
-    print(points.compute())
-
-    print("\n")
-    print("============ CONVERT cubes TO xr.Dataset ... =========")
-    print("")
-    xrds2 = cubes_to_xrds(cubes)
-    print("\nxrds2:\n", xrds2)
-    print("\ntime:\n", xrds2["time"])
-
-    print("\n")
-    print("============ Array identity checks ... =========")
-    print(
-        "xrds2['data'].data   is   cube.core_data() : ",
-        bool(xrds2["data"].data is cube.core_data()),
-    )
-    print(
-        "xrds2['lon'].data   is   cube.coord('longitude').core_points() : ",
-        bool(xrds2["lon"].data is cube.coord("longitude").core_points()),
-    )
-    print(
-        "xrds2['x'].data   is   cube.coord('projection_x_coordinate').core_points() : ",
-        bool(
-            xrds2["x"].data
-            is cube.coord("projection_x_coordinate").core_points()
-        ),
-    )
-    print(
-        "np.all(xrds2['x'].data == cube.coord('projection_x_coordinate').points) : ",
-        bool(
-            np.all(
-                xrds2["x"].data == cube.coord("projection_x_coordinate").points
-            )
-        ),
-    )
-
-
-if __name__ == "__main__":
-    example_from_xr()
diff --git a/lib/iris/experimental/xarray_bridge/__init__.py b/lib/iris/experimental/xarray_bridge/__init__.py
new file mode 100644
index 0000000000..1400cf0e17
--- /dev/null
+++ b/lib/iris/experimental/xarray_bridge/__init__.py
@@ -0,0 +1,45 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Experimental code for interchanging data with Xarray.
+
+
+TODO: replace this with various changes:
+  * move Iris-agnostic code outside Iris
+      - into its own repo (where it can be better tested)
+      - leaving **only** the 'to_xarray' and 'from_xarray' functions.
+  * add consistency checking
+  * add "direct" netcdf interfacing, i.e. NcDataset.to_nc/from_nc
+
+"""
+import iris
+from iris.cube import CubeList
+import iris.fileformats.netcdf as ifn
+
+from .ncdata import NcDataset
+from .ncdata_netcdf4_adaptor import _Nc4DatasetLike
+
+#
+# The primary conversion interfaces
+#
+
+
+def cubes_from_xarray(xrds: "xarray.Dataset", **xr_load_kwargs):  # noqa
+    ncdata = NcDataset.from_xarray(xrds, **xr_load_kwargs)
+    dslike = _Nc4DatasetLike(ncdata)
+    cubes = CubeList(ifn.load_cubes(dslike))
+    return cubes
+
+
+def cubes_to_xarray(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
+    iris_save_kwargs = iris_save_kwargs or {}
+    xr_save_kwargs = xr_save_kwargs or {}
+    nc4like = _Nc4DatasetLike()
+    iris.save(
+        cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
+    )
+    xrds = nc4like._ncdata.to_xarray(**xr_save_kwargs)
+    return xrds
diff --git a/lib/iris/experimental/xarray_bridge/_ncdata_exercise.py b/lib/iris/experimental/xarray_bridge/_ncdata_exercise.py
new file mode 100644
index 0000000000..c301f154d6
--- /dev/null
+++ b/lib/iris/experimental/xarray_bridge/_ncdata_exercise.py
@@ -0,0 +1,84 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+A temporary proof-of-concept test workflow
+
+"""
+import numpy as np
+import xarray as xr
+
+import iris
+from iris.experimental.xarray_bridge import cubes_from_xarray, cubes_to_xarray
+import iris.tests as itsts
+
+
+def example_from_xr():
+    iris.FUTURE.datum_support = True
+    filepath = itsts.get_data_path(
+        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
+    )
+    xrds = xr.open_dataset(filepath, chunks="auto")
+    print("\nOriginal Xarray dataset:\n", xrds)
+    cubes = cubes_from_xarray(xrds)
+    print("\nxrds['time']:\n", xrds["time"])
+    print("\n\n")
+    print("============ CONVERT xr.Dataset TO cubes ... =========\n")
+    print("Cubes:")
+    print(cubes)
+    cube = cubes[0]
+    print("\nCube:")
+    print(cube)
+    data = cube.core_data()
+    print("\ncube.core_data():")
+    print(data)
+    # match = data is xrds['data'].data
+    # print('\ncube.core_data() is xrds["data"].data:')
+    # print(match)
+    co_auxlons = cube.coord("longitude")
+    print('\ncube.coord("longitude"):')
+    print(co_auxlons)
+    points = co_auxlons.core_points()
+    print('\ncube.coord("longitude").core_points():')
+    print(points)
+    print('\ncube.coord("longitude").points:')
+    print(points.compute())
+
+    print("\n")
+    print("============ CONVERT cubes TO xr.Dataset ... =========")
+    print("")
+    xrds2 = cubes_to_xarray(cubes)
+    print("\nxrds2:\n", xrds2)
+    print("\ntime:\n", xrds2["time"])
+
+    print("\n")
+    print("============ Array identity checks ... =========")
+    print(
+        "xrds2['data'].data   is   cube.core_data() : ",
+        bool(xrds2["data"].data is cube.core_data()),
+    )
+    print(
+        "xrds2['lon'].data   is   cube.coord('longitude').core_points() : ",
+        bool(xrds2["lon"].data is cube.coord("longitude").core_points()),
+    )
+    print(
+        "xrds2['x'].data   is   cube.coord('projection_x_coordinate').core_points() : ",
+        bool(
+            xrds2["x"].data
+            is cube.coord("projection_x_coordinate").core_points()
+        ),
+    )
+    print(
+        "np.all(xrds2['x'].data == cube.coord('projection_x_coordinate').points) : ",
+        bool(
+            np.all(
+                xrds2["x"].data == cube.coord("projection_x_coordinate").points
+            )
+        ),
+    )
+
+
+if __name__ == "__main__":
+    example_from_xr()
diff --git a/lib/iris/experimental/xarray_bridge/ncdata.py b/lib/iris/experimental/xarray_bridge/ncdata.py
new file mode 100644
index 0000000000..2e2b2edaeb
--- /dev/null
+++ b/lib/iris/experimental/xarray_bridge/ncdata.py
@@ -0,0 +1,207 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+An abstract representation of Netcdf data with groups, variables + attributes
+
+This is also provided with a read/write conversion interface to Xarray.
+
+TODO: add direct netcdf file interface (easy, but not yet).
+
+"""
+from pathlib import Path
+from typing import AnyStr, Dict, Optional, Tuple, Union
+
+import numpy as np
+import xarray as xr
+
+#
+# A totally basic and naive representation of netCDF data.
+# The structure supports groups, variables, attributes.
+# The sole limitation here is that data and attributes appear as numpy-compatible
+# array-like values (though this may include dask.array.Array), and hence their types
+# are modelled as np.dtype's.
+#
+
+
+class NcGroup:
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        dimensions: Dict[str, "NcDimension"] = None,
+        variables: Dict[str, "NcVariable"] = None,
+        attributes: Dict[str, "NcAttribute"] = None,
+        groups: Dict[str, "NcGroup"] = None,
+    ):
+        self.name: str = name
+        self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
+        self.variables: Dict[str, "NcVariable"] = variables or {}
+        self.attributes: Dict[str, "NcAttribute"] = attributes or {}
+        self.groups: Dict[str, "NcGroup"] = groups or {}
+
+
+class NcDimension:
+    def __init__(self, name: str, size: int = 0):
+        self.name: str = name
+        self.size: int = size  # N.B. we retain the 'zero size means unlimited'
+
+
+class NcVariable:
+    def __init__(
+        self,
+        name: str,
+        dimensions: Tuple[str] = None,
+        data: np.ndarray = None,
+        dtype: np.dtype = None,
+        attributes: Dict[str, "NcAttribute"] = None,
+        group: "NcGroup" = None,
+    ):
+        self.name = name
+        self.dimensions = tuple(dimensions or ())
+        if data is not None:
+            if not hasattr(data, "dtype"):
+                data = np.asanyarray(data)
+            dtype = data.dtype
+        self.dtype = dtype
+        self.data = data  # Supports lazy, and normally provides a dtype
+        self.attributes = attributes or {}
+        self.group = group
+
+    # # Provide some array-like readonly properties reflected from the data.
+    # @property
+    # def dtype(self):
+    #     return self.data.dtype
+    #
+    # @property
+    # def shape(self):
+    #     return self.data.shape
+
+
+class NcAttribute:
+    def __init__(self, name: str, value):
+        self.name: str = name
+        # Attribute values are arraylike, have dtype
+        # TODO: may need to regularise string representations?
+        if not hasattr(value, "dtype"):
+            value = np.asanyarray(value)
+        self.value: np.ndarray = value
+
+    def _as_python_value(self):
+        result = self.value
+        if result.dtype.kind in ("U", "S"):
+            result = str(result)
+            if isinstance(result, bytes):
+                result = result.decode()
+        return result
+
+
+class NcDataset(NcGroup):
+    # An interface class providing an NcGroup which can be converted to/from an
+    # xr.Dataset.  This is basically done by adding a small API enabling it to function
+    # as an Xarray "AbstractDataStore".
+    # This implies some embedded knowledge of Xarray, but it is very small.
+    #
+    # This code pinched from @TomekTrzeciak
+    # see https://gist.github.com/TomekTrzeciak/b00ff6c9dc301ed6f684990e400d1435
+
+    def load(self):
+        variables = {}
+        for k, v in self.variables.items():
+            attrs = {
+                name: attr._as_python_value()
+                for name, attr in v.attributes.items()
+            }
+            xr_var = xr.Variable(
+                v.dimensions, v.data, attrs, getattr(v, "encoding", {})
+            )
+            # TODO: ?possibly? need to apply usual Xarray "encodings" to convert raw
+            #  cf-encoded data into 'normal', interpreted xr.Variables.
+            xr_var = xr.conventions.decode_cf_variable(k, xr_var)
+            variables[k] = xr_var
+        attributes = {
+            name: attr._as_python_value()
+            for name, attr in self.attributes.items()
+        }
+        return variables, attributes
+
+    def store(
+        self,
+        variables,
+        attributes,
+        check_encoding_set=frozenset(),
+        writer=None,
+        unlimited_dims=None,
+    ):
+        for attrname, v in attributes.items():
+            if attrname in self.attributes:  # and self.attributes[k] != v:
+                msg = (
+                    f're-setting of attribute "{attrname}" : '
+                    f"was={self.attributes[attrname]}, now={v}"
+                )
+                raise ValueError(msg)
+            else:
+                self.attributes[attrname] = NcAttribute(attrname, v)
+
+        for varname, var in variables.items():
+            if varname in self.variables:
+                raise ValueError(f'duplicate variable : "{varname}"')
+
+            # An xr.Variable : remove all the possible Xarray encodings
+            # These are all the ones potentially used by
+            # :func:`xr.conventions.decode_cf_variable`, in the order in which they
+            # would be applied.
+            var = xr.conventions.encode_cf_variable(
+                var, name=varname, needs_copy=False
+            )
+
+            for dim_name, size in zip(var.dims, var.shape):
+                if dim_name in self.dimensions:
+                    if self.dimensions[dim_name].size != size:
+                        raise ValueError(
+                            f"size mismatch for dimension {dim_name!r}: "
+                            f"{self.dimensions[dim_name]} != {size}"
+                        )
+                else:
+                    self.dimensions[dim_name] = NcDimension(
+                        dim_name, size=size
+                    )
+
+            attrs = {
+                name: NcAttribute(name, value)
+                for name, value in var.attrs.items()
+            }
+            nc_var = NcVariable(
+                name=varname,
+                dimensions=var.dims,
+                attributes=attrs,
+                data=var.data,
+                group=self,
+            )
+            self.variables[varname] = nc_var
+
+    def close(self):
+        pass
+
+    #
+    # This interface supports conversion to+from an xarray "Dataset".
+    # N.B. using the "AbstractDataStore" interface preserves variable contents, being
+    # either real or lazy arrays.
+    #
+    @classmethod
+    def from_xarray(
+        cls, dataset_or_file: Union[xr.Dataset, AnyStr, Path], **xr_load_kwargs
+    ):
+        if not isinstance(dataset_or_file, xr.Dataset):
+            # It's a "file" (or pathstring, or Path ?).
+            dataset_or_file = xr.load_dataset(
+                dataset_or_file, **xr_load_kwargs
+            )
+        nc_data = cls()
+        dataset_or_file.dump_to_store(nc_data, **xr_load_kwargs)
+        return nc_data
+
+    def to_xarray(self, **xr_save_kwargs) -> xr.Dataset:
+        ds = xr.Dataset.load_store(self, **xr_save_kwargs)
+        return ds
diff --git a/lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py b/lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py
new file mode 100644
index 0000000000..ac1d878126
--- /dev/null
+++ b/lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py
@@ -0,0 +1,236 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+An adaptor layer allowing an NcDataset to masquerade as a netCDF4.Dataset
+
+This is provided primarily to support a re-use of the iris.fileformats.netcdf file
+format load + save, to convert cubes to+from iris.experimental.xarray_bridde.ncdata
+objects, and hence bridge to xarray.Dataset.
+
+These classes contain NcDataset and NcVariables, but emulating the access APIs of a
+netCDF4.Dataset.
+
+Notes:
+  (1) currently only supports what is required for Iris load/save capability
+  (2) we are proposing that this remains private, for now? -- due to (1)
+
+"""
+import numpy as np
+
+from .ncdata import NcAttribute, NcDataset, NcDimension, NcVariable
+
+
+class _Nc4DatalikeWithNcattrs:
+    # A mixin, shared by _Nc4DatasetLike and _Nc4VariableLike, which adds netcdf-like
+    #  attribute operations'ncattrs / setncattr / getncattr', *AND* extends the local
+    #  objects attribute to those things also
+    # N.B. "self._ncdata" is the underlying NcData object : either an NcDataset or
+    #  NcVariable object.
+    def ncattrs(self):
+        return list(self._ncdata.attributes.keys())
+
+    def getncattr(self, attr):
+        attrs = self._ncdata.attributes
+        if attr in attrs:
+            result = attrs[attr]._as_python_value()
+        else:
+            # Don't allow it to issue a KeyError, as this upsets 'getattr' usage.
+            # Raise an AttributeError instead.
+            raise AttributeError(attr)
+        return result
+
+    def setncattr(self, attr, value):
+        # TODO: are we sure we need this translation ??
+        if isinstance(value, bytes):
+            value = value.decode("utf-8")
+        # N.B. using the NcAttribute class for storage also ensures/requires that all
+        #  attributes are cast as numpy arrays (so have shape, dtype etc).
+        self._ncdata.attributes[attr] = NcAttribute(attr, value)
+
+    def __getattr__(self, attr):
+        # Extend local object attribute access to the ncattrs of the stored data item
+        #  (Yuck, but I think the Iris load code requires it).
+        return self.getncattr(attr)
+
+    def __setattr__(self, attr, value):
+        if attr in self._local_instance_props:
+            # N.B. use _local_instance_props to define standard instance attributes, to avoid a
+            #  possible endless loop here.
+            super().__setattr__(attr, value)
+        else:
+            # # if not hasattr(self, '_allsetattrs'):
+            # #     self._allsetattrs = set()
+            # self._allsetattrs.add(attr)
+            self.setncattr(attr, value)
+
+
+class _Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
+    _local_instance_props = ("_ncdata", "variables")
+
+    def __init__(self, ncdata: NcDataset = None):
+        if ncdata is None:
+            ncdata = NcDataset()  # an empty dataset
+        self._ncdata = ncdata
+        # N.B. we need to create + store our OWN variables, as they are wrappers for
+        #  the underlying NcVariable objects, with different properties.
+        self.variables = {
+            name: _Nc4VariableLike._from_ncvariable(ncvar)
+            for name, ncvar in self._ncdata.variables.items()
+        }
+
+    @property
+    def dimensions(self):
+        return {
+            name: dim.size for name, dim in self._ncdata.dimensions.items()
+        }
+
+    @property
+    def groups(self):
+        return None  # not supported
+
+    def createDimension(self, dimname, size):
+        if dimname in self.dimensions:
+            msg = f'creating duplicate dimension "{dimname}".'
+            raise ValueError(msg)
+            # if self.dimensions[name] != size:
+            #     raise ValueError(f"size mismatch for dimension {name!r}: "
+            #                      f"{self.dimensions[name]} != {size}")
+        else:
+            self._ncdata.dimensions[dimname] = NcDimension(dimname, size)
+        return size
+
+    def createVariable(self, varname, datatype, dimensions=(), **encoding):
+        if varname in self.variables:
+            msg = f'creating duplicate variable "{varname}".'
+            raise ValueError(msg)
+        # Add a variable into the underlying NcDataset object.
+        ncvar = NcVariable(
+            name=varname,
+            dimensions=dimensions,
+            group=self._ncdata,
+        )
+        # Note: initially has no data (or attributes), since this is how netCDF4 expects
+        #  to do it.
+        self._ncdata.variables[varname] = ncvar
+        # Create a netCDF4-like "wrapper" variable + install that here.
+        nc4var = _Nc4VariableLike._from_ncvariable(ncvar, dtype=datatype)
+        self.variables[varname] = nc4var
+        return nc4var
+
+    def sync(self):
+        pass
+
+    def close(self):
+        self.sync()
+
+    @staticmethod
+    def filepath():
+        #
+        # Note: for now, let's just not care about this.
+        # we *might* need this to be an optional defined item on an NcDataset ??
+        # .. or, we might need to store an xarray "encoding" somewhere ?
+        # TODO: more thought here ?
+        # return self.ncdata.encoding.get("source", "")
+        return "<Nc4DatasetLike>"
+
+
+class _Nc4VariableLike(_Nc4DatalikeWithNcattrs):
+    _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")
+
+    def __init__(self, ncvar: NcVariable, datatype: np.dtype):
+        self._ncdata = ncvar
+        self.name = ncvar.name
+        # Note: datatype must be known at creation, which may be before an actual data
+        #  array is assigned on the ncvar.
+        self.datatype = np.dtype(datatype)
+        if ncvar.data is None:
+            # temporary empty data (to support never-written scalar values)
+            # NOTE: significantly, does *not* allocate an actual full array in memory
+            array = np.zeros(self.shape, self.datatype)
+            ncvar.data = array
+        self._raw_array = ncvar.data
+
+    @classmethod
+    def _from_ncvariable(cls, ncvar: NcVariable, dtype: np.dtype = None):
+        if dtype is None:
+            dtype = ncvar.dtype
+        self = cls(
+            ncvar=ncvar,
+            datatype=dtype,
+        )
+        return self
+
+    # Label this as an 'emulated' netCDF4.Variable, containing an actual (possibly
+    #  lazy) array, which can be directly read/written.
+    @property
+    def _raw_array(self):
+        return self._ncdata.data
+
+    @_raw_array.setter
+    def _raw_array(self, data):
+        self._ncdata.data = data
+        self.datatype = data.dtype
+
+    @property
+    def group(self):
+        return self._ncdata.group
+
+    @property
+    def dimensions(self):
+        return self._ncdata.dimensions
+
+    #
+    # "Normal" data access is via indexing.
+    # N.B. we do still need to support this, e.g. for DimCoords ?
+    #
+    def __getitem__(self, keys):
+        if keys != slice(None):
+            raise IndexError(keys)
+        if self.ndim == 0:
+            return self._ncdata.data
+        return self._ncdata.data[keys]
+
+    # The __setitem__ is not required for normal saving.
+    # The saver will assign ._raw_array instead
+    # TODO: might need to support this for future non-Iris usage ?
+    #
+    # def __setitem__(self, keys, data):
+    #     if keys != slice(None):
+    #         raise IndexError(keys)
+    #     if not hasattr(data, "dtype"):
+    #         raise ValueError(f"nonarray assigned as data : {data}")
+    #     if not data.shape == self.shape:
+    #         msg = (
+    #             f"assigned data has wrong shape : "
+    #             f"{data.shape} instead of {self.shape}"
+    #         )
+    #         raise ValueError(msg)
+    #     self._ncdata.data = data
+    #     self.datatype = data.dtype
+
+    @property
+    def dtype(self):
+        return self.datatype
+
+    @property
+    def dims(self):
+        return self.dimensions
+
+    @property
+    def ndim(self):
+        return len(self.dimensions)
+
+    @property
+    def shape(self):
+        dims = self.group.dimensions
+        return tuple(dims[n].size for n in self.dimensions)
+
+    @property
+    def size(self):
+        return int(np.prod(self.shape))  # plain int, also when scalar
+
+    def chunking(self):
+        return None
diff --git a/lib/iris/experimental/xarray_dataset_wrapper.py b/lib/iris/experimental/xarray_dataset_wrapper.py
deleted file mode 100644
index 455057afad..0000000000
--- a/lib/iris/experimental/xarray_dataset_wrapper.py
+++ /dev/null
@@ -1,415 +0,0 @@
-# Copyright Iris contributors
-#
-# This file is part of Iris and is released under the LGPL license.
-# See COPYING and COPYING.LESSER in the root of the repository for full
-# licensing details.
-"""
-A wrapper for an xarray.Dataset that simulates a netCDF4.Dataset.
-This enables code to read/write xarray data as if it were a netcdf file.
-
-NOTE: readonly, for now.
-TODO: add modify/save functions later.
-
-NOTE: this code is effectively independent of Iris, and does not really belong.
-However, this is a convenient place to test, for now.
-
-"""
-from collections import OrderedDict
-from typing import Optional
-
-import netCDF4 as nc
-import numpy as np
-import xarray
-import xarray as xr
-
-
-class _XrMimic:
-    """
-    An netcdf object "mimic" wrapped around an xarray object, which will be
-    either a dim, var or dataset.
-
-    These (mostly) contain an underlying xarray object, and all potentially
-    have a name + group (though dataset name is unused).
-    N.B. name is provided separately, as xr types do not "know" their own names
-    - e.g. an xr.Variable has no 'name' property.
-
-    We also support object equality checks.
-
-    NOTE: a DimensionMimic, uniquely, does *NOT* in fact contain an xarray
-    object, so its self._xr == None.   See  DimensionMimic docstring.
-
-    """
-
-    def __init__(self, xr, name=None, group=None):
-        """
-        Create a mimic object wrapping a :class:`nco.Ncobj` component.
-        Note: not all the underlying objects have a name, so provide that
-        separately.
-
-        """
-        self._xr = xr
-        self._name = name
-        self._group = group
-
-    @property
-    def name(self):
-        return self._name
-
-    def group(self):
-        return self._group
-
-    def __eq__(self, other):
-        return self._xr == other._xr
-
-    def __ne__(self, other):
-        return not self == other
-
-
-class DimensionMimic(_XrMimic):
-    """
-    A Dimension object mimic wrapper.
-
-    Dimension additional properties: length, unlimited
-
-    NOTE: a DimensionMimic does *NOT* contain an xarray object representing the
-    dimension, because xarray doesn't have such objects.
-    So, in xarray, you can't rename or modify an existing Dataset dimension.
-    But you can re-order, add, and remove ones that no variable uses.
-
-    """
-
-    def __init__(self, name, len, isunlimited=False, group=None):
-        # Note that there *is* no underlying xarray object.
-        # So we make up something, to support equality checks.
-        id_placeholder = (name, len, isunlimited)
-        super().__init__(xr=id_placeholder, name=name, group=group)
-        self._len = len  # A private version, for now, in case needs change.
-        self._unlimited = isunlimited
-
-    @property
-    def size(self):
-        return 0 if self.isunlimited() else self.len
-
-    def __len__(self):
-        return self._len
-
-    def isunlimited(self):
-        return self._unlimited
-
-
-class _Nc4AttrsMimic(_XrMimic):
-    """
-    A class mixin for a Mimic with attribute access.
-
-    I.E. shared by variables and datasets.
-
-    """
-
-    def ncattrs(self):
-        return self._xr.attrs.keys()  # Probably do *not* need/expect a list ?
-
-    def getncattr(self, attr_name):
-        if attr_name in self._xr.attrs:
-            result = self._xr.attrs[attr_name]
-        else:
-            raise AttributeError()
-        return result
-
-    def __getattr__(self, attr_name):
-        return self.getncattr(attr_name)
-
-    #
-    # writing
-    #
-    def setncattr(self, attr_name, value):
-        if isinstance(value, bytes):
-            value = value.decode()
-        self._xr.attrs[attr_name] = value
-
-    # NOTE: not currently supporting ".my_attribute = value" type access.
-    # def __setattr__(self, attr_name, value):
-    #     self.setncattr(attr_name, value)
-
-
-class VariableMimic(_Nc4AttrsMimic):
-    """
-    A Variable object mimic wrapper.
-
-    Variable additional properties:
-        dimensions, dtype, data (+ attributes, parent-group)
-        shape, size, ndim
-
-    """
-
-    @property
-    def dtype(self):
-        return self._xr.dtype
-
-    def chunking(self):
-        return None
-
-    @property
-    def datatype(self):
-        return self.dtype
-
-    @property
-    def dimensions(self):
-        return self._xr.dims
-
-    def __getitem__(self, keys):
-        if self.ndim == 0:
-            return self._xr.data
-        else:
-            return self._xr[keys].data
-
-    @property
-    def shape(self):
-        return self._xr.shape
-
-    @property
-    def ndim(self):
-        return self._xr.ndim
-
-    @property
-    def size(self):
-        return self._xr.size
-
-    #
-    # writing
-    #
-    def __setitem__(self, keys, data):
-        self._xr[keys] = data
-
-
-class DatasetMimic(_Nc4AttrsMimic):
-    """
-    An object mimicking an netCDF4.Dataset, wrapping an xarray.Dataset.
-
-    """
-
-    def __init__(self, xrds=None):
-        """
-        Create a Dataset mimic, which provides a bridge between the
-        :class:`netcdf.Dataset` access API and data in the form of an
-        :class:`xarray.Dataset`.
-
-        Parameters
-        ----------
-        xrds : :class:`xr.Dataset`, optional
-            If provided, create a DatasetMimic representing the xarray data.
-            If None, initialise empty.
-            In either case, the result can be read or written like a
-            :class:`netcdf.Dataset`.  Or, an xarray equivalent can be
-            regenerated with the :meth:`to_xarray_dataset` method.
-
-        Notes
-        -----
-        Only a limited subset of the :mod:`netCDF4` APIs are currently
-        supported : just enough to allow Iris to read and write xarray datasets
-        in place of netcdf files.
-
-        In addition to the netCDF4 read API, you can at any time obtain a
-        version of the contents in the form of a :class:`xarray.Dataset`, from
-        the :meth:`DatasetMimic.to_xarray_dataset` method.
-        """
-        if xrds is None:
-            # Initialise empty dataset if not passed in.
-            xrds = xr.Dataset()
-        super().__init__(xrds)
-
-        # Capture original filepath, if known.
-        self._sourcepath = self._xr.encoding.get("source", "")
-
-        # Keep track of variables which were renamed on creation to prevent
-        # them being made into coords (which are not writable).
-        self._output_renames = {}
-
-        # Capture existing dimensions in input
-        unlim_dims = self._xr.encoding.get("unlimited_dims", set())
-        self.dimensions = OrderedDict()
-        for name, len in self._xr.dims.items():
-            is_unlim = name in unlim_dims
-            dim = DimensionMimic(name, len, isunlimited=is_unlim)
-            self.dimensions[name] = dim
-
-        # Capture existing variables in input
-        self.variables = OrderedDict()
-        for name, var in self._xr.variables.items():
-            var_mimic = VariableMimic(var, name=name)
-            self.variables[name] = var_mimic
-
-    def filepath(self) -> str:
-        return self._sourcepath
-
-    def to_xarray_dataset(self) -> xr.Dataset:
-        """Get an xarray.Dataset representing the simulated netCDF4.Dataset."""
-        ds = self._xr
-        # Drop the 'extra' coordinate variables which were required to make
-        # indexing constructions work.
-        ds = ds.drop_vars(self.dimensions.keys())
-        # Rename original dimension coords back to their dimension name.
-        ds = ds.rename_vars(self._output_renames)
-        # Apply "nofill" encoding to all the output vars which did do not
-        # actually provide a '_FillVAlue' attribute.
-        # TODO: check that a provided fill-value behaves as expected
-        for varname, var in ds.variables.items():
-            # if 'missing_value' in var.attrs:
-            #     print(varname)
-            #     del var.attrs['missing_value']
-            if "_FillValue" not in var.attrs:
-                var.encoding["_FillValue"] = None
-        return ds
-
-    def groups(self):
-        # Xarray does not support groups :-(
-        return None
-
-    def sync(self):
-        pass
-
-    def close(self):
-        pass
-
-    @staticmethod
-    def _dimcoord_adjusted_name(dimname):
-        return f"_{dimname}_XRDS_RENAMED_"
-
-    #
-    # modify/write support
-    #
-    def createDimension(
-        self, dimname, size=None, actual_length=0
-    ) -> DimensionMimic:
-        """
-        Simulate netCDF4 call.
-
-        N.B. the extra 'actual_length' keyword can be used in conjunction with
-        size=0, to create an unlimited dimension of known 'current length'.
-
-        """
-        # NOTE: this does not work in-place, but forces us to replace the
-        # original dataset.  Therefore caller can't use a ref to the original.
-        # This *could* also mean that DimensionMimics don't work, but in fact
-        # it is okay since xarray doesn't use dimension objects, and netCDF4
-        # anyway requires us to create all the dims *first*.
-        # TODO: check that 'unlimited' works -- suspect that present code can't
-        #  cope with setting the 'current length' ?
-        self._xr = self._xr.expand_dims({dimname: size}, -1)
-        size = size or 0
-        is_unlim = size == 0
-        actual_length = actual_length or size
-        if is_unlim:
-            unlim_dims = self._xr.encoding.setdefault(
-                "unlimited_dimensions", set()
-            )
-            unlim_dims.add(dimname)
-        dim = DimensionMimic(dimname, actual_length, is_unlim)
-        self.dimensions[dimname] = dim
-        if actual_length > 0:
-            # NOTE: for now, we are adding an extra index variable on each
-            # dimension, since this avoids much problems with variables being
-            # automatically converted to IndexVariables.
-            # These extra coord variables do *NOT* appear in self.variables,
-            # and are absent from the dataset produced by 'to_xarray_dataset'.
-            data = np.arange(actual_length, dtype=int)
-            self._xr[dimname] = data
-        return dim
-
-    # Expected default controls in createVariable call,
-    # from iris.fileformats.netcdf.Saver
-    _netcdf_saver_defaults = {
-        "zlib": False,
-        "complevel": 4,
-        "shuffle": True,
-        "fletcher32": False,
-        "contiguous": False,
-        "chunksizes": None,
-        "endian": "native",
-        "least_significant_digit": None,
-        "packing": None,
-    }
-
-    def createVariable(
-        self, varname, datatype, dimensions=(), fill_value=None, **kwargs
-    ) -> VariableMimic:
-        # TODO: kwargs should probably translate into 'encoding' on ds or vars
-        # FOR NOW: simply check we have no "active" kwargs requesting
-        # non-default operation.  Unfortunately, that involves some
-        # detailed knowledge of the netCDF4.createVariable interface.
-        for kwarg, val in kwargs.items():
-            if kwarg not in self._netcdf_saver_defaults:
-                msg = (
-                    "Unrecognised netcdf saver control keyword : "
-                    "{kwarg} = {val}."
-                )
-                raise ValueError(msg)
-            if val != self._netcdf_saver_defaults[kwarg]:
-                msg = (
-                    "Non-default Netcdf saver control setting : "
-                    "{kwarg} = {val}.  These controls are not supported by "
-                    "the DatasetMimic."
-                )
-                raise ValueError(msg)
-
-        datatype = np.dtype(datatype)
-        shape = tuple(self._xr.dims[dimname] for dimname in dimensions)
-
-        # Note: initially create with all-missing data.  This can subsequently
-        # be assigned different values, and even support partial writes.
-        # TODO: would really like to support Dask arrays here.
-        if fill_value is not None:
-            attrs = {"_FillValue": fill_value}
-            use_fill = fill_value
-        else:
-            attrs = {}
-            dt_code = f"{datatype.kind}{datatype.itemsize}"
-            use_fill = nc.default_fillvals[dt_code]
-        data = np.full(shape, fill_value=use_fill, dtype=datatype)
-
-        xr_var = xr.Variable(dims=dimensions, data=data, attrs=attrs)
-        original_varname = varname
-        if varname in self._xr.dims:
-            # We need to avoid creating vars as coords, for which we currently
-            # use a nasty trick :  Insert with a modified name, and rename back
-            # on output (see 'to_xarray_dataset').
-            # TODO: see if xarray provides a cleaner way to get what we want.
-            alt_varname = f"XDRS_RENAMED_{varname}_"
-            self._output_renames[alt_varname] = varname
-            varname = alt_varname
-
-        # Install the var, and immediately re-fetch it, since the internal
-        # object is *not* generally the same as the one we put in.
-        self._xr[varname] = xr_var
-        xr_var = self._xr.variables[varname]
-        # Create a mimic for interfacing to the xarray.Variable.
-        var_mimic = VariableMimic(xr_var, name=original_varname)
-        self.variables[varname] = var_mimic
-        return var_mimic
-
-
-def fake_nc4python_dataset(xr_group: Optional[xr.Dataset] = None):
-    """
-    Make a wrapper around an xarray Dataset which emulates a
-    :class:`netCDF4.Dataset`.
-
-    The resulting :class:`DatasetMimic` supports essential properties of a
-    read-mode :class:`netCDF4.Dataset`, enabling an arbitrary netcdf data
-    structure in memory to be "read" as if it were a file
-    (i.e. without writing it to disk).
-    It likewise supports write operations, which translates netCDF4 writes
-    into operations on the internal xarray dataset.
-    It can also reproduce its content as a :class:`xarray.Dataset` from its
-    :meth:`DatasetMimic.to_xarray_dataset` method.
-
-    Parameters
-    ----------
-    xr_group : xarray.Dataset, optional
-        If given, return a DatasetMimic wrapped around this data.
-        If absent, return an *empty* (but writeable) DatasetMimic.
-
-    Returns
-    -------
-    dataset : DatasetMimic
-
-    """
-    return DatasetMimic(xr_group)

From e4a079c58d3594b04fc07fae23e9a8ef7c8b4412 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 7 Dec 2022 17:39:56 +0000
Subject: [PATCH 3/6] Reorg into planned ncdata package structure.

---
 lib/iris/experimental/ncdata/__init__.py      |  42 +++++
 .../ncdata.py => ncdata/_core.py}             |  12 +-
 .../dataset_like.py}                          |  25 +--
 lib/iris/experimental/ncdata/xarray.py        | 144 ++++++++++++++++++
 .../experimental/xarray_bridge/__init__.py    |  13 +-
 5 files changed, 212 insertions(+), 24 deletions(-)
 create mode 100644 lib/iris/experimental/ncdata/__init__.py
 rename lib/iris/experimental/{xarray_bridge/ncdata.py => ncdata/_core.py} (96%)
 rename lib/iris/experimental/{xarray_bridge/ncdata_netcdf4_adaptor.py => ncdata/dataset_like.py} (89%)
 create mode 100644 lib/iris/experimental/ncdata/xarray.py

diff --git a/lib/iris/experimental/ncdata/__init__.py b/lib/iris/experimental/ncdata/__init__.py
new file mode 100644
index 0000000000..4e2d4e93da
--- /dev/null
+++ b/lib/iris/experimental/ncdata/__init__.py
@@ -0,0 +1,42 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+An abstract representation of Netcdf structured data, according to the
+"Common Data Model" : https://docs.unidata.ucar.edu/netcdf-java/5.3/userguide/common_data_model_overview.html
+
+TODO:
+  * add consistency checking
+  * add "direct" netcdf interfacing, i.e. to_nc4/from_nc4
+
+"""
+import iris
+from iris.cube import CubeList
+import iris.fileformats.netcdf as ifn
+
+from .dataset_like import Nc4DatasetLike
+from .xarray import from_xarray, to_xarray
+
+#
+# The primary conversion interfaces
+#
+
+
+def cubes_from_xarray(xrds: "xarray.Dataset", **xr_load_kwargs):  # noqa
+    ncdata = from_xarray(xrds, **xr_load_kwargs)
+    dslike = Nc4DatasetLike(ncdata)
+    cubes = CubeList(ifn.load_cubes(dslike))
+    return cubes
+
+
+def cubes_to_xarray(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
+    """Save cubes into an NcData, and return that as an xarray.Dataset."""
+    iris_save_kwargs = iris_save_kwargs or {}
+    nc4like = Nc4DatasetLike()  # wraps a new, empty dataset for the saver
+    iris.save(
+        cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
+    )
+    # Pass on the saved content : assumes 'to_xarray' takes the NcData first.
+    return to_xarray(nc4like._ncdata, **(xr_save_kwargs or {}))
diff --git a/lib/iris/experimental/xarray_bridge/ncdata.py b/lib/iris/experimental/ncdata/_core.py
similarity index 96%
rename from lib/iris/experimental/xarray_bridge/ncdata.py
rename to lib/iris/experimental/ncdata/_core.py
index 2e2b2edaeb..2de791347d 100644
--- a/lib/iris/experimental/xarray_bridge/ncdata.py
+++ b/lib/iris/experimental/ncdata/_core.py
@@ -26,20 +26,20 @@
 #
 
 
-class NcGroup:
+class NcData:
     def __init__(
         self,
         name: Optional[str] = None,
         dimensions: Dict[str, "NcDimension"] = None,
         variables: Dict[str, "NcVariable"] = None,
         attributes: Dict[str, "NcAttribute"] = None,
-        groups: Dict[str, "NcGroup"] = None,
+        groups: Dict[str, "NcData"] = None,
     ):
         self.name: str = name
         self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
         self.variables: Dict[str, "NcVariable"] = variables or {}
         self.attributes: Dict[str, "NcAttribute"] = attributes or {}
-        self.groups: Dict[str, "NcGroup"] = groups or {}
+        self.groups: Dict[str, "NcData"] = groups or {}
 
 
 class NcDimension:
@@ -56,7 +56,7 @@ def __init__(
         data: np.ndarray = None,
         dtype: np.dtype = None,
         attributes: Dict[str, "NcAttribute"] = None,
-        group: "NcGroup" = None,
+        group: "NcData" = None,
     ):
         self.name = name
         self.dimensions = tuple(dimensions or ())
@@ -97,8 +97,8 @@ def _as_python_value(self):
         return result
 
 
-class NcDataset(NcGroup):
-    # An interface class providing an NcGroup which can be converted to/from an
+class NcDataset(NcData):
+    # An interface class providing an NcData which can be converted to/from an
     # xr.Dataset.  This is basically done by adding a small API enabling it to function
     # as an Xarray "AbstractDataStore".
     # This implies some embedded knowledge of Xarray, but it is very small.
diff --git a/lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py b/lib/iris/experimental/ncdata/dataset_like.py
similarity index 89%
rename from lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py
rename to lib/iris/experimental/ncdata/dataset_like.py
index ac1d878126..523cd6ab7d 100644
--- a/lib/iris/experimental/xarray_bridge/ncdata_netcdf4_adaptor.py
+++ b/lib/iris/experimental/ncdata/dataset_like.py
@@ -4,27 +4,28 @@
 # See COPYING and COPYING.LESSER in the root of the repository for full
 # licensing details.
 """
-An adaptor layer allowing an NcDataset to masquerade as a netCDF4.Dataset
+An adaptor layer allowing an NcData to masquerade as a netCDF4.Dataset object.
 
 This is provided primarily to support a re-use of the iris.fileformats.netcdf file
-format load + save, to convert cubes to+from iris.experimental.xarray_bridde.ncdata
-objects, and hence bridge to xarray.Dataset.
+format load + save, to convert cubes to+from ncdata objects, and hence convert Iris
+cubes to+from an xarray.Dataset.
 
 These classes contain NcDataset and NcVariables, but emulating the access APIs of a
 netCDF4.Dataset.
 
-Notes:
-  (1) currently only supports what is required for Iris load/save capability
-  (2) we are proposing that this remains private, for now? -- due to (1)
+Note: currently only supports what is required for Iris load/save capability.
+It could conceivably be used for data exchange by *other* code that reads or writes
+netcdf files, but that may require API support to be extended, depending on what
+additional methods might be used.
 
 """
 import numpy as np
 
-from .ncdata import NcAttribute, NcDataset, NcDimension, NcVariable
+from ._core import NcAttribute, NcDataset, NcDimension, NcVariable
 
 
 class _Nc4DatalikeWithNcattrs:
-    # A mixin, shared by _Nc4DatasetLike and _Nc4VariableLike, which adds netcdf-like
+    # A mixin, shared by Nc4DatasetLike and Nc4VariableLike, which adds netcdf-like
     #  attribute operations'ncattrs / setncattr / getncattr', *AND* extends the local
     #  objects attribute to those things also
     # N.B. "self._ncdata" is the underlying NcData object : either an NcDataset or
@@ -67,7 +68,7 @@ def __setattr__(self, attr, value):
             self.setncattr(attr, value)
 
 
-class _Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
+class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
     _local_instance_props = ("_ncdata", "variables")
 
     def __init__(self, ncdata: NcDataset = None):
@@ -77,7 +78,7 @@ def __init__(self, ncdata: NcDataset = None):
         # N.B. we need to create + store our OWN variables, as they are wrappers for
         #  the underlying NcVariable objects, with different properties.
         self.variables = {
-            name: _Nc4VariableLike._from_ncvariable(ncvar)
+            name: Nc4VariableLike._from_ncvariable(ncvar)
             for name, ncvar in self._ncdata.variables.items()
         }
 
@@ -116,7 +117,7 @@ def createVariable(self, varname, datatype, dimensions=(), **encoding):
         #  to do it.
         self._ncdata.variables[varname] = ncvar
         # Create a netCDF4-like "wrapper" variable + install that here.
-        nc4var = _Nc4VariableLike._from_ncvariable(ncvar, dtype=datatype)
+        nc4var = Nc4VariableLike._from_ncvariable(ncvar, dtype=datatype)
         self.variables[varname] = nc4var
         return nc4var
 
@@ -137,7 +138,7 @@ def filepath():
         return "<Nc4DatasetLike>"
 
 
-class _Nc4VariableLike(_Nc4DatalikeWithNcattrs):
+class Nc4VariableLike(_Nc4DatalikeWithNcattrs):
     _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")
 
     def __init__(self, ncvar: NcVariable, datatype: np.dtype):
diff --git a/lib/iris/experimental/ncdata/xarray.py b/lib/iris/experimental/ncdata/xarray.py
new file mode 100644
index 0000000000..7ba83d46bf
--- /dev/null
+++ b/lib/iris/experimental/ncdata/xarray.py
@@ -0,0 +1,144 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Wrapper classes containing NcData, which provide an interface to read/write from an
+:class:`xarray.Dataset`.
+
+This embeds a certain amount of Xarray knowledge (and dependency), hopefully a minimal
+amount.  The structure of an NcData object makes it fairly painless.
+
+"""
+from pathlib import Path
+from typing import AnyStr, Union
+
+import xarray as xr
+
+from ._core import NcAttribute, NcData, NcDimension, NcVariable
+
+
+class _XarrayNcDataStore:  # (xr.backends.common.AbstractWritableDataStore)
+    # An interface class providing a subset of the
+    # :class:`xr.AbstractWritableDataStore` interface, and which converts to/from a
+    # contained ncdata.NcData.
+    # This requires some knowledge of Xarray, but it is very small.
+    #
+    # This code pinched from @TomekTrzeciak
+    # see https://gist.github.com/TomekTrzeciak/b00ff6c9dc301ed6f684990e400d1435
+
+    def __init__(self, ncdata: NcData = None):
+        if ncdata is None:
+            ncdata = NcData()
+        self.ncdata = ncdata
+
+    def load(self):
+        variables = {}
+        for k, v in self.ncdata.variables.items():
+            attrs = {
+                name: attr._as_python_value()
+                for name, attr in v.attributes.items()
+            }
+            xr_var = xr.Variable(
+                v.dimensions, v.data, attrs, getattr(v, "encoding", {})
+            )
+            # TODO: ?possibly? need to apply usual Xarray "encodings" to convert raw
+            #  cf-encoded data into 'normal', interpreted xr.Variables.
+            xr_var = xr.conventions.decode_cf_variable(k, xr_var)
+            variables[k] = xr_var
+        attributes = {
+            name: attr._as_python_value()
+            for name, attr in self.ncdata.attributes.items()
+        }
+        return variables, attributes
+
+    def store(
+        self,
+        variables,
+        attributes,
+        check_encoding_set=frozenset(),
+        writer=None,
+        unlimited_dims=None,
+    ):
+        for attrname, v in attributes.items():
+            if (
+                attrname in self.ncdata.attributes
+            ):  # and self.attributes[k] != v:
+                msg = (
+                    f're-setting of attribute "{attrname}" : '
+                    f"was={self.ncdata.attributes[attrname]}, now={v}"
+                )
+                raise ValueError(msg)
+            else:
+                self.ncdata.attributes[attrname] = NcAttribute(attrname, v)
+
+        for varname, var in variables.items():
+            if varname in self.ncdata.variables:
+                raise ValueError(f'duplicate variable : "{varname}"')
+
+            # An xr.Variable : remove all the possible Xarray encodings
+            # These are all the ones potentially used by
+            # :func:`xr.conventions.decode_cf_variable`, in the order in which they
+            # would be applied.
+            var = xr.conventions.encode_cf_variable(
+                var, name=varname, needs_copy=False
+            )
+
+            for dim_name, size in zip(var.dims, var.shape):
+                if dim_name in self.ncdata.dimensions:
+                    if self.ncdata.dimensions[dim_name].size != size:
+                        raise ValueError(
+                            f"size mismatch for dimension {dim_name!r}: "
+                            f"{self.ncdata.dimensions[dim_name]} != {size}"
+                        )
+                else:
+                    self.ncdata.dimensions[dim_name] = NcDimension(
+                        dim_name, size=size
+                    )
+
+            attrs = {
+                name: NcAttribute(name, value)
+                for name, value in var.attrs.items()
+            }
+            nc_var = NcVariable(
+                name=varname,
+                dimensions=var.dims,
+                attributes=attrs,
+                data=var.data,
+                group=self.ncdata,
+            )
+            self.ncdata.variables[varname] = nc_var
+
+    def close(self):
+        pass
+
+    #
+    # This interface supports conversion to+from an xarray "Dataset".
+    # N.B. using the "AbstractDataStore" interface preserves variable contents, being
+    # either real or lazy arrays.
+    #
+    @classmethod
+    def from_xarray(
+        cls, dataset_or_file: Union[xr.Dataset, AnyStr, Path], **xr_load_kwargs
+    ):
+        if not isinstance(dataset_or_file, xr.Dataset):
+            # It's a "file" (or pathstring, or Path ?).
+            dataset_or_file = xr.load_dataset(
+                dataset_or_file, **xr_load_kwargs
+            )
+        nc_data = cls()
+        dataset_or_file.dump_to_store(nc_data, **xr_load_kwargs)
+        return nc_data
+
+    def to_xarray(self, **xr_save_kwargs) -> xr.Dataset:
+        ds = xr.Dataset.load_store(self, **xr_save_kwargs)
+        return ds
+
+
+def to_xarray(ncdata: NcData, **kwargs) -> xr.Dataset:
+    return _XarrayNcDataStore(ncdata).to_xarray(**kwargs)
+
+
+def from_xarray(xrds: Union[xr.Dataset, Path, AnyStr], **kwargs) -> NcData:
+    return _XarrayNcDataStore.from_xarray(xrds, **kwargs).ncdata
diff --git a/lib/iris/experimental/xarray_bridge/__init__.py b/lib/iris/experimental/xarray_bridge/__init__.py
index 1400cf0e17..55e59e4b0e 100644
--- a/lib/iris/experimental/xarray_bridge/__init__.py
+++ b/lib/iris/experimental/xarray_bridge/__init__.py
@@ -19,8 +19,9 @@
 from iris.cube import CubeList
 import iris.fileformats.netcdf as ifn
 
-from .ncdata import NcDataset
-from .ncdata_netcdf4_adaptor import _Nc4DatasetLike
+from ..ncdata.dataset_like import Nc4DatasetLike
+from ..ncdata.xarray import from_xarray as ncdata_from_xarray
+from ..ncdata.xarray import to_xarray as ncdata_to_xarray
 
 #
 # The primary conversion interfaces
@@ -28,8 +29,8 @@
 
 
 def cubes_from_xarray(xrds: "xarray.Dataset", **xr_load_kwargs):  # noqa
-    ncdata = NcDataset.from_xarray(xrds, **xr_load_kwargs)
-    dslike = _Nc4DatasetLike(ncdata)
+    ncdata = ncdata_from_xarray(xrds, **xr_load_kwargs)
+    dslike = Nc4DatasetLike(ncdata)
     cubes = CubeList(ifn.load_cubes(dslike))
     return cubes
 
@@ -37,9 +38,9 @@ def cubes_from_xarray(xrds: "xarray.Dataset", **xr_load_kwargs):  # noqa
 def cubes_to_xarray(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
     iris_save_kwargs = iris_save_kwargs or {}
     xr_save_kwargs = xr_save_kwargs or {}
-    nc4like = _Nc4DatasetLike()
+    nc4like = Nc4DatasetLike()
     iris.save(
         cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
     )
-    xrds = nc4like._ncdata.to_xarray(**xr_save_kwargs)
+    xrds = ncdata_to_xarray(nc4like._ncdata, **xr_save_kwargs)
     return xrds

From 40d582b23a79e1bced420e7712d40b2f3d2fbf85 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 7 Dec 2022 18:41:57 +0000
Subject: [PATCH 4/6] Small tidy.

---
 lib/iris/experimental/ncdata/__init__.py |  29 +-----
 lib/iris/experimental/ncdata/_core.py    | 114 +----------------------
 2 files changed, 3 insertions(+), 140 deletions(-)

diff --git a/lib/iris/experimental/ncdata/__init__.py b/lib/iris/experimental/ncdata/__init__.py
index 4e2d4e93da..dd8f344abe 100644
--- a/lib/iris/experimental/ncdata/__init__.py
+++ b/lib/iris/experimental/ncdata/__init__.py
@@ -12,31 +12,6 @@
   * add "direct" netcdf interfacing, i.e. to_nc4/from_nc4
 
 """
-import iris
-from iris.cube import CubeList
-import iris.fileformats.netcdf as ifn
+from ._core import NcAttribute, NcData, NcDimension, NcVariable
 
-from .dataset_like import Nc4DatasetLike
-from .xarray import from_xarray, to_xarray
-
-#
-# The primary conversion interfaces
-#
-
-
-def cubes_from_xarray(xrds: "xarray.Dataset", **xr_load_kwargs):  # noqa
-    ncdata = from_xarray(xrds, **xr_load_kwargs)
-    dslike = Nc4DatasetLike(ncdata)
-    cubes = CubeList(ifn.load_cubes(dslike))
-    return cubes
-
-
-def cubes_to_xarray(cubes, iris_save_kwargs=None, xr_save_kwargs=None):
-    iris_save_kwargs = iris_save_kwargs or {}
-    xr_save_kwargs = xr_save_kwargs or {}
-    nc4like = Nc4DatasetLike()
-    iris.save(
-        cubes, nc4like, saver=iris.fileformats.netcdf.save, **iris_save_kwargs
-    )
-    xrds = to_xarray(**xr_save_kwargs)
-    return xrds
+__all__ = ["NcAttribute", "NcData", "NcDimension", "NcVariable"]
diff --git a/lib/iris/experimental/ncdata/_core.py b/lib/iris/experimental/ncdata/_core.py
index 2de791347d..b5672ae45e 100644
--- a/lib/iris/experimental/ncdata/_core.py
+++ b/lib/iris/experimental/ncdata/_core.py
@@ -11,11 +11,9 @@
 TODO: add direct netcdf file interface (easy, but not yet).
 
 """
-from pathlib import Path
-from typing import AnyStr, Dict, Optional, Tuple, Union
+from typing import Dict, Optional, Tuple
 
 import numpy as np
-import xarray as xr
 
 #
 # A totally basic and naive representation of netCDF data.
@@ -95,113 +93,3 @@ def _as_python_value(self):
             if isinstance(result, bytes):
                 result = result.decode()
         return result
-
-
-class NcDataset(NcData):
-    # An interface class providing an NcData which can be converted to/from an
-    # xr.Dataset.  This is basically done by adding a small API enabling it to function
-    # as an Xarray "AbstractDataStore".
-    # This implies some embedded knowledge of Xarray, but it is very small.
-    #
-    # This code pinched from @TomekTrzeciak
-    # see https://gist.github.com/TomekTrzeciak/b00ff6c9dc301ed6f684990e400d1435
-
-    def load(self):
-        variables = {}
-        for k, v in self.variables.items():
-            attrs = {
-                name: attr._as_python_value()
-                for name, attr in v.attributes.items()
-            }
-            xr_var = xr.Variable(
-                v.dimensions, v.data, attrs, getattr(v, "encoding", {})
-            )
-            # TODO: ?possibly? need to apply usual Xarray "encodings" to convert raw
-            #  cf-encoded data into 'normal', interpreted xr.Variables.
-            xr_var = xr.conventions.decode_cf_variable(k, xr_var)
-            variables[k] = xr_var
-        attributes = {
-            name: attr._as_python_value()
-            for name, attr in self.attributes.items()
-        }
-        return variables, attributes
-
-    def store(
-        self,
-        variables,
-        attributes,
-        check_encoding_set=frozenset(),
-        writer=None,
-        unlimited_dims=None,
-    ):
-        for attrname, v in attributes.items():
-            if attrname in self.attributes:  # and self.attributes[k] != v:
-                msg = (
-                    f're-setting of attribute "{attrname}" : '
-                    f"was={self.attributes[attrname]}, now={v}"
-                )
-                raise ValueError(msg)
-            else:
-                self.attributes[attrname] = NcAttribute(attrname, v)
-
-        for varname, var in variables.items():
-            if varname in self.variables:
-                raise ValueError(f'duplicate variable : "{varname}"')
-
-            # An xr.Variable : remove all the possible Xarray encodings
-            # These are all the ones potentially used by
-            # :func:`xr.conventions.decode_cf_variable`, in the order in which they
-            # would be applied.
-            var = xr.conventions.encode_cf_variable(
-                var, name=varname, needs_copy=False
-            )
-
-            for dim_name, size in zip(var.dims, var.shape):
-                if dim_name in self.dimensions:
-                    if self.dimensions[dim_name].size != size:
-                        raise ValueError(
-                            f"size mismatch for dimension {dim_name!r}: "
-                            f"{self.dimensions[dim_name]} != {size}"
-                        )
-                else:
-                    self.dimensions[dim_name] = NcDimension(
-                        dim_name, size=size
-                    )
-
-            attrs = {
-                name: NcAttribute(name, value)
-                for name, value in var.attrs.items()
-            }
-            nc_var = NcVariable(
-                name=varname,
-                dimensions=var.dims,
-                attributes=attrs,
-                data=var.data,
-                group=self,
-            )
-            self.variables[varname] = nc_var
-
-    def close(self):
-        pass
-
-    #
-    # This interface supports conversion to+from an xarray "Dataset".
-    # N.B. using the "AbstractDataStore" interface preserves variable contents, being
-    # either real or lazy arrays.
-    #
-    @classmethod
-    def from_xarray(
-        cls, dataset_or_file: Union[xr.Dataset, AnyStr, Path], **xr_load_kwargs
-    ):
-        if not isinstance(dataset_or_file, xr.Dataset):
-            # It's a "file" (or pathstring, or Path ?).
-            dataset_or_file = xr.load_dataset(
-                dataset_or_file, **xr_load_kwargs
-            )
-        nc_data = cls()
-        dataset_or_file.dump_to_store(nc_data, **xr_load_kwargs)
-        return nc_data
-
-    def to_xarray(self, **xr_save_kwargs) -> xr.Dataset:
-        ds = xr.Dataset.load_store(self, **xr_save_kwargs)
-        return ds

From 155c020158ea78e5b98c0f505068315dac17c712 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Wed, 7 Dec 2022 18:45:48 +0000
Subject: [PATCH 5/6] Added nc4 interface : N.B. no unlimited dims yet.

---
 .../ncdata/_nc4_interface_exercise.py         |  24 ++++
 lib/iris/experimental/ncdata/netcdf4.py       | 134 ++++++++++++++++++
 2 files changed, 158 insertions(+)
 create mode 100644 lib/iris/experimental/ncdata/_nc4_interface_exercise.py
 create mode 100644 lib/iris/experimental/ncdata/netcdf4.py

diff --git a/lib/iris/experimental/ncdata/_nc4_interface_exercise.py b/lib/iris/experimental/ncdata/_nc4_interface_exercise.py
new file mode 100644
index 0000000000..3ed878a5c8
--- /dev/null
+++ b/lib/iris/experimental/ncdata/_nc4_interface_exercise.py
@@ -0,0 +1,24 @@
+from iris.experimental.ncdata.netcdf4 import from_nc4, to_nc4
+import iris.tests as itsts
+
+
+def example_nc4_roundtrip():
+    filepath = itsts.get_data_path(
+        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
+    )
+    ncdata = from_nc4(filepath)
+    filepath2 = "./temp_nc_output.nc"
+    to_nc4(ncdata, filepath2)
+
+    # Convert to Iris + compare (a bit of a cheat, but OK for now?)
+    import iris
+
+    cube1 = iris.load_cube(filepath)
+    cube2 = iris.load_cube(filepath2)
+    print("Round-tripped result, as iris cube:")
+    print(cube2)
+    print("\nold-file-cube == new-file-cube ? ", cube1 == cube2)
+
+
+if __name__ == "__main__":
+    example_nc4_roundtrip()
diff --git a/lib/iris/experimental/ncdata/netcdf4.py b/lib/iris/experimental/ncdata/netcdf4.py
new file mode 100644
index 0000000000..44553b7e6b
--- /dev/null
+++ b/lib/iris/experimental/ncdata/netcdf4.py
@@ -0,0 +1,134 @@
+# Copyright Iris contributors
+#
+# This file is part of Iris and is released under the LGPL license.
+# See COPYING and COPYING.LESSER in the root of the repository for full
+# licensing details.
+"""
+Code to read/write between NcData and a :class:`netCDF4.Dataset`, or disk file.
+
+"""
+from pathlib import Path
+from typing import AnyStr, Union
+
+import dask.array as da
+import netCDF4 as nc
+
+from iris._lazy_data import as_lazy_data
+from iris.fileformats.netcdf import NetCDFDataProxy
+
+from ._core import NcAttribute, NcData, NcDimension, NcVariable
+
+
+def to_nc4(
+    ncdata: NcData, nc4_dataset_or_file: Union[nc.Dataset, Path, AnyStr]
+):
+    """
+    Write an NcData to a provided (writeable) :class:`netCDF4.Dataset`, or filepath.
+    """
+    caller_owns_dataset = hasattr(nc4_dataset_or_file, "variables")
+    if caller_owns_dataset:
+        nc4ds = nc4_dataset_or_file
+    else:
+        nc4ds = nc.Dataset(nc4_dataset_or_file, "w")
+
+    try:
+        for dimname, dim in ncdata.dimensions.items():
+            nc4ds.createDimension(dimname, dim.size)
+
+        for varname, var in ncdata.variables.items():
+            fillattr = "_FillValue"
+            if fillattr in var.attributes:
+                fill_value = var.attributes[fillattr].value
+            else:
+                fill_value = None
+
+            nc4var = nc4ds.createVariable(
+                varname=varname,
+                datatype=var.dtype,
+                dimensions=var.dimensions,
+                fill_value=fill_value
+                # TODO: needs **kwargs
+            )
+
+            data = var.data
+            if hasattr(data, "compute"):
+                da.store(data, nc4var)
+            else:
+                nc4var[:] = data
+
+            for attrname, attr in var.attributes.items():
+                if attrname != "_FillValue":
+                    nc4var.setncattr(attrname, attr._as_python_value())
+
+        for attrname, attr in ncdata.attributes.items():
+            nc4ds.setncattr(attrname, attr._as_python_value())
+
+    finally:
+        if not caller_owns_dataset:
+            nc4ds.close()
+
+
+def from_nc4(
+    nc4_dataset_or_file: Union[nc.Dataset, nc.Group, Path, AnyStr]
+) -> NcData:
+    """
+    Read an NcData from a provided :class:`netCDF4.Dataset`, or filepath.
+    """
+    ncdata = NcData()
+    caller_owns_dataset = hasattr(nc4_dataset_or_file, "variables")
+    if caller_owns_dataset:
+        nc4ds = nc4_dataset_or_file
+    else:
+        nc4ds = nc.Dataset(nc4_dataset_or_file)
+
+    try:
+        for dimname, nc4dim in nc4ds.dimensions.items():
+            ncdata.dimensions[dimname] = NcDimension(dimname, nc4dim.size)
+
+        for varname, nc4var in nc4ds.variables.items():
+            var = NcVariable(
+                name=varname,
+                dimensions=nc4var.dimensions,
+                dtype=nc4var.dtype,
+                group=ncdata,
+            )
+            ncdata.variables[varname] = var
+
+            # Assign a data object : for now, always LAZY (as iris.fileformats.netcdf).
+            # The fill-value is read from the *file* variable : 'var' has no attrs yet.
+            fill_value = getattr(
+                nc4var,
+                "_FillValue",
+                nc.default_fillvals[var.dtype.str[1:]],
+            )
+            shape = tuple(
+                ncdata.dimensions[dimname].size for dimname in var.dimensions
+            )
+            proxy = NetCDFDataProxy(
+                shape=shape,
+                dtype=var.dtype,
+                path=nc4ds.filepath(),
+                variable_name=varname,
+                fill_value=fill_value,
+            )
+            var.data = as_lazy_data(proxy)
+
+            for attrname in nc4var.ncattrs():
+                var.attributes[attrname] = NcAttribute(
+                    attrname, nc4var.getncattr(attrname)
+                )
+
+        for attrname in nc4ds.ncattrs():
+            ncdata.attributes[attrname] = NcAttribute(
+                attrname, nc4ds.getncattr(attrname)
+            )
+
+        # And finally, groups -- by the magic of recursion ...
+        for grpname, group in nc4ds.groups.items():
+            ncdata.groups[grpname] = from_nc4(group)
+
+    finally:
+        if not caller_owns_dataset:
+            nc4ds.close()
+
+    return ncdata

From a7c7b01e1e9a9f1c59903a7e0ae2c70982264624 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 8 Dec 2022 10:28:47 +0000
Subject: [PATCH 6/6] Fixes.

---
 lib/iris/experimental/ncdata/dataset_like.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lib/iris/experimental/ncdata/dataset_like.py b/lib/iris/experimental/ncdata/dataset_like.py
index 523cd6ab7d..d067faa1ad 100644
--- a/lib/iris/experimental/ncdata/dataset_like.py
+++ b/lib/iris/experimental/ncdata/dataset_like.py
@@ -10,7 +10,7 @@
 format load + save, to convert cubes to+from ncdata objects, and hence convert Iris
  cubes to+from an xarray.Dataset.
 
-These classes contain NcDataset and NcVariables, but emulating the access APIs of a
+These classes contain NcData and NcVariables, but emulating the access APIs of a
 netCDF4.Dataset.
 
 Note: currently only supports what is required for Iris load/save capability.
@@ -21,14 +21,14 @@
 """
 import numpy as np
 
-from ._core import NcAttribute, NcDataset, NcDimension, NcVariable
+from ._core import NcAttribute, NcData, NcDimension, NcVariable
 
 
 class _Nc4DatalikeWithNcattrs:
     # A mixin, shared by Nc4DatasetLike and Nc4VariableLike, which adds netcdf-like
-    #  attribute operations'ncattrs / setncattr / getncattr', *AND* extends the local
+    #  attribute operations 'ncattrs / setncattr / getncattr', *AND* extends the local
     #  objects attribute to those things also
-    # N.B. "self._ncdata" is the underlying NcData object : either an NcDataset or
+    # N.B. "self._ncdata" is the underlying NcData object : either an NcData or
     #  NcVariable object.
     def ncattrs(self):
         return list(self._ncdata.attributes.keys())
@@ -71,9 +71,9 @@ def __setattr__(self, attr, value):
 class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
     _local_instance_props = ("_ncdata", "variables")
 
-    def __init__(self, ncdata: NcDataset = None):
+    def __init__(self, ncdata: NcData = None):
         if ncdata is None:
-            ncdata = NcDataset()  # an empty dataset
+            ncdata = NcData()  # an empty dataset
         self._ncdata = ncdata
         # N.B. we need to create + store our OWN variables, as they are wrappers for
         #  the underlying NcVariable objects, with different properties.
@@ -107,7 +107,7 @@ def createVariable(self, varname, datatype, dimensions=(), **encoding):
         if varname in self.variables:
             msg = f'creating duplicate variable "{varname}".'
             raise ValueError(msg)
-        # Add a variable into the underlying NcDataset object.
+        # Add a variable into the underlying NcData object.
         ncvar = NcVariable(
             name=varname,
             dimensions=dimensions,
@@ -131,7 +131,7 @@ def close(self):
     def filepath():
         #
         # Note: for now, let's just not care about this.
-        # we *might* need this to be an optinoal defined item on an NcDataset ??
+        # we *might* need this to be an optional defined item on an NcData ??
         # .. or, we ight need to store an xarray "encoding" somewhere ?
         # TODO: more thought here ?
         # return self.ncdata.encoding.get("source", "")