Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions lib/iris/experimental/ncdata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An abstract representation of Netcdf structured data, according to the
"Common Data Model" : https://docs.unidata.ucar.edu/netcdf-java/5.3/userguide/common_data_model_overview.html

TODO:
* add consistency checking
* add "direct" netcdf interfacing, i.e. to_nc4/from_nc4

"""
from ._core import NcAttribute, NcData, NcDimension, NcVariable

__all__ = ["NcAttribute", "NcData", "NcDimension", "NcVariable"]
95 changes: 95 additions & 0 deletions lib/iris/experimental/ncdata/_core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An abstract representation of Netcdf data with groups, variables + attributes

This is also provided with a read/write conversion interface to Xarray.

TODO: add direct netcdf file interface (easy, but not yet).

"""
from typing import Dict, Optional, Tuple

import numpy as np

#
# A totally basic and naive representation of netCDF data.
# The structure supports groups, variables, attributes.
# The sole limitation here is that data and attributes appear as numpy-compatible
# array-like values (though this may include dask.array.Array), and hence their types
# are modelled as np.dtype's.
#


class NcData:
    """
    An in-memory representation of a netCDF dataset or group.

    Holds dimensions, variables, attributes and sub-groups, each stored in a
    dict keyed by name, mirroring the netCDF "Common Data Model" structure.
    All constructor arguments are optional; missing collections default to
    fresh empty dicts, so each instance owns its own mutable containers.
    """

    def __init__(
        self,
        name: Optional[str] = None,
        dimensions: Optional[Dict[str, "NcDimension"]] = None,
        variables: Optional[Dict[str, "NcVariable"]] = None,
        attributes: Optional[Dict[str, "NcAttribute"]] = None,
        groups: Optional[Dict[str, "NcData"]] = None,
    ):
        # Name of the dataset/group (None for an anonymous root dataset).
        self.name: Optional[str] = name
        self.dimensions: Dict[str, "NcDimension"] = dimensions or {}
        self.variables: Dict[str, "NcVariable"] = variables or {}
        self.attributes: Dict[str, "NcAttribute"] = attributes or {}
        self.groups: Dict[str, "NcData"] = groups or {}


class NcDimension:
    """A netCDF dimension: a name plus an extent (size)."""

    def __init__(self, name: str, size: int = 0):
        self.name: str = name
        # Following the netCDF convention, a size of zero denotes an
        # "unlimited" dimension.
        self.size: int = size


class NcVariable:
    """
    A netCDF variable: named, dimensioned array data plus attributes.

    The data payload may be any numpy-compatible array object (which may
    include a lazy dask array).  Its dtype is recorded separately, so that a
    variable can be declared (with a known dtype) before any actual data
    array is attached.
    """

    def __init__(
        self,
        name: str,
        dimensions: Optional[Tuple[str, ...]] = None,
        data: Optional[np.ndarray] = None,
        dtype: Optional[np.dtype] = None,
        attributes: Optional[Dict[str, "NcAttribute"]] = None,
        group: Optional["NcData"] = None,
    ):
        self.name = name
        self.dimensions = tuple(dimensions or ())
        if data is not None:
            # Wrap plain sequences as arrays; the data's own dtype then
            # overrides any explicitly-passed 'dtype'.
            if not hasattr(data, "dtype"):
                data = np.asanyarray(data)
            dtype = data.dtype
        self.dtype = dtype
        self.data = data  # Supports lazy, and normally provides a dtype
        self.attributes = attributes or {}
        # The containing NcData group/dataset, if any.
        self.group = group


class NcAttribute:
    """
    A netCDF attribute: a name plus an array-like value.

    Values are always stored as numpy arrays (so they have shape, dtype
    etc.); `_as_python_value` recovers a plain python string for character
    data.
    """

    def __init__(self, name: str, value):
        self.name: str = name
        # Attribute values are arraylike, have dtype
        # TODO: may need to regularise string representations?
        if not hasattr(value, "dtype"):
            value = np.asanyarray(value)
        self.value: np.ndarray = value

    def _as_python_value(self):
        """Return the value, converting character data to a python str."""
        result = self.value
        if result.dtype.kind in ("U", "S"):
            # BUGFIX: extract the python scalar with .item() rather than
            # str(), so that byte ("S" kind) values can actually be decoded
            # below (str() would yield the useless repr "b'...'").
            result = result.item() if result.ndim == 0 else str(result)
        if isinstance(result, bytes):
            result = result.decode()
        return result
24 changes: 24 additions & 0 deletions lib/iris/experimental/ncdata/_nc4_interface_exercise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from iris.experimental.ncdata.netcdf4 import from_nc4, to_nc4
import iris.tests as itsts


def example_nc4_roundtrip():
    """Exercise a netcdf file -> ncdata -> netcdf file round-trip."""
    source_path = itsts.get_data_path(
        ["NetCDF", "stereographic", "toa_brightness_temperature.nc"]
    )
    output_path = "./temp_nc_output.nc"
    to_nc4(from_nc4(source_path), output_path)

    # Check by converting both files to Iris cubes and comparing
    # (a bit of a cheat, but OK for now?)
    import iris

    original_cube = iris.load_cube(source_path)
    roundtrip_cube = iris.load_cube(output_path)
    print("Round-tripped result, as iris cube:")
    print(roundtrip_cube)
    print("\nold-file-cube == new-file-cube ? ", original_cube == roundtrip_cube)


if __name__ == "__main__":
    # Run the round-trip exercise when invoked directly as a script.
    example_nc4_roundtrip()
237 changes: 237 additions & 0 deletions lib/iris/experimental/ncdata/dataset_like.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the LGPL license.
# See COPYING and COPYING.LESSER in the root of the repository for full
# licensing details.
"""
An adaptor layer allowing an NcData to masquerade as a netCDF4.Dataset object.

This is provided primarily to support a re-use of the iris.fileformats.netcdf file
format load + save, to convert cubes to+from ncdata objects, and hence convert Iris
cubes to+from an xarray.Dataset.

These classes contain NcData and NcVariables, but emulating the access APIs of a
netCDF4.Dataset.

Note: currently only supports what is required for Iris load/save capability.
It could conceivably be used for data exchange by *other* code that reads or writes
netcdf files, but that may require API support to be extended, depending on what
additional methods might be used.

"""
import numpy as np

from ._core import NcAttribute, NcData, NcDimension, NcVariable


class _Nc4DatalikeWithNcattrs:
# A mixin, shared by Nc4DatasetLike and Nc4VariableLike, which adds netcdf-like
# attribute operations 'ncattrs / setncattr / getncattr', *AND* extends the local
# objects attribute to those things also
# N.B. "self._ncdata" is the underlying NcData object : either an NcData or
# NcVariable object.
def ncattrs(self):
return list(self._ncdata.attributes.keys())

def getncattr(self, attr):
attrs = self._ncdata.attributes
if attr in attrs:
result = attrs[attr]._as_python_value()
else:
# Don't allow it to issue a KeyError, as this upsets 'getattr' usage.
# Raise an AttributeError instead.
raise AttributeError(attr)
return result

def setncattr(self, attr, value):
# TODO: are we sure we need this translation ??
if isinstance(value, bytes):
value = value.decode("utf-8")
# N.B. using the NcAttribute class for storage also ensures/requires that all
# attributes are cast as numpy arrays (so have shape, dtype etc).
self._ncdata.attributes[attr] = NcAttribute(attr, value)

def __getattr__(self, attr):
# Extend local object attribute access to the ncattrs of the stored data item
# (Yuck, but I think the Iris load code requires it).
return self.getncattr(attr)

def __setattr__(self, attr, value):
if attr in self._local_instance_props:
# N.B. use _local_instance_props to define standard instance attributes, to avoid a
# possible endless loop here.
super().__setattr__(attr, value)
else:
# # if not hasattr(self, '_allsetattrs'):
# # self._allsetattrs = set()
# self._allsetattrs.add(attr)
self.setncattr(attr, value)


class Nc4DatasetLike(_Nc4DatalikeWithNcattrs):
    """An NcData wrapper emulating the netCDF4.Dataset API (as far as Iris needs)."""

    _local_instance_props = ("_ncdata", "variables")

    def __init__(self, ncdata: NcData = None):
        if ncdata is None:
            ncdata = NcData()  # start with an empty dataset
        self._ncdata = ncdata
        # N.B. we must create + hold our OWN variables dict, since these are
        # wrappers around the underlying NcVariable objects, with different
        # properties.
        self.variables = {
            name: Nc4VariableLike._from_ncvariable(var)
            for name, var in self._ncdata.variables.items()
        }

    @property
    def dimensions(self):
        # Expose the dimensions as a simple {name: size} mapping.
        return {
            name: dim.size for name, dim in self._ncdata.dimensions.items()
        }

    @property
    def groups(self):
        return None  # not supported

    def createDimension(self, dimname, size):
        if dimname in self.dimensions:
            msg = f'creating duplicate dimension "{dimname}".'
            raise ValueError(msg)
        self._ncdata.dimensions[dimname] = NcDimension(dimname, size)
        return size

    def createVariable(self, varname, datatype, dimensions=(), **encoding):
        if varname in self.variables:
            msg = f'creating duplicate variable "{varname}".'
            raise ValueError(msg)
        # Register a new variable in the underlying NcData object.
        # Note: created with no data (or attributes), since this is how
        # netCDF4 expects to do it.
        ncvar = NcVariable(
            name=varname,
            dimensions=dimensions,
            group=self._ncdata,
        )
        self._ncdata.variables[varname] = ncvar
        # Wrap it as a netCDF4-like variable + install that here too.
        nc4var = Nc4VariableLike._from_ncvariable(ncvar, dtype=datatype)
        self.variables[varname] = nc4var
        return nc4var

    def sync(self):
        pass

    def close(self):
        self.sync()

    @staticmethod
    def filepath():
        #
        # Note: for now, let's just not care about this.
        # we *might* need this to be an optional defined item on an NcData ??
        # .. or, we might need to store an xarray "encoding" somewhere ?
        # TODO: more thought here ?
        # return self.ncdata.encoding.get("source", "")
        return "<Nc4DatasetLike>"


class Nc4VariableLike(_Nc4DatalikeWithNcattrs):
    """
    An NcVariable wrapper emulating the netCDF4.Variable API.

    Supports the attribute, dtype/shape and data-access operations which the
    Iris netcdf load/save code uses.
    """

    # Names set as ordinary instance attributes by the mixin's __setattr__
    # (anything else becomes a netCDF attribute).  N.B. '_raw_array' is
    # listed here so that assigning it routes via super().__setattr__, which
    # invokes the '_raw_array' property setter defined below.
    _local_instance_props = ("_ncdata", "name", "datatype", "_raw_array")

    def __init__(self, ncvar: NcVariable, datatype: np.dtype):
        self._ncdata = ncvar
        self.name = ncvar.name
        # Note: datatype must be known at creation, which may be before an actual data
        # array is assigned on the ncvar.
        self.datatype = np.dtype(datatype)
        if ncvar.data is None:
            # temporary empty data (to support never-written scalar values)
            # NOTE: significantly, does *not* allocate an actual full array in memory
            # (NOTE(review): np.zeros is lazily-backed on most platforms, but
            # this is implementation behaviour — confirm)
            array = np.zeros(self.shape, self.datatype)
            ncvar.data = array
        # Routed through the '_raw_array' property setter (see above note),
        # which stores onto ncvar.data and syncs self.datatype.
        self._raw_array = ncvar.data

    @classmethod
    def _from_ncvariable(cls, ncvar: NcVariable, dtype: np.dtype = None):
        # Alternate constructor: wrap an existing NcVariable, taking the
        # dtype from the variable itself unless explicitly overridden.
        if dtype is None:
            dtype = ncvar.dtype
        self = cls(
            ncvar=ncvar,
            datatype=dtype,
        )
        return self

    # Label this as an 'emulated' netCDF4.Variable, containing an actual (possibly
    # lazy) array, which can be directly read/written.
    @property
    def _raw_array(self):
        return self._ncdata.data

    @_raw_array.setter
    def _raw_array(self, data):
        # Assigning a new array also updates the recorded dtype to match it.
        self._ncdata.data = data
        self.datatype = data.dtype

    @property
    def group(self):
        # The containing NcData dataset/group of the wrapped variable.
        return self._ncdata.group

    @property
    def dimensions(self):
        # Dimension *names* of the wrapped variable (a tuple of str).
        return self._ncdata.dimensions

    #
    # "Normal" data access is via indexing.
    # N.B. we do still need to support this, e.g. for DimCoords ?
    #
    def __getitem__(self, keys):
        # Only whole-array access ([:]) is supported.
        if keys != slice(None):
            raise IndexError(keys)
        if self.ndim == 0:
            # Scalar variables: return the underlying data object directly.
            return self._ncdata.data
        return self._ncdata.data[keys]

    # The __setitem__ is not required for normal saving.
    # The saver will assign ._raw_array instead
    # TODO: might need to support this for future non-Iris usage ?
    #
    # def __setitem__(self, keys, data):
    #     if keys != slice(None):
    #         raise IndexError(keys)
    #     if not hasattr(data, "dtype"):
    #         raise ValueError(f"nonarray assigned as data : {data}")
    #     if not data.shape == self.shape:
    #         msg = (
    #             f"assigned data has wrong shape : "
    #             f"{data.shape} instead of {self.shape}"
    #         )
    #         raise ValueError(msg)
    #     self._ncdata.data = data
    #     self.datatype = data.dtype

    @property
    def dtype(self):
        return self.datatype

    @property
    def dims(self):
        # netCDF4-compatible alias for 'dimensions'.
        return self.dimensions

    @property
    def ndim(self):
        return len(self.dimensions)

    @property
    def shape(self):
        # Sizes are looked up, by name, from the owning group's dimensions.
        dims = self.group.dimensions
        return tuple(dims[n].size for n in self.dimensions)

    @property
    def size(self):
        # Total number of elements (1 for a scalar, since np.prod(()) == 1).
        return np.prod(self.shape)

    def chunking(self):
        # No chunking control is supported in this emulation.
        return None
Loading