diff --git a/lib/ncdata/_core.py b/lib/ncdata/_core.py
index 9f31277..909c589 100644
--- a/lib/ncdata/_core.py
+++ b/lib/ncdata/_core.py
@@ -326,6 +326,17 @@ def __str__(self):  # noqa: D105
         # NOTE: for 'repr', an interpretable literal string is too complex.
         # So just retain the default "object" address-based representation.
 
+    def copy(self):
+        """
+        Copy self.
+
+        This duplicates structure with new ncdata core objects, but does not duplicate
+        data arrays. See :func:`ncdata.utils.ncdata_copy`.
+        """
+        from ncdata.utils import ncdata_copy
+
+        return ncdata_copy(self)
+
 
 class NcDimension:
     """
@@ -359,6 +370,10 @@ def __repr__(self):  # noqa: D105
     def __str__(self):  # noqa: D105
         return repr(self)
 
+    def copy(self):
+        """Copy self."""
+        return NcDimension(self.name, size=self.size, unlimited=self.unlimited)
+
 
 class NcVariable(_AttributeAccessMixin):
     """
@@ -452,6 +467,25 @@ def __str__(self):  # noqa: D105
         # NOTE: as for NcData, an interpretable 'repr' string is too complex.
         # So just retain the default "object" address-based representation.
 
+    def copy(self):
+        """
+        Copy self.
+
+        Does not duplicate arrays in data or attribute content.
+        See :func:`ncdata.utils.ncdata_copy`.
+        """
+        from ncdata.utils._copy import _attributes_copy
+
+        var = NcVariable(
+            name=self.name,
+            dimensions=self.dimensions,
+            dtype=self.dtype,
+            data=self.data,
+            attributes=_attributes_copy(self.attributes),
+            group=self.group,
+        )
+        return var
+
 
 class NcAttribute:
     """
@@ -528,3 +562,12 @@ def __repr__(self):  # noqa: D105
 
     def __str__(self):  # noqa: D105
         return repr(self)
+
+    def copy(self):
+        """
+        Copy self.
+
+        Does not duplicate array content.
+        See :func:`ncdata.utils.ncdata_copy`.
+        """
+        return NcAttribute(self.name, self.value)
diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py
index dd3dc8c..508abac 100644
--- a/lib/ncdata/utils/__init__.py
+++ b/lib/ncdata/utils/__init__.py
@@ -1,10 +1,11 @@
 """General user utility functions."""
-
 from ._compare_nc_datasets import dataset_differences, variable_differences
+from ._copy import ncdata_copy
 from ._save_errors import save_errors
 
 __all__ = [
-    "save_errors",
     "dataset_differences",
+    "ncdata_copy",
+    "save_errors",
     "variable_differences",
 ]
diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py
index affd6c1..d2c6cca 100644
--- a/lib/ncdata/utils/_compare_nc_datasets.py
+++ b/lib/ncdata/utils/_compare_nc_datasets.py
@@ -317,7 +317,14 @@ def variable_differences(
         errs.append(msg)
 
     # data values
-    is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, dtype2))
+    def _is_strtype(dt):
+        if dt is None:
+            result = False
+        else:
+            result = dt.kind in "SUb"
+        return result
+
+    is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
     # TODO: is this correct check to allow compare between different dtypes?
     if check_var_data and dims == dims2 and is_str == is_str2:
         # N.B. don't check shapes here: we already checked dimensions.
@@ -332,6 +339,11 @@ def getdata(var):
                 # (check for obscure property NOT provided by mimics)
                 assert hasattr(var, "use_nc_get_vars")
                 data = var[:]
+
+            if data is None:
+                # Empty variables still "sort of" work.
+                data = np.array((), dtype=float)
+
             # Return 0D as 1D, as this makes results simpler to interpret.
             if data.ndim == 0:
                 data = data.flatten()
diff --git a/lib/ncdata/utils/_copy.py b/lib/ncdata/utils/_copy.py
new file mode 100644
index 0000000..efd80a3
--- /dev/null
+++ b/lib/ncdata/utils/_copy.py
@@ -0,0 +1,37 @@
+"""Utility to copy NcData objects, but not copying any contained data arrays."""
+
+from ncdata import NameMap, NcAttribute, NcData
+
+
+def _attributes_copy(attrs: NameMap) -> NameMap:
+    return NameMap.from_items(
+        [attr.copy() for attr in attrs.values()],
+        item_type=NcAttribute,
+    )
+
+
+def ncdata_copy(ncdata: NcData) -> NcData:
+    """
+    Return a copy of the data.
+
+    The operation makes fresh copies of all ncdata objects, but does not copy arrays in
+    either variable data or attribute values.
+
+    Parameters
+    ----------
+    ncdata
+        data to copy
+
+    Returns
+    -------
+    ncdata
+        identical but distinct copy of input
+
+    """
+    return NcData(
+        name=ncdata.name,
+        attributes=_attributes_copy(ncdata.attributes),
+        dimensions=[dim.copy() for dim in ncdata.dimensions.values()],
+        variables=[var.copy() for var in ncdata.variables.values()],
+        groups=[ncdata_copy(group) for group in ncdata.groups.values()],
+    )
diff --git a/tests/unit/core/test_NcAttribute.py b/tests/unit/core/test_NcAttribute.py
index eb5ab74..b040ab0 100644
--- a/tests/unit/core/test_NcAttribute.py
+++ b/tests/unit/core/test_NcAttribute.py
@@ -147,3 +147,32 @@ def test_repr_same(self, datatype, structuretype):
         result = str(attr)
         expected = repr(attr)
         assert result == expected
+
+
+class Test_NcAttribute_copy:
+    @staticmethod
+    def eq(attr1, attr2):
+        # Capture the expected equality of an original
+        # attribute and its copy.
+        # In the case of its value, if it is a numpy array,
+        # then it should be the **same identical object**
+        # -- i.e. not a copy (not even a view).
+        result = attr1 is not attr2
+        if result:
+            result = attr1.name == attr2.name and np.all(
+                attr1.value == attr2.value
+            )
+        if result and hasattr(attr1.value, "dtype"):
+            result = attr1.value is attr2.value
+        return result
+
+    def test_empty(self):
+        attr = NcAttribute("x", None)
+        result = attr.copy()
+        assert self.eq(result, attr)
+
+    def test_value(self, datatype, structuretype):
+        value = attrvalue(datatype, structuretype)
+        attr = NcAttribute("x", value=value)
+        result = attr.copy()
+        assert self.eq(result, attr)
diff --git a/tests/unit/core/test_NcData.py b/tests/unit/core/test_NcData.py
index 255f4ef..af822b8 100644
--- a/tests/unit/core/test_NcData.py
+++ b/tests/unit/core/test_NcData.py
@@ -48,3 +48,15 @@ def test_allargs(self):
 
 # Note: str() and repr() of NcData are too complex to test unit-wise.
 # See integration tests for some sample results.
+
+
+class Test_NcData_copy:
+    # We only need to check that this calls "ncdata_copy", which is tested elsewhere.
+    def test(self, mocker):
+        mock_copied_ncdata = mocker.sentinel.copied_result
+        mock_copycall = mocker.Mock(return_value=mock_copied_ncdata)
+        mocker.patch("ncdata.utils.ncdata_copy", mock_copycall)
+        ncdata = NcData()
+        result = ncdata.copy()
+        mock_copycall.assert_called_once_with(ncdata)
+        assert result == mock_copied_ncdata
diff --git a/tests/unit/core/test_NcDimension.py b/tests/unit/core/test_NcDimension.py
index 42cec19..8b42f80 100644
--- a/tests/unit/core/test_NcDimension.py
+++ b/tests/unit/core/test_NcDimension.py
@@ -57,3 +57,15 @@ def test_str_repr_same(self, size, unlim_type):
         sample = NcDimension("this", size)
         result = str(sample)
         assert result == repr(sample)
+
+
+class Test_NcDimension_copy:
+    @pytest.mark.parametrize("size", [0, 2])
+    @pytest.mark.parametrize("unlim", [False, True])
+    def test(self, size, unlim):
+        sample = NcDimension("this", size, unlimited=unlim)
+        result = sample.copy()
+        assert result is not sample
+        assert result.name == sample.name
+        assert result.size == sample.size
+        assert result.unlimited == sample.unlimited
diff --git a/tests/unit/core/test_NcVariable.py b/tests/unit/core/test_NcVariable.py
index dbc3e7c..14182e3 100644
--- a/tests/unit/core/test_NcVariable.py
+++ b/tests/unit/core/test_NcVariable.py
@@ -8,6 +8,7 @@
 import pytest
 
 from ncdata import NcAttribute, NcVariable
+from ncdata.utils import variable_differences
 
 
 class Test_NcVariable__init__:
@@ -218,3 +219,36 @@ def test_repr(self):
         result = repr(var)
         expected = f""
         assert result == expected
+
+
+class Test_NcVariable_copy:
+    @staticmethod
+    def check_var_iscopy(trial, reference):
+        # capture expected copy equivalence check.
+        # Which is : equal but distinct, containing same data array
+        assert trial is not reference
+        assert not variable_differences(trial, reference)
+        assert trial.data is reference.data
+
+    def test_minimal_nodata(self):
+        var = NcVariable(name="x")
+        result = var.copy()
+        assert var.data is None
+        self.check_var_iscopy(result, var)
+
+    def test_populated(self):
+        var = NcVariable(
+            name="x",
+            dimensions=(
+                "y",
+                "x",
+            ),
+            attributes={
+                "a": 1,
+                "v": np.array([1, 2, 3]),
+                "s": "some characters",
+            },
+            data=np.array([[1.0, 2, 3], [11, 12, 13]]),
+        )
+        result = var.copy()
+        self.check_var_iscopy(result, var)
diff --git a/tests/unit/utils/test_ncdata_copy.py b/tests/unit/utils/test_ncdata_copy.py
new file mode 100644
index 0000000..a26fe52
--- /dev/null
+++ b/tests/unit/utils/test_ncdata_copy.py
@@ -0,0 +1,96 @@
+"""Tests for function :func:`ncdata.utils.ncdata_copy`.
+
+This is the generic utility function version of the copy operation.
+"""
+import numpy as np
+import pytest
+
+from ncdata import NameMap, NcAttribute, NcData, NcDimension, NcVariable
+from ncdata.utils import dataset_differences, ncdata_copy
+
+
+def _ncdata_duplicate_object(d1: NcData, d2: NcData):
+    """Find and return the first known duplicate objects between two NcData."""
+    dup = None
+    for var1, var2 in zip(d1.variables.values(), d2.variables.values()):
+        if var1 is var2:
+            dup = var1
+            break
+    if not dup:
+        for dim1, dim2 in zip(d1.dimensions.values(), d2.dimensions.values()):
+            if dim1 is dim2:
+                dup = dim1
+                break
+    if not dup:
+        for attr1, attr2 in zip(
+            d1.attributes.values(), d2.attributes.values()
+        ):
+            if attr1 is attr2:
+                dup = attr1
+                break
+    if not dup:
+        for grp1, grp2 in zip(d1.groups.values(), d2.groups.values()):
+            if grp1 is grp2:
+                dup = grp1
+                break
+    return dup
+
+
+def differences_or_duplicated_objects(original: NcData, duplicate: NcData):
+    # Return difference messages or duplicate objects between two NcData
+    results = dataset_differences(original, duplicate)
+    if not results:
+        results = _ncdata_duplicate_object(original, duplicate)
+    return results
+
+
+class Test:
+    def test_empty(self):
+        sample = NcData()
+        result = ncdata_copy(sample)
+        assert not differences_or_duplicated_objects(sample, result)
+
+    @pytest.fixture()
+    def sample(self):
+        attrs = NameMap.from_items(
+            [NcAttribute("q", 3)], item_type=NcAttribute
+        )
+        dims = NameMap.from_items([NcDimension("x", 3)], item_type=NcDimension)
+        data_array = np.array([1, 2, 3])
+        var = NcVariable(
+            name="a", dimensions=("x",), data=data_array, attributes=attrs
+        )
+        sample = NcData(
+            dimensions=dims,
+            variables=[var],
+            attributes=attrs,
+            groups=[
+                NcData("g1", dimensions=dims, variables=[var]),
+                NcData("g2", dimensions=dims, variables=[var]),
+            ],
+        )
+        assert sample.variables["a"].data is data_array
+        assert sample.groups["g1"].variables["a"].data is data_array
+        assert sample.groups["g2"].variables["a"].data is data_array
+        return sample
+
+    def test_general(self, sample):
+        result = ncdata_copy(sample)
+        assert not differences_or_duplicated_objects(sample, result)
+
+    def test_sample_data(self, sample):
+        # Check that data arrays are *not* copied, in both variables and attributes
+        arr1 = np.array([9.1, 7, 4])
+        sample.set_attrval("extra", arr1)
+        assert sample.attributes["extra"].value is arr1
+
+        result = ncdata_copy(sample)
+
+        assert (
+            result.attributes["extra"].value
+            is sample.attributes["extra"].value
+        )
+        data_arr = sample.variables["a"].data
+        assert result.variables["a"].data is data_arr
+        assert result.groups["g1"].variables["a"].data is data_arr
+        assert result.groups["g2"].variables["a"].data is data_arr