Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions lib/ncdata/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,17 @@ def __str__(self): # noqa: D105
# NOTE: for 'repr', an interpretable literal string is too complex.
# So just retain the default "object" address-based representation.

def copy(self):
    """
    Return a structural copy of this dataset.

    The work is delegated to :func:`ncdata.utils.ncdata_copy`, which creates new
    ncdata core objects throughout, but does not duplicate the data arrays.
    """
    # N.B. imported at call time, not at module level.
    from ncdata.utils import ncdata_copy as _copy_dataset

    return _copy_dataset(self)


class NcDimension:
"""
Expand Down Expand Up @@ -359,6 +370,10 @@ def __repr__(self): # noqa: D105
def __str__(self): # noqa: D105
return repr(self)

def copy(self):
    """Return a new dimension with the same name, size and unlimited setting."""
    duplicate = NcDimension(
        self.name,
        size=self.size,
        unlimited=self.unlimited,
    )
    return duplicate


class NcVariable(_AttributeAccessMixin):
"""
Expand Down Expand Up @@ -452,6 +467,25 @@ def __str__(self): # noqa: D105
# NOTE: as for NcData, an interpretable 'repr' string is too complex.
# So just retain the default "object" address-based representation.

def copy(self):
    """
    Return a new variable equivalent to this one.

    The attributes are copied into a new attributes map, but arrays are not
    duplicated, in either the variable data or the attribute content.
    See :func:`ncdata.utils.ncdata_copy`.
    """
    # N.B. imported at call time, not at module level.
    from ncdata.utils._copy import _attributes_copy

    return NcVariable(
        name=self.name,
        dimensions=self.dimensions,
        dtype=self.dtype,
        # The data is passed on as-is : the copy refers to the same array.
        data=self.data,
        attributes=_attributes_copy(self.attributes),
        # The copy is given the same 'group' reference as the original.
        group=self.group,
    )


class NcAttribute:
"""
Expand Down Expand Up @@ -528,3 +562,12 @@ def __repr__(self): # noqa: D105

def __str__(self): # noqa: D105
return repr(self)

def copy(self):
    """
    Return a new attribute with the same name and value.

    The value is passed on as-is, so any array content is not duplicated.
    See :func:`ncdata.utils.ncdata_copy`.
    """
    name, value = self.name, self.value
    return NcAttribute(name, value)
5 changes: 3 additions & 2 deletions lib/ncdata/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""General user utility functions."""

from ._compare_nc_datasets import dataset_differences, variable_differences
from ._copy import ncdata_copy
from ._save_errors import save_errors

# The public names of this package : each listed once, in alphabetical order.
__all__ = [
    "dataset_differences",
    "ncdata_copy",
    "save_errors",
    "variable_differences",
]
14 changes: 13 additions & 1 deletion lib/ncdata/utils/_compare_nc_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,14 @@ def variable_differences(
errs.append(msg)

# data values
def _is_strtype(dt):
    """Return whether `dt` has a kind in "SUb" : a dtype of None never does."""
    # A missing (None) dtype has no "kind" to inspect, so test for that first.
    return dt is not None and dt.kind in "SUb"

# N.B. only ever inspect the dtypes via the None-safe helper : accessing
# "dt.kind" directly fails when a variable has no dtype.
is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
# TODO: is this correct check to allow compare between different dtypes?
if check_var_data and dims == dims2 and is_str == is_str2:
# N.B. don't check shapes here: we already checked dimensions.
Expand All @@ -332,6 +339,11 @@ def getdata(var):
# (check for obscure property NOT provided by mimics)
assert hasattr(var, "use_nc_get_vars")
data = var[:]

if data is None:
# Empty variables still "sort of" work.
data = np.array((), dtype=float)

# Return 0D as 1D, as this makes results simpler to interpret.
if data.ndim == 0:
data = data.flatten()
Expand Down
37 changes: 37 additions & 0 deletions lib/ncdata/utils/_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Utility to copy NcData objects, but not copying any contained data arrays."""

from ncdata import NameMap, NcAttribute, NcData


def _attributes_copy(attrs: NameMap) -> NameMap:
    """Return a new attributes map, containing a copy of each attribute in `attrs`."""
    copied_attrs = [attribute.copy() for attribute in attrs.values()]
    return NameMap.from_items(copied_attrs, item_type=NcAttribute)


def ncdata_copy(ncdata: NcData) -> NcData:
    """
    Make a copy of a dataset, without copying its arrays.

    All the ncdata objects in the result are newly created, but the arrays in
    variable data and attribute values are not copied.

    Parameters
    ----------
    ncdata
        data to copy

    Returns
    -------
    ncdata
        identical but distinct copy of input

    """
    name = ncdata.name
    attributes = _attributes_copy(ncdata.attributes)
    dimensions = [dim.copy() for dim in ncdata.dimensions.values()]
    variables = [var.copy() for var in ncdata.variables.values()]
    # Sub-groups are themselves datasets, so they are copied recursively.
    groups = [ncdata_copy(subgroup) for subgroup in ncdata.groups.values()]
    return NcData(
        name=name,
        attributes=attributes,
        dimensions=dimensions,
        variables=variables,
        groups=groups,
    )
29 changes: 29 additions & 0 deletions tests/unit/core/test_NcAttribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,32 @@ def test_repr_same(self, datatype, structuretype):
result = str(attr)
expected = repr(attr)
assert result == expected


class Test_NcAttribute_copy:
    @staticmethod
    def eq(attr1, attr2):
        """
        Check whether two attributes are related as an original and its copy.

        They must be distinct objects, with equal names and equal values.
        In the case of the value, if it is a numpy array (has a "dtype"), then
        it should be the **same identical object** -- i.e. not a copy (not even
        a view).

        Returns a true value if all of those conditions hold, else a false one.
        """
        result = attr1 is not attr2
        if result:
            # N.B. compare the name of one attribute with that of the *other*.
            result = attr1.name == attr2.name and np.all(
                attr1.value == attr2.value
            )
        if result and hasattr(attr1.value, "dtype"):
            result = attr1.value is attr2.value
        return result

    def test_empty(self):
        attr = NcAttribute("x", None)
        result = attr.copy()
        assert self.eq(result, attr)

    def test_value(self, datatype, structuretype):
        value = attrvalue(datatype, structuretype)
        attr = NcAttribute("x", value=value)
        result = attr.copy()
        assert self.eq(result, attr)
12 changes: 12 additions & 0 deletions tests/unit/core/test_NcData.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,15 @@ def test_allargs(self):

# Note: str() and repr() of NcData are too complex to test unit-wise.
# See integration tests for some sample results.


class Test_NcData_copy:
    # We only need to check that this calls "ncdata_copy", which is tested elsewhere.
    def test(self, mocker):
        mock_copied_ncdata = mocker.sentinel.copied_result
        mock_copycall = mocker.Mock(return_value=mock_copied_ncdata)
        mocker.patch("ncdata.utils.ncdata_copy", mock_copycall)
        ncdata = NcData()
        result = ncdata.copy()
        # N.B. use the real mock assertion method, which raises an AssertionError
        # itself.  A misspelt name would just auto-create a child mock, which is
        # always "true", so that asserting on it could never fail.
        mock_copycall.assert_called_once_with(ncdata)
        # The result must be exactly the object which the copy call returned.
        assert result is mock_copied_ncdata
12 changes: 12 additions & 0 deletions tests/unit/core/test_NcDimension.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,15 @@ def test_str_repr_same(self, size, unlim_type):
sample = NcDimension("this", size)
result = str(sample)
assert result == repr(sample)


class Test_NcDimension_copy:
    """Check that a copied dimension is a distinct object with equal properties."""

    @pytest.mark.parametrize("size", [0, 2])
    @pytest.mark.parametrize("unlim", [False, True])
    def test(self, size, unlim):
        original = NcDimension("this", size, unlimited=unlim)
        duplicate = original.copy()
        assert duplicate is not original
        # Every property of the copy should match the original.
        for propname in ("name", "size", "unlimited"):
            assert getattr(duplicate, propname) == getattr(original, propname)
34 changes: 34 additions & 0 deletions tests/unit/core/test_NcVariable.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

from ncdata import NcAttribute, NcVariable
from ncdata.utils import variable_differences


class Test_NcVariable__init__:
Expand Down Expand Up @@ -218,3 +219,36 @@ def test_repr(self):
result = repr(var)
expected = f"<ncdata._core.NcVariable object at 0x{id(var):012x}>"
assert result == expected


class Test_NcVariable_copy:
    @staticmethod
    def check_var_iscopy(trial, reference):
        """
        Assert that `trial` is a valid copy of `reference`.

        That is : a distinct object, with no reported differences, which
        contains the very same data array.
        """
        assert trial is not reference
        differences = variable_differences(trial, reference)
        assert not differences
        assert trial.data is reference.data

    def test_minimal_nodata(self):
        original = NcVariable(name="x")
        duplicate = original.copy()
        assert original.data is None
        self.check_var_iscopy(duplicate, original)

    def test_populated(self):
        attributes = {
            "a": 1,
            "v": np.array([1, 2, 3]),
            "s": "some characters",
        }
        data = np.array([[1.0, 2, 3], [11, 12, 13]])
        original = NcVariable(
            name="x",
            dimensions=("y", "x"),
            attributes=attributes,
            data=data,
        )
        duplicate = original.copy()
        self.check_var_iscopy(duplicate, original)
96 changes: 96 additions & 0 deletions tests/unit/utils/test_ncdata_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
"""Tests for function :func:`ncdata.utils.ncdata_copy`.

This is the generic utility function version of the copy operation.
"""
import numpy as np
import pytest

from ncdata import NameMap, NcAttribute, NcData, NcDimension, NcVariable
from ncdata.utils import dataset_differences, ncdata_copy


def _ncdata_duplicate_object(d1: NcData, d2: NcData):
    """
    Find and return the first known duplicate object between two NcData.

    Corresponding (same-position) variables, dimensions, attributes and groups of
    the two datasets are compared by identity, in that order.  If none of those
    is shared, then corresponding sub-groups are searched in the same way.

    Returns the first object found which belongs to both, or None if there is none.
    """
    for component in ("variables", "dimensions", "attributes", "groups"):
        items1 = getattr(d1, component).values()
        items2 = getattr(d2, component).values()
        for item1, item2 in zip(items1, items2):
            if item1 is item2:
                return item1

    # Nothing is shared at this level : look inside the corresponding sub-groups.
    for grp1, grp2 in zip(d1.groups.values(), d2.groups.values()):
        dup = _ncdata_duplicate_object(grp1, grp2)
        # N.B. test against None rather than truthiness, so that a shared object
        # which happens to evaluate as false can not be missed.
        if dup is not None:
            return dup

    return None


def differences_or_duplicated_objects(original: NcData, duplicate: NcData):
    """
    Return difference messages or duplicate objects between two NcData.

    Any difference messages take priority : the check for duplicated objects is
    only made when no differences are reported.
    """
    return dataset_differences(
        original, duplicate
    ) or _ncdata_duplicate_object(original, duplicate)


class Test:
    def test_empty(self):
        sample = NcData()
        result = ncdata_copy(sample)
        assert not differences_or_duplicated_objects(sample, result)

    @pytest.fixture()
    def sample(self):
        """
        Build a sample dataset with attributes, a dimension, a variable and groups.

        The same variable (and so the same data array) is used in the root dataset
        and in both of its groups.
        """
        attrs = NameMap.from_items(
            [NcAttribute("q", 3)], item_type=NcAttribute
        )
        dims = NameMap.from_items([NcDimension("x", 3)], item_type=NcDimension)
        data_array = np.array([1, 2, 3])
        # N.B. the trailing comma matters : ("x") is just the string "x", which
        # only behaves like a 1-tuple of names because the name is one character.
        var = NcVariable(
            name="a", dimensions=("x",), data=data_array, attributes=attrs
        )
        sample = NcData(
            dimensions=dims,
            variables=[var],
            attributes=attrs,
            groups=[
                NcData("g1", dimensions=dims, variables=[var]),
                NcData("g2", dimensions=dims, variables=[var]),
            ],
        )
        # Confirm that constructing the sample did not itself copy the data array.
        assert sample.variables["a"].data is data_array
        assert sample.groups["g1"].variables["a"].data is data_array
        assert sample.groups["g2"].variables["a"].data is data_array
        return sample

    def test_general(self, sample):
        result = ncdata_copy(sample)
        assert not differences_or_duplicated_objects(sample, result)

    def test_sample_data(self, sample):
        # Check that data arrays are *not* copied, in both variables and attributes
        arr1 = np.array([9.1, 7, 4])
        sample.set_attrval("extra", arr1)
        assert sample.attributes["extra"].value is arr1

        result = ncdata_copy(sample)

        assert (
            result.attributes["extra"].value
            is sample.attributes["extra"].value
        )
        data_arr = sample.variables["a"].data
        assert result.variables["a"].data is data_arr
        assert result.groups["g1"].variables["a"].data is data_arr
        assert result.groups["g2"].variables["a"].data is data_arr