Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

- name: "Install dependencies"
run: |
conda install --yes "numpy<2" pytest pytest-mock iris xarray filelock requests
conda install --yes numpy pytest pytest-mock iris xarray filelock requests

- name: "Install *latest* Iris"
run: |
Expand Down
62 changes: 55 additions & 7 deletions lib/ncdata/utils/_compare_nc_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,49 @@ def _attribute_arrays_eq(a1, a2):
return result


def _array_element_str(x):
    """Return a string for a single (scalar) numpy array element.

    Numpy's own array printing is deliberately avoided : the value is first
    converted to an equivalent Python object, and ``str()`` is taken of that.
    The aim is output which does not vary with the numpy version (a lesson
    learned the hard way in Iris development !)

    Anything which is not a numpy array is simply passed through ``str()``.
    A masked element gives the fixed string "masked".
    """
    # N.B. the 'and' short-circuits, so "x.dtype" is only touched for real arrays.
    is_typed_array = isinstance(x, np.ndarray) and hasattr(x.dtype, "kind")
    if not is_typed_array:
        return str(x)

    if np.ma.is_masked(x):
        return "masked"

    dtype_kind = x.dtype.kind
    if dtype_kind in "iu":
        # Signed + unsigned integers : print as a plain Python int.
        return str(int(x))
    if dtype_kind == "f":
        # Floating point : print as a plain Python float.
        return str(float(x))

    # Strings, and possibly other things.
    # Not totally clear what other things might occur here.
    return str(x)


def _attribute_str(x):
    """Make a string representing an attribute value.

    Like `_array_element_str`, this does not depend on numpy array printing.

    Parameters
    ----------
    x : object
        The attribute value : a string, a numpy array (of any dimensionality),
        or any other object.

    Returns
    -------
    str
        * a string is returned wrapped in single quotes
        * any other non-array object gives ``str(x)``
        * a 0-dimensional array gives the string for its single element
        * an array with dimensions gives a bracketed, comma-separated list of its
          element strings, with a nested bracketed list for each extra dimension
    """
    if isinstance(x, str):
        return f"'{x}'"
    if not isinstance(x, np.ndarray):
        return str(x)
    if x.ndim < 1:
        return _array_element_str(x)

    # Iterating an array with more than one dimension yields sub-arrays, not
    # scalars.  Those cannot go through `_array_element_str`, which applies
    # int()/float() conversions meant for a single value, so recurse instead.
    parts = [
        (
            _attribute_str(el)
            if isinstance(el, np.ndarray) and el.ndim > 0
            else _array_element_str(el)
        )
        for el in x
    ]
    return f"[{', '.join(parts)}]"


def _attribute_differences(
obj1,
obj2,
Expand All @@ -159,7 +202,7 @@ def _attribute_differences(
"""
Compare attribute name lists.

Does not return results, but appends error messages to 'errs'.
Return a list of error messages.
"""
attrnames, attrnames2 = [
list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs())
Expand Down Expand Up @@ -227,7 +270,7 @@ def fix_orders(attrlist):
# N.B. special comparison to handle strings and NaNs
msg = (
f'{elemname} "{attrname}" attribute values differ : '
f"{attr!r} != {attr2!r}"
f"{_attribute_str(attr)} != {_attribute_str(attr2)}"
)
errs.append(msg)
return errs
Expand Down Expand Up @@ -404,10 +447,16 @@ def getdata(var):
diffinds = [
np.unravel_index(ind, shape=data.shape) for ind in diffinds
]
diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds)
diffinds_str = ", ".join(
str(tuple([int(ind) for ind in x])) for x in diffinds
)
inds_str = f"[{diffinds_str}{ellps}]"
points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds)
points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds)
points_lhs_str = ", ".join(
_array_element_str(data[ind]) for ind in diffinds
)
points_rhs_str = ", ".join(
_array_element_str(data2[ind]) for ind in diffinds
)
points_lhs_str = f"[{points_lhs_str}{ellps}]"
points_rhs_str = f"[{points_rhs_str}{ellps}]"
msg += (
Expand Down Expand Up @@ -435,8 +484,7 @@ def _group_differences(
"""
Inner routine to compare either whole datasets or subgroups.

Note that, rather than returning a list of error strings, it appends them to the
passed arg `errs`. This just makes recursive calling easier.
Returns a list of error strings.
"""
errs = []

Expand Down
4 changes: 3 additions & 1 deletion tests/unit/core/test_NcAttribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ def test_str(self, datatype, structuretype):
# All single values appear as scalars.
value = np.array(value).flatten()[0]

value_repr = repr(value)
value_repr = str(value)
if "string" in datatype and not is_multiple:
value_repr = f"'{value_repr}'"

is_non_numpy = "custom" in datatype or "none" in datatype
if is_non_numpy or (is_multiple and "string" not in datatype):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self):
assert errs == [
(
'<object attributes> "a" attribute values differ : '
"array([0, 1, 2]) != array([0, 1])"
"[0, 1, 2] != [0, 1]"
)
]

Expand All @@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self):
assert errs == [
(
'<object attributes> "a" attribute values differ : '
"array([1, 2, 3]) != array([ 1, 2, 777])"
"[1, 2, 3] != [1, 2, 777]"
)
]

Expand All @@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self):
assert errs == [
(
'<object attributes> "a" attribute values differ : '
"array([1., 2., 3.]) != array([ 1., nan, 3.])"
"[1.0, 2.0, 3.0] != [1.0, nan, 3.0]"
)
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def test_value(self, attr_context):
value_string = "11"
expected = [
f'{self.location_string} "att1" attribute values differ : '
f"array({value_string}) != array(999)"
f"{value_string} != 999"
]
check(errs, expected)

Expand Down
36 changes: 36 additions & 0 deletions tests/unit/utils/compare_nc_datasets/test_variable_differences.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,3 +303,39 @@ def test_real_and_lazy(self, argtypes):
"@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]"
]
check(errs, expected)

@pytest.mark.parametrize(
    "ndiffs", [0, 1, 2], ids=["no_diffs", "one_diff", "two_diffs"]
)
def test_string_data(self, ndiffs):
    """Check the difference messages produced for character-array data."""
    # FOR NOW test only with character arrays, encoded as expected ("S1" dtype)
    words = ["one", "three", "", "seventeen"]
    width = max(map(len, words))
    chars = np.zeros((4, width), dtype="S1")
    for row, word in enumerate(words):
        chars[row, 0 : len(word)] = list(word)

    # Two independent variables with identical content.
    # NOTE(review): ("x") is a plain string, not a 1-tuple -- kept as in the
    # original; confirm this is what NcVariable is meant to receive.
    self.var1 = NcVariable("vx", ("x"), data=chars.copy())
    self.var2 = NcVariable("vx", ("x"), data=chars.copy())

    if ndiffs > 0:
        # modify one character
        self.var2.data[1, 1] = "X"
    if ndiffs > 1:
        # (also) cut short this string
        self.var2.data[3, 3:] = ""

    # compare + check results
    errs = variable_differences(self.var1, self.var2)

    expected_by_ndiffs = {
        1: [
            'Variable "vx" data contents differ, at 1 points: '
            "@INDICES[(1, 1)] : LHS=[b'h'], RHS=[b'X']"
        ],
        2: [
            'Variable "vx" data contents differ, at 7 points: '
            "@INDICES[(1, 1), (3, 3), ...] : "
            "LHS=[b'h', b'e', ...], RHS=[b'X', b'', ...]"
        ],
    }
    # Any other case (i.e. no differences) expects no messages at all.
    expected = expected_by_ndiffs.get(ndiffs, [])
    check(errs, expected)
Loading