From df4c85d100fdfe3440c02882e6248ee1c4b4d3a7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 6 Feb 2025 17:21:24 +0000 Subject: [PATCH 1/3] Make dataset-difference independent of numpy array-printout, hence numpy version. --- lib/ncdata/utils/_compare_nc_datasets.py | 62 ++++++++++++++++--- tests/unit/core/test_NcAttribute.py | 4 +- .../test_dataset_differences__additional.py | 6 +- ...test_dataset_differences__mainfunctions.py | 2 +- 4 files changed, 62 insertions(+), 12 deletions(-) diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index 655babf..b4fd791 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -148,6 +148,49 @@ def _attribute_arrays_eq(a1, a2): return result +def _array_element_str(x): + """Make a string representation of a numpy array element (scalar). + + Does *not* rely on numpy array printing. + Instead converts to an equivalent Python object, and takes str(that). + Hopefully delivers independence of numpy version (a lesson learned the hard way + in Iris development !) + """ + if not isinstance(x, np.ndarray) or not hasattr(x.dtype, "kind"): + result = str(x) + elif np.ma.is_masked(x): + result = "masked" + else: + kind = x.dtype.kind + if kind in "iu": + result = int(x) + elif kind == "f": + result = float(x) + else: + # Strings, and possibly other things. + # Not totally clear what other things might occur here. + result = str(x) + result = str(result) + return result + + +def _attribute_str(x): + """Make a string representing an attribute value. + + Like the above, not depending on numpy array printing. 
+ """ + if isinstance(x, str): + result = f"'{x}'" + elif not isinstance(x, np.ndarray): + result = str(x) + elif x.ndim < 1: + result = _array_element_str(x) + else: + els = [_array_element_str(el) for el in x] + result = f"[{', '.join(els)}]" + return result + + def _attribute_differences( obj1, obj2, @@ -159,7 +202,7 @@ def _attribute_differences( """ Compare attribute name lists. - Does not return results, but appends error messages to 'errs'. + Return a list of error messages. """ attrnames, attrnames2 = [ list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs()) @@ -227,7 +270,7 @@ def fix_orders(attrlist): # N.B. special comparison to handle strings and NaNs msg = ( f'{elemname} "{attrname}" attribute values differ : ' - f"{attr!r} != {attr2!r}" + f"{_attribute_str(attr)} != {_attribute_str(attr2)}" ) errs.append(msg) return errs @@ -404,10 +447,16 @@ def getdata(var): diffinds = [ np.unravel_index(ind, shape=data.shape) for ind in diffinds ] - diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds) + diffinds_str = ", ".join( + str(tuple([int(ind) for ind in x])) for x in diffinds + ) inds_str = f"[{diffinds_str}{ellps}]" - points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds) - points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds) + points_lhs_str = ", ".join( + _array_element_str(data[ind]) for ind in diffinds + ) + points_rhs_str = ", ".join( + _array_element_str(data2[ind]) for ind in diffinds + ) points_lhs_str = f"[{points_lhs_str}{ellps}]" points_rhs_str = f"[{points_rhs_str}{ellps}]" msg += ( @@ -435,8 +484,7 @@ def _group_differences( """ Inner routine to compare either whole datasets or subgroups. - Note that, rather than returning a list of error strings, it appends them to the - passed arg `errs`. This just makes recursive calling easier. + Returns a list of error strings. 
""" errs = [] diff --git a/tests/unit/core/test_NcAttribute.py b/tests/unit/core/test_NcAttribute.py index 283257b..26ef2e1 100644 --- a/tests/unit/core/test_NcAttribute.py +++ b/tests/unit/core/test_NcAttribute.py @@ -130,7 +130,9 @@ def test_str(self, datatype, structuretype): # All single values appear as scalars. value = np.array(value).flatten()[0] - value_repr = repr(value) + value_repr = str(value) + if "string" in datatype and not is_multiple: + value_repr = f"'{value_repr}'" is_non_numpy = "custom" in datatype or "none" in datatype if is_non_numpy or (is_multiple and "string" not in datatype): diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py index 1c3e20e..eea7880 100644 --- a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py +++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py @@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self): assert errs == [ ( ' "a" attribute values differ : ' - "array([0, 1, 2]) != array([0, 1])" + "[0, 1, 2] != [0, 1]" ) ] @@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self): assert errs == [ ( ' "a" attribute values differ : ' - "array([1, 2, 3]) != array([ 1, 2, 777])" + "[1, 2, 3] != [1, 2, 777]" ) ] @@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self): assert errs == [ ( ' "a" attribute values differ : ' - "array([1., 2., 3.]) != array([ 1., nan, 3.])" + "[1.0, 2.0, 3.0] != [1.0, nan, 3.0]" ) ] diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py index 1e03e9f..4d3a329 100644 --- a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py +++ 
b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py @@ -270,7 +270,7 @@ def test_value(self, attr_context): value_string = "11" expected = [ f'{self.location_string} "att1" attribute values differ : ' - f"array({value_string}) != array(999)" + f"{value_string} != 999" ] check(errs, expected) From 22ea3bb02473099032f3f6b19a7688978678c075 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 6 Feb 2025 17:23:23 +0000 Subject: [PATCH 2/3] unpin numpy in tests --- .github/workflows/ci-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 72e7fa2..2bd80e9 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -35,7 +35,7 @@ jobs: - name: "Install dependencies" run: | - conda install --yes "numpy<2" pytest pytest-mock iris xarray filelock requests + conda install --yes numpy pytest pytest-mock iris xarray filelock requests - name: "Install *latest* Iris" run: | From ff8f79218811072a9105f5533e9ebf485ca09bf9 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 7 Feb 2025 11:55:48 +0000 Subject: [PATCH 3/3] Add character data difference test. 
--- .../test_variable_differences.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py index 986008b..840d8ef 100644 --- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -303,3 +303,39 @@ def test_real_and_lazy(self, argtypes): "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]" ] check(errs, expected) + + @pytest.mark.parametrize( + "ndiffs", [0, 1, 2], ids=["no_diffs", "one_diff", "two_diffs"] + ) + def test_string_data(self, ndiffs): + # FOR NOW test only with character arrays, encoded as expected ("S1" dtype) + strings = ["one", "three", "", "seventeen"] + str_len = max(len(x) for x in strings) + chararray = np.zeros((4, str_len), dtype="S1") + for ind, el in enumerate(strings): + chararray[ind, 0 : len(el)] = list(el) + self.var1, self.var2 = [ + NcVariable("vx", ("x"), data=chararray.copy()) for ind in range(2) + ] + + if ndiffs > 0: + self.var2.data[1, 1] = "X" # modify one character + if ndiffs > 1: + self.var2.data[3, 3:] = "" # (also) cut short this string + + # compare + check results + errs = variable_differences(self.var1, self.var2) + + expected = [] + if ndiffs == 1: + expected = [ + 'Variable "vx" data contents differ, at 1 points: ' + "@INDICES[(1, 1)] : LHS=[b'h'], RHS=[b'X']" + ] + elif ndiffs == 2: + expected = [ + 'Variable "vx" data contents differ, at 7 points: ' + "@INDICES[(1, 1), (3, 3), ...] : " + "LHS=[b'h', b'e', ...], RHS=[b'X', b'', ...]" + ] + check(errs, expected)