From df4c85d100fdfe3440c02882e6248ee1c4b4d3a7 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 6 Feb 2025 17:21:24 +0000
Subject: [PATCH 1/3] Make dataset-difference independent of numpy
 array-printout, hence numpy version.

---
 lib/ncdata/utils/_compare_nc_datasets.py      | 62 ++++++++++++++++---
 tests/unit/core/test_NcAttribute.py           |  4 +-
 .../test_dataset_differences__additional.py   |  6 +-
 ...test_dataset_differences__mainfunctions.py |  2 +-
 4 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py
index 655babf..b4fd791 100644
--- a/lib/ncdata/utils/_compare_nc_datasets.py
+++ b/lib/ncdata/utils/_compare_nc_datasets.py
@@ -148,6 +148,49 @@ def _attribute_arrays_eq(a1, a2):
     return result
 
 
+def _array_element_str(x):
+    """Make a string representation of a numpy array element (scalar).
+
+    Does *not* rely on numpy array printing.
+    Instead converts to an equivalent Python object, and takes str(that).
+    Hopefully delivers independence of numpy version (a lesson learned the hard
+    way in Iris development!)
+    """
+    if not isinstance(x, np.ndarray) or not hasattr(x.dtype, "kind"):
+        result = str(x)
+    elif np.ma.is_masked(x):
+        result = "masked"
+    else:
+        kind = x.dtype.kind
+        if kind in "iu":
+            result = int(x)
+        elif kind == "f":
+            result = float(x)
+        else:
+            # Strings, and possibly other things.
+            # Not totally clear what other things might occur here.
+            result = str(x)
+        result = str(result)
+    return result
+
+
+def _attribute_str(x):
+    """Make a string representing an attribute value.
+
+    Like ``_array_element_str``, this does not depend on numpy array printing.
+    """
+    if isinstance(x, str):
+        result = f"'{x}'"
+    elif not isinstance(x, np.ndarray):
+        result = str(x)
+    elif x.ndim < 1:
+        result = _array_element_str(x)
+    else:
+        els = [_array_element_str(el) for el in x]
+        result = f"[{', '.join(els)}]"
+    return result
+
+
 def _attribute_differences(
     obj1,
     obj2,
@@ -159,7 +202,7 @@ def _attribute_differences(
     """
     Compare attribute name lists.
 
-    Does not return results, but appends error messages to 'errs'.
+    Return a list of error messages.
     """
     attrnames, attrnames2 = [
         list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs())
@@ -227,7 +270,7 @@ def fix_orders(attrlist):
                 # N.B. special comparison to handle strings and NaNs
                 msg = (
                     f'{elemname} "{attrname}" attribute values differ : '
-                    f"{attr!r} != {attr2!r}"
+                    f"{_attribute_str(attr)} != {_attribute_str(attr2)}"
                 )
                 errs.append(msg)
     return errs
@@ -404,10 +447,16 @@ def getdata(var):
             diffinds = [
                 np.unravel_index(ind, shape=data.shape) for ind in diffinds
             ]
-            diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds)
+            diffinds_str = ", ".join(
+                str(tuple([int(ind) for ind in x])) for x in diffinds
+            )
             inds_str = f"[{diffinds_str}{ellps}]"
-            points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds)
-            points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds)
+            points_lhs_str = ", ".join(
+                _array_element_str(data[ind]) for ind in diffinds
+            )
+            points_rhs_str = ", ".join(
+                _array_element_str(data2[ind]) for ind in diffinds
+            )
             points_lhs_str = f"[{points_lhs_str}{ellps}]"
             points_rhs_str = f"[{points_rhs_str}{ellps}]"
             msg += (
@@ -435,8 +484,7 @@ def _group_differences(
     """
     Inner routine to compare either whole datasets or subgroups.
 
-    Note that, rather than returning a list of error strings, it appends them to the
-    passed arg `errs`.  This just makes recursive calling easier.
+    Returns a list of error strings.
     """
     errs = []
 
diff --git a/tests/unit/core/test_NcAttribute.py b/tests/unit/core/test_NcAttribute.py
index 283257b..26ef2e1 100644
--- a/tests/unit/core/test_NcAttribute.py
+++ b/tests/unit/core/test_NcAttribute.py
@@ -130,7 +130,9 @@ def test_str(self, datatype, structuretype):
             # All single values appear as scalars.
             value = np.array(value).flatten()[0]
 
-        value_repr = repr(value)
+        value_repr = str(value)
+        if "string" in datatype and not is_multiple:
+            value_repr = f"'{value_repr}'"
 
         is_non_numpy = "custom" in datatype or "none" in datatype
         if is_non_numpy or (is_multiple and "string" not in datatype):
diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py
index 1c3e20e..eea7880 100644
--- a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py
+++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py
@@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([0, 1, 2]) != array([0, 1])"
+                "[0, 1, 2] != [0, 1]"
             )
         ]
 
@@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([1, 2, 3]) != array([  1,   2, 777])"
+                "[1, 2, 3] != [1, 2, 777]"
             )
         ]
 
@@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([1., 2., 3.]) != array([ 1., nan,  3.])"
+                "[1.0, 2.0, 3.0] != [1.0, nan, 3.0]"
             )
         ]
 
diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py
index 1e03e9f..4d3a329 100644
--- a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py
+++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py
@@ -270,7 +270,7 @@ def test_value(self, attr_context):
             value_string = "11"
         expected = [
             f'{self.location_string} "att1" attribute values differ : '
-            f"array({value_string}) != array(999)"
+            f"{value_string} != 999"
         ]
         check(errs, expected)
 

From 22ea3bb02473099032f3f6b19a7688978678c075 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Thu, 6 Feb 2025 17:23:23 +0000
Subject: [PATCH 2/3] unpin numpy in tests

---
 .github/workflows/ci-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index 72e7fa2..2bd80e9 100644
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -35,7 +35,7 @@ jobs:
 
       - name: "Install dependencies"
         run: |
-          conda install --yes "numpy<2" pytest pytest-mock iris xarray filelock requests
+          conda install --yes numpy pytest pytest-mock iris xarray filelock requests
 
       - name: "Install *latest* Iris"
         run: |

From ff8f79218811072a9105f5533e9ebf485ca09bf9 Mon Sep 17 00:00:00 2001
From: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
Date: Fri, 7 Feb 2025 11:55:48 +0000
Subject: [PATCH 3/3] Add character data difference test.

---
 .../test_variable_differences.py              | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
index 986008b..840d8ef 100644
--- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
+++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
@@ -303,3 +303,39 @@ def test_real_and_lazy(self, argtypes):
             "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]"
         ]
         check(errs, expected)
+
+    @pytest.mark.parametrize(
+        "ndiffs", [0, 1, 2], ids=["no_diffs", "one_diff", "two_diffs"]
+    )
+    def test_string_data(self, ndiffs):
+        # FOR NOW test only with character arrays, encoded as expected ("S1" dtype)
+        strings = ["one", "three", "", "seventeen"]
+        str_len = max(len(x) for x in strings)
+        chararray = np.zeros((4, str_len), dtype="S1")
+        for ind, el in enumerate(strings):
+            chararray[ind, 0 : len(el)] = list(el)
+        self.var1, self.var2 = [
+            NcVariable("vx", ("x"), data=chararray.copy()) for ind in range(2)
+        ]
+
+        if ndiffs > 0:
+            self.var2.data[1, 1] = "X"  # modify one character
+        if ndiffs > 1:
+            self.var2.data[3, 3:] = ""  # (also) cut short this string
+
+        # compare + check results
+        errs = variable_differences(self.var1, self.var2)
+
+        expected = []
+        if ndiffs == 1:
+            expected = [
+                'Variable "vx" data contents differ, at 1 points: '
+                "@INDICES[(1, 1)] : LHS=[b'h'], RHS=[b'X']"
+            ]
+        elif ndiffs == 2:
+            expected = [
+                'Variable "vx" data contents differ, at 7 points: '
+                "@INDICES[(1, 1), (3, 3), ...] : "
+                "LHS=[b'h', b'e', ...], RHS=[b'X', b'', ...]"
+            ]
+        check(errs, expected)