From 6f8c2323ef2293cf7d14d0eb83465640f823bf02 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 4 Apr 2024 14:42:30 +0100 Subject: [PATCH 01/16] WIP --- ...sets.py => test_compare_nc_datasets__additional.py} | 0 .../tests/test_compare_nc_datasets__mainfunctions.py | 10 ++++++++++ tests/unit/tests/unit/__init__.py | 5 ----- 3 files changed, 10 insertions(+), 5 deletions(-) rename tests/unit/tests/{test_compare_nc_datasets.py => test_compare_nc_datasets__additional.py} (100%) create mode 100644 tests/unit/tests/test_compare_nc_datasets__mainfunctions.py delete mode 100644 tests/unit/tests/unit/__init__.py diff --git a/tests/unit/tests/test_compare_nc_datasets.py b/tests/unit/tests/test_compare_nc_datasets__additional.py similarity index 100% rename from tests/unit/tests/test_compare_nc_datasets.py rename to tests/unit/tests/test_compare_nc_datasets__additional.py diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py new file mode 100644 index 0000000..2ed8917 --- /dev/null +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -0,0 +1,10 @@ +import pytest + +from tests.data_testcase_schemas import data_types + +data_types # avoid 'unused' warning + + +@pytest.fixture(params=[]) +def context(request): + return request.param diff --git a/tests/unit/tests/unit/__init__.py b/tests/unit/tests/unit/__init__.py deleted file mode 100644 index a8038cb..0000000 --- a/tests/unit/tests/unit/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Unit tests for :mod:`tests.unit`. - -Yes I know, tests of tests. But it seems necessary. -""" From 6a16b69d90e048fc2279942f802ce1dcd68fc535 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 5 Apr 2024 08:20:40 +0100 Subject: [PATCH 02/16] --amend --- tests/_compare_nc_datasets.py | 9 +- ...test_compare_nc_datasets__mainfunctions.py | 257 +++++++++++++++++- 2 files changed, 261 insertions(+), 5 deletions(-) diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py index 21ea635..4cf728b 100644 --- a/tests/_compare_nc_datasets.py +++ b/tests/_compare_nc_datasets.py @@ -150,7 +150,7 @@ def _compare_attributes( Does not return results, but appends error messages to 'errs'. """ attrnames, attrnames2 = [ - obj.attributes.keys() if _isncdata(obj) else obj.ncattrs() + list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs()) for obj in (obj1, obj2) ] if attrs_order and force_first_attrnames: @@ -261,6 +261,13 @@ def _compare_nc_groups( f"have different sizes: {dimlen} != {dimlen2}" ) errs.append(msg) + unlim1, unlim2 = [dim.unlimited for dim in (d1, d2)] + if unlim1 != unlim2: + msg = ( + f'{group_id_string} "{dimname}" dimension ' + f'has different "unlimited" status : {unlim1} != {unlim2}' + ) + errs.append(msg) # Compare file attributes _compare_attributes( diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index 2ed8917..8e103fd 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -1,10 +1,259 @@ +import numpy as np import pytest -from tests.data_testcase_schemas import data_types +from ncdata import NcAttribute, NcData, NcDimension, NcVariable +from tests._compare_nc_datasets import compare_nc_datasets -data_types # avoid 'unused' warning +# from tests.data_testcase_schemas import data_types +# data_types # avoid 'unused' warning -@pytest.fixture(params=[]) -def context(request): +@pytest.fixture( + params=["in_named", "in_unnamed", "in_namedgroup", "in_unnamedgroup"] +) +def group_context(request): + """ + The different contexts of locations in a dataset + + In which an element (dimension, group or variable) might be found, and which might + appear different in the mismatch-error messages. + """ + return request.param + + +@pytest.fixture(params=["on_group", "on_variable"]) +def attr_context(request): + """The different contexts for an attribute in a dataset.""" return request.param + + +@pytest.fixture(params=["ordered", "unordered"]) +def order_checking(request): + """Whether to test with order checking or not.""" + return request.param + + +def location_prefix(group_context, attr_context="on_group"): + prefix = "Dataset" + if "namedgroup" in group_context: + prefix += "/inner_group" + if "variable" in attr_context: + prefix += ' variable "vx"' + return prefix + + +class TestCompareDimensions: + def dimension_testdata(self, group_context): + testdata = NcData( + name="dataset_1", + dimensions=[ + NcDimension("x", 2, unlimited=True), + NcDimension("y", 3, unlimited=False), + ], + ) + + if group_context == "in_named": + pass + elif group_context == "in_unnamed": + testdata.name = None + elif "group" in group_context: + testdata.name = "inner_group" + testdata = NcData(name="outer_dataset", groups=[testdata]) + if group_context == "in_namedgroup": + pass + elif group_context == "in_unnamedgroup": + testdata.name = None + else: + raise ValueError(f"unknown group_context: {group_context!r}") + else: + raise ValueError(f"unknown group_context: {group_context!r}") + + return testdata + + def _datas_and_dims(self, group_context): + data1, data2 = [ + self.dimension_testdata(group_context) for _ in range(2) + ] + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + return data1, data2, location.dimensions + + def test_name(self, group_context): + data1, data2, dims = self._datas_and_dims(group_context=group_context) + dims.rename("x", "q") + errs = compare_nc_datasets(data1, data2) + # TODO: this is wrong -- should be getting a message + location_string = location_prefix(group_context) + expected = [ + f"{location_string} dimension lists do not match: " + "['x', 'y'] != ['q', 'y']" + ] + assert errs == expected + + def test_size(self, group_context): + data1, data2, dims = self._datas_and_dims(group_context=group_context) + dims["x"].size = 77 + + errs = compare_nc_datasets(data1, data2) + + location_string = location_prefix(group_context) + expected = [ + f'{location_string} "x" dimensions have different sizes: 2 != 77' + ] + # TODO: messages are possibly not ideal, should include dataset name ?? + assert errs == expected + + def test_unlimited(self, group_context): + data1, data2, dims = self._datas_and_dims(group_context=group_context) + dims["y"].unlimited = True + + errs = compare_nc_datasets(data1, data2) + + location_string = location_prefix(group_context) + expected = [ + f'{location_string} "y" dimension has different "unlimited" status : ' + "False != True" + ] + # TODO: this is wrong -- should be getting a message + assert errs == expected + + def test_ordering(self, group_context, order_checking): + data1, data2, dims = self._datas_and_dims(group_context=group_context) + all_dims = list(dims.values()) + dims.clear() + dims.addall(all_dims[::-1]) + + do_ordercheck = {"ordered": True, "unordered": False}[order_checking] + errs = compare_nc_datasets( + data1, data2, check_dims_order=do_ordercheck + ) + + if do_ordercheck: + groupname = "/inner_group" if "group" in group_context else "" + expected = [ + f"Dataset{groupname} dimension lists do not match: " + "['x', 'y'] != ['y', 'x']" + ] + else: + expected = [] + + assert errs == expected + + +class TestCompareAttributes: + def attribute_testdata(self, group_context): + testdata = NcData( + name="dataset_1", + variables=[ + NcVariable( + "vx", + dimensions=[], + data=np.array(1.0), + attributes=[ + NcAttribute("att1", 1), + NcAttribute("att2", 2), + ], + ) + ], + attributes=[ + NcAttribute("att1", 11), + NcAttribute("att2", 12), + ], + ) + + if group_context == "in_named": + pass + elif group_context == "in_unnamed": + testdata.name = None + elif "group" in group_context: + testdata.name = "inner_group" + testdata = NcData(name="outer_dataset", groups=[testdata]) + if group_context == "in_namedgroup": + pass + elif group_context == "in_unnamedgroup": + testdata.name = None + else: + raise ValueError(f"unknown group_context: {group_context!r}") + else: + raise ValueError(f"unknown group_context: {group_context!r}") + + return testdata + + def _datas_and_attrs(self, group_context, attr_context): + data1, data2 = [ + self.attribute_testdata(group_context) for _ in range(2) + ] + + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + is_on_var = {"on_group": False, "on_variable": True}[attr_context] + if is_on_var: + location = location.variables["vx"] + + return data1, data2, location.attributes + + def test_name(self, group_context, attr_context): + data1, data2, attrs = self._datas_and_attrs( + group_context, attr_context + ) + + attrs.rename("att1", "changed") + errs = compare_nc_datasets(data1, data2) + + expected = [ + "Dataset attribute lists do not match: " + "['att1', 'att2'] != ['changed', 'att2']" + ] + assert errs == expected + + def test_value(self, group_context, attr_context): + data1, data2, attrs = self._datas_and_attrs( + group_context, attr_context + ) + + attrs["att1"].value = np.array(999) + errs = compare_nc_datasets(data1, data2) + + path_string = "Dataset" + if "namedgroup" in group_context: + path_string += "/inner_group" + if "variable" in attr_context: + path_string += ' variable "vx"' + value_string = "1" + else: + value_string = "11" + expected = [ + f'{path_string} "att1" attribute values differ : ' + f"array({value_string}) != array(999)" + ] + assert errs == expected + + def test_order(self, group_context, attr_context, order_checking): + data1, data2, attrs = self._datas_and_attrs( + group_context, attr_context + ) + do_ordercheck = {"ordered": True, "unordered": False}[order_checking] + all_attrs = list(attrs.values()) + attrs.clear() + attrs.addall(all_attrs[::-1]) + + errs = compare_nc_datasets( + data1, data2, check_attrs_order=do_ordercheck + ) + + path_string = "Dataset" + if "namedgroup" in group_context: + path_string += "/inner_group" + if "variable" in attr_context: + path_string += ' variable "vx"' + if do_ordercheck: + expected = [ + f"{path_string} attribute lists do not match: " + "['att1', 'att2'] != ['att2', 'att1']" + ] + else: + expected = [] + + assert errs == expected From 0be00b7a989659f46e79a6988ab00c248a221208 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 8 Apr 2024 14:26:52 +0100 Subject: [PATCH 03/16] WIP --- tests/_compare_nc_datasets.py | 65 ++- ...test_compare_nc_datasets__mainfunctions.py | 479 +++++++++++++----- 2 files changed, 397 insertions(+), 147 deletions(-) diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py index 4cf728b..8c5a7df 100644 --- a/tests/_compare_nc_datasets.py +++ b/tests/_compare_nc_datasets.py @@ -27,6 +27,8 @@ def compare_nc_datasets( check_groups_order: bool = True, check_var_data: bool = True, suppress_warnings: bool = False, + check_names: bool = False, + check_unlimited: bool = True, ) -> List[str]: r""" Compare netcdf data. @@ -46,6 +48,10 @@ def compare_nc_datasets( suppress_warnings : bool, default False When False (the default), report changes in content order as Warnings. When True, ignore changes in ordering. + check_names: bool, default False + Whether to warn if the names of the top-level datasets are different + check_unlimited: bool, default True + Whether to compare the 'unlimited' status of dimensions Returns ------- @@ -80,6 +86,8 @@ def compare_nc_datasets( groups_order=check_groups_order, data_equality=check_var_data, suppress_warnings=suppress_warnings, + check_names=check_names, + check_unlimited=check_unlimited, ) finally: if ds1_was_path and ds1: @@ -154,7 +162,8 @@ def _compare_attributes( for obj in (obj1, obj2) ] if attrs_order and force_first_attrnames: - + # In order to ignore the order of appearance of *specific* attributes, move + # all those ones to the front in a known order. def fix_orders(attrlist): for name in force_first_attrnames[::-1]: if name in attrlist: @@ -231,6 +240,8 @@ def _compare_nc_groups( groups_order: bool = True, data_equality: bool = True, suppress_warnings: bool = False, + check_names: bool = False, + check_unlimited: bool = True, ): """ Inner routine to compare either whole datasets or subgroups. @@ -238,6 +249,11 @@ def _compare_nc_groups( Note that, rather than returning a list of error strings, it appends them to the passed arg `errs`. This just makes recursive calling easier. """ + if check_names: + if g1.name != g2.name: + errs.append( + f"Datasets have different names: {g1.name!r} != {g2.name!r}." + ) # Compare lists of dimension names dimnames, dimnames2 = [list(grp.dimensions.keys()) for grp in (g1, g2)] _compare_name_lists( @@ -261,13 +277,18 @@ def _compare_nc_groups( f"have different sizes: {dimlen} != {dimlen2}" ) errs.append(msg) - unlim1, unlim2 = [dim.unlimited for dim in (d1, d2)] - if unlim1 != unlim2: - msg = ( - f'{group_id_string} "{dimname}" dimension ' - f'has different "unlimited" status : {unlim1} != {unlim2}' - ) - errs.append(msg) + + if check_unlimited: + unlim1, unlim2 = [ + dim.unlimited if _isncdata(dim) else dim.isunlimited() + for dim in (d1, d2) + ] + if unlim1 != unlim2: + msg = ( + f'{group_id_string} "{dimname}" dimension ' + f'has different "unlimited" status : {unlim1} != {unlim2}' + ) + errs.append(msg) # Compare file attributes _compare_attributes( @@ -286,7 +307,7 @@ def _compare_nc_groups( varnames, varnames2, f"{group_id_string} variable lists", - order_strict=dims_order, + order_strict=vars_order, suppress_warnings=suppress_warnings, ) @@ -302,6 +323,7 @@ def _compare_nc_groups( dims, dims2 = [v.dimensions for v in (v1, v2)] if dims != dims2: msg = f"{var_id_string} dimensions differ : {dims!r} != {dims2!r}" + errs.append(msg) # attributes _compare_attributes( @@ -439,28 +461,5 @@ def getdata(var): attrs_order=attrs_order, groups_order=groups_order, data_equality=data_equality, + check_unlimited=check_unlimited, ) - - -if __name__ == "__main__": - fps = [ - "/home/h05/itpp/tmp.nc", - "/home/h05/itpp/tmp2.nc", - "/home/h05/itpp/mask.nc", - "/home/h05/itpp/tmps.nc", - "/home/h05/itpp/tmps2.nc", - ] - fp1, fp2, fp3, fp4, fp5 = fps - pairs = [ - [fp1, fp1], - [fp1, fp2], - [fp1, fp3], - [fp4, fp5], - ] - for p1, p2 in pairs: - errs = compare_nc_datasets(p1, p2, check_attrs_order=False) - print("") - print(f"Compare {p1} with {p2} : {len(errs)} errors ") - for err in errs: - print(" ", err) - print("-ends-") diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index 8e103fd..e254fa7 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -1,10 +1,12 @@ +from dataclasses import dataclass + import numpy as np import pytest -from ncdata import NcAttribute, NcData, NcDimension, NcVariable +from ncdata import NameMap, NcAttribute, NcData, NcDimension, NcVariable from tests._compare_nc_datasets import compare_nc_datasets -# from tests.data_testcase_schemas import data_types +# from tests.data_testcase_schemas import _Datatype_Sample_Values, data_types # data_types # avoid 'unused' warning @@ -33,6 +35,10 @@ def order_checking(request): return request.param +def decode_ordercheck(order_checking): + return {"ordered": True, "unordered": False}[order_checking] + + def location_prefix(group_context, attr_context="on_group"): prefix = "Dataset" if "namedgroup" in group_context: @@ -42,6 +48,57 @@ def location_prefix(group_context, attr_context="on_group"): return prefix +def put_group_into_context(testdata, group_context): + if group_context == "in_named": + pass + elif group_context == "in_unnamed": + testdata.name = None + elif "group" in group_context: + testdata.name = "inner_group" + testdata = NcData(name="outer_dataset", groups=[testdata]) + if group_context == "in_namedgroup": + pass + elif group_context == "in_unnamedgroup": + testdata.name = None + else: + raise ValueError(f"unknown group_context: {group_context!r}") + else: + raise ValueError(f"unknown group_context: {group_context!r}") + + return testdata + + +_DEBUG_RESULTS = True +# _DEBUG_RESULTS = True + + +def check(results, expected): + if _DEBUG_RESULTS: + print("\nResult messages:") + for msg in results: + print(" ", msg) + assert results == expected + + +class TestCompareDatasets: + @pytest.mark.parametrize("namecheck", ["withnames", "withoutnames"]) + @pytest.mark.parametrize("altname", ["named_y", "named_none"]) + def test_names(self, namecheck, altname): + do_namecheck = namecheck == "withnames" + altname = {"named_y": "y", "named_none": None}[altname] + data1, data2 = NcData(name="x"), NcData(name=altname) + + # Use kwargs just to confirm that the default for name-checking is 'off' + kwargs = dict(check_names=True) if do_namecheck else {} + errs = compare_nc_datasets(data1, data2, **kwargs) + + if do_namecheck: + expected = [f"Datasets have different names: 'x' != {altname!r}."] + else: + expected = [] + check(errs, expected) + + class TestCompareDimensions: def dimension_testdata(self, group_context): testdata = NcData( @@ -51,94 +108,108 @@ def dimension_testdata(self, group_context): NcDimension("y", 3, unlimited=False), ], ) - - if group_context == "in_named": - pass - elif group_context == "in_unnamed": - testdata.name = None - elif "group" in group_context: - testdata.name = "inner_group" - testdata = NcData(name="outer_dataset", groups=[testdata]) - if group_context == "in_namedgroup": - pass - elif group_context == "in_unnamedgroup": - testdata.name = None - else: - raise ValueError(f"unknown group_context: {group_context!r}") - else: - raise ValueError(f"unknown group_context: {group_context!r}") - + testdata = put_group_into_context(testdata, group_context) return testdata - def _datas_and_dims(self, group_context): + @dataclass + class DimsData: + data1: NcData = None + data2: NcData = None + location_string: str = "" + dims: NameMap = None + + @pytest.fixture() + def dimsdata(self, group_context): data1, data2 = [ self.dimension_testdata(group_context) for _ in range(2) ] location = data2 if "group" in group_context: location = location.groups["inner_group"] - return data1, data2, location.dimensions - def test_name(self, group_context): - data1, data2, dims = self._datas_and_dims(group_context=group_context) - dims.rename("x", "q") - errs = compare_nc_datasets(data1, data2) - # TODO: this is wrong -- should be getting a message - location_string = location_prefix(group_context) + dimsdata = self.DimsData( + data1=data1, + data2=data2, + location_string=location_prefix(group_context), + dims=location.dimensions, + ) + return dimsdata + + def test_name(self, dimsdata): + dimsdata.dims.rename("x", "q") + errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) expected = [ - f"{location_string} dimension lists do not match: " + f"{dimsdata.location_string} dimension lists do not match: " "['x', 'y'] != ['q', 'y']" ] - assert errs == expected + check(errs, expected) - def test_size(self, group_context): - data1, data2, dims = self._datas_and_dims(group_context=group_context) - dims["x"].size = 77 + def test_size(self, dimsdata): + dimsdata.dims["x"].size = 77 - errs = compare_nc_datasets(data1, data2) + errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) - location_string = location_prefix(group_context) expected = [ - f'{location_string} "x" dimensions have different sizes: 2 != 77' + f'{dimsdata.location_string} "x" dimensions have different sizes: 2 != 77' ] - # TODO: messages are possibly not ideal, should include dataset name ?? - assert errs == expected - - def test_unlimited(self, group_context): - data1, data2, dims = self._datas_and_dims(group_context=group_context) - dims["y"].unlimited = True + check(errs, expected) - errs = compare_nc_datasets(data1, data2) + @pytest.mark.parametrize( + "check_unlim", ["unlims_checked", "unlims_unchecked"] + ) + def test_unlimited(self, dimsdata, check_unlim): + dimsdata.dims["y"].unlimited = True - location_string = location_prefix(group_context) - expected = [ - f'{location_string} "y" dimension has different "unlimited" status : ' - "False != True" + do_check_unlims = {"unlims_checked": True, "unlims_unchecked": False}[ + check_unlim ] - # TODO: this is wrong -- should be getting a message - assert errs == expected + errs = compare_nc_datasets( + dimsdata.data1, dimsdata.data2, check_unlimited=do_check_unlims + ) - def test_ordering(self, group_context, order_checking): - data1, data2, dims = self._datas_and_dims(group_context=group_context) - all_dims = list(dims.values()) - dims.clear() - dims.addall(all_dims[::-1]) + if do_check_unlims: + expected = [ + f'{dimsdata.location_string} "y" dimension has different "unlimited" status : ' + "False != True" + ] + else: + expected = [] + + check(errs, expected) - do_ordercheck = {"ordered": True, "unordered": False}[order_checking] + def test_ordering(self, dimsdata, order_checking): + all_dims = list(dimsdata.dims.values()) + dimsdata.dims.clear() + dimsdata.dims.addall(all_dims[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) errs = compare_nc_datasets( - data1, data2, check_dims_order=do_ordercheck + dimsdata.data1, dimsdata.data2, check_dims_order=do_ordercheck ) if do_ordercheck: - groupname = "/inner_group" if "group" in group_context else "" expected = [ - f"Dataset{groupname} dimension lists do not match: " + f"{dimsdata.location_string} dimension lists do not match: " "['x', 'y'] != ['y', 'x']" ] else: expected = [] - assert errs == expected + check(errs, expected) + + def test_extra_or_missing(self, dimsdata): + all_dims = list(dimsdata.dims.values()) + # Remove the last dimension, so data1 has a dim not present in data2 + dimsdata.dims.clear() + dimsdata.dims.addall(all_dims[:-1]) + + errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) + + expected = [ + f"{dimsdata.location_string} dimension lists do not match: " + "['x', 'y'] != ['x']" + ] + check(errs, expected) class TestCompareAttributes: @@ -161,30 +232,21 @@ def attribute_testdata(self, group_context): NcAttribute("att2", 12), ], ) - - if group_context == "in_named": - pass - elif group_context == "in_unnamed": - testdata.name = None - elif "group" in group_context: - testdata.name = "inner_group" - testdata = NcData(name="outer_dataset", groups=[testdata]) - if group_context == "in_namedgroup": - pass - elif group_context == "in_unnamedgroup": - testdata.name = None - else: - raise ValueError(f"unknown group_context: {group_context!r}") - else: - raise ValueError(f"unknown group_context: {group_context!r}") - + testdata = put_group_into_context(testdata, group_context) return testdata - def _datas_and_attrs(self, group_context, attr_context): + @dataclass + class AttrsData: + data1: NcData = None + data2: NcData = None + location_string: str = "" + attrs: NameMap = None + + @pytest.fixture() + def attrsdata(self, group_context, attr_context): data1, data2 = [ self.attribute_testdata(group_context) for _ in range(2) ] - location = data2 if "group" in group_context: location = location.groups["inner_group"] @@ -192,68 +254,257 @@ def _datas_and_attrs(self, group_context, attr_context): if is_on_var: location = location.variables["vx"] - return data1, data2, location.attributes - - def test_name(self, group_context, attr_context): - data1, data2, attrs = self._datas_and_attrs( - group_context, attr_context + attrsdata = self.AttrsData( + data1=data1, + data2=data2, + location_string=location_prefix(group_context, attr_context), + attrs=location.attributes, ) + return attrsdata - attrs.rename("att1", "changed") - errs = compare_nc_datasets(data1, data2) + def test_name(self, attrsdata): + attrsdata.attrs.rename("att1", "changed") + + errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) expected = [ - "Dataset attribute lists do not match: " + f"{attrsdata.location_string} attribute lists do not match: " "['att1', 'att2'] != ['changed', 'att2']" ] - assert errs == expected + check(errs, expected) - def test_value(self, group_context, attr_context): - data1, data2, attrs = self._datas_and_attrs( - group_context, attr_context - ) + def test_value(self, attrsdata, attr_context): + attrsdata.attrs["att1"].value = np.array(999) - attrs["att1"].value = np.array(999) - errs = compare_nc_datasets(data1, data2) + errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) - path_string = "Dataset" - if "namedgroup" in group_context: - path_string += "/inner_group" if "variable" in attr_context: - path_string += ' variable "vx"' value_string = "1" else: value_string = "11" expected = [ - f'{path_string} "att1" attribute values differ : ' + f'{attrsdata.location_string} "att1" attribute values differ : ' f"array({value_string}) != array(999)" ] - assert errs == expected + check(errs, expected) + + def test_dtype(self): + # TODO: check over various datatype for dtype difference + # N.B. strings behave differently. + assert 0 + # attrsdata.attrs["att1"].value = np.array(999) + # + # errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) + # + # if "variable" in attr_context: + # value_string = "1" + # else: + # value_string = "11" + # expected = [ + # f'{attrsdata.location_string} "att1" attribute values differ : ' + # f"array({value_string}) != array(999)" + # ] + # check(errs, expected) + + def test_ordering(self, attrsdata, order_checking): + do_ordercheck = decode_ordercheck(order_checking) + all_attrs = list(attrsdata.attrs.values()) + attrsdata.attrs.clear() + attrsdata.attrs.addall(all_attrs[::-1]) + + errs = compare_nc_datasets( + attrsdata.data1, attrsdata.data2, check_attrs_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + f"{attrsdata.location_string} attribute lists do not match: " + "['att1', 'att2'] != ['att2', 'att1']" + ] + else: + expected = [] + check(errs, expected) + + def test_extra_or_missing(self, attrsdata, order_checking): + do_ordercheck = decode_ordercheck(order_checking) + del attrsdata.attrs["att1"] + + errs = compare_nc_datasets( + attrsdata.data1, attrsdata.data2, check_attrs_order=do_ordercheck + ) + + expected = [ + f"{attrsdata.location_string} attribute lists do not match: " + "['att1', 'att2'] != ['att2']" + ] + check(errs, expected) + + @pytest.mark.parametrize("attname", ["fillvalue", "generic"]) + def test_fillvalue_anyorder(self, attname): + """The order of "_FillValue" attributes is specially ignored.""" + name = {"fillvalue": "_FillValue", "generic": "anyold"}[attname] + # data1, data2 have attrs in the other order + attr_pair = [NcAttribute(name, 1), NcAttribute("x", 1)] + data1, data2 = [ + NcData( + variables=[ + NcVariable("vx", (), data=np.array(0.0), attributes=attrs) + ] + ) + for attrs in (attr_pair, attr_pair[::-1]) + ] + + errs = compare_nc_datasets(data1, data2) + + if "generic" in attname: + expected = [ + 'Dataset variable "vx" attribute lists do not match: ' + "['anyold', 'x'] != ['x', 'anyold']" + ] + else: + expected = [] + check(errs, expected) + + +class TestCompareVariables__metadata: + def vars_testdata(self, group_context): + def data(): + return np.zeros((2, 3)) - def test_order(self, group_context, attr_context, order_checking): - data1, data2, attrs = self._datas_and_attrs( - group_context, attr_context + testdata = NcData( + name="dataset_1", + dimensions=[NcDimension("y", 2), NcDimension("x", 3)], + variables=[ + NcVariable("v1", ("y", "x"), data=data()), + NcVariable("v2", ("y", "x"), data=data()), + ], ) - do_ordercheck = {"ordered": True, "unordered": False}[order_checking] - all_attrs = list(attrs.values()) - attrs.clear() - attrs.addall(all_attrs[::-1]) + testdata = put_group_into_context(testdata, group_context) + return testdata + + @dataclass + class VarsData: + data1: NcData = None + data2: NcData = None + location_string: str = "" + vars: NameMap = None + + @pytest.fixture() + def varsdata(self, group_context): + data1, data2 = [self.vars_testdata(group_context) for _ in range(2)] + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + + varsdata = self.VarsData( + data1=data1, + data2=data2, + location_string=location_prefix(group_context), + vars=location.variables, + ) + return varsdata + + def test_name(self, varsdata): + varsdata.vars.rename("v2", "q") + + errs = compare_nc_datasets(varsdata.data1, varsdata.data2) + expected = [ + f"{varsdata.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v1', 'q']" + ] + check(errs, expected) + + def test_order(self, varsdata, order_checking): + all_vars = list(varsdata.vars.values()) + varsdata.vars.clear() + varsdata.vars.addall(all_vars[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) errs = compare_nc_datasets( - data1, data2, check_attrs_order=do_ordercheck + varsdata.data1, varsdata.data2, check_vars_order=do_ordercheck ) - path_string = "Dataset" - if "namedgroup" in group_context: - path_string += "/inner_group" - if "variable" in attr_context: - path_string += ' variable "vx"' if do_ordercheck: expected = [ - f"{path_string} attribute lists do not match: " - "['att1', 'att2'] != ['att2', 'att1']" + f"{varsdata.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v2', 'v1']" ] else: expected = [] + check(errs, expected) + + def test_extra_or_missing(self, varsdata, order_checking): + do_ordercheck = decode_ordercheck(order_checking) + del varsdata.vars["v1"] + + do_ordercheck = decode_ordercheck(order_checking) + errs = compare_nc_datasets( + varsdata.data1, varsdata.data2, check_vars_order=do_ordercheck + ) + + expected = [ + f"{varsdata.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v2']" + ] + check(errs, expected) + + def test_dims__reorder(self, varsdata, order_checking): + # N.B. here we check behaviour of the DIMENSIONS order control, but this does + # *not* apply to dimensions order in a variable,which is always significant. + varsdata.vars["v1"].dimensions = varsdata.vars["v1"].dimensions[::-1] + # N.B. the data shape doesn't now correspond, but that won't matter as, with + # mismatched dimensions, the data won't be checked. + + do_orderchecks = decode_ordercheck(order_checking) + errs = compare_nc_datasets( + varsdata.data1, varsdata.data2, check_dims_order=do_orderchecks + ) + + expected = [ + f'{varsdata.location_string} variable "v1" dimensions differ : ' + "('y', 'x') != ('x', 'y')" + ] + check(errs, expected) + + def test_dims__extra_or_missing(self, varsdata, order_checking): + # N.B. here we check for DIMENSIONS order check control. + varsdata.vars["v1"].dimensions = varsdata.vars["v1"].dimensions[:-1] + # N.B. the data shape doesn't now correspond, but that won't matter as, with + # mismatched dimensions, the data won't be checked. + + do_orderchecks = decode_ordercheck(order_checking) + errs = compare_nc_datasets( + varsdata.data1, varsdata.data2, check_dims_order=do_orderchecks + ) + + expected = [ + f'{varsdata.location_string} variable "v1" dimensions differ : ' + "('y', 'x') != ('y',)" + ] + check(errs, expected) + + +class TestCompareVariables__data: + """ + TODO: tests for data equivalence checking. + + Check with various dtypes etc. + To consider ... + * masks + * NaNs + * real+lazy + * int/float/string datatypes + * 0/1/N-dimensional + """ + + +class TestCompareGroups: + def test_names(self): + pass + + def test_order(self): + pass - assert errs == expected + def test_extra_or_missing(self): + pass From 0c4aaef48cd80e080c68e4560bea0f25bbe4ad69 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 12 Apr 2024 16:58:23 +0100 Subject: [PATCH 04/16] Don't check unlimited dims for xarray-save-direct-vs-ncdata testing. --- tests/data_testcase_schemas.py | 4 ++-- tests/integration/test_xarray_load_and_save_equivalence.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/data_testcase_schemas.py b/tests/data_testcase_schemas.py index 26f8c7e..19d44a1 100644 --- a/tests/data_testcase_schemas.py +++ b/tests/data_testcase_schemas.py @@ -330,8 +330,8 @@ def _define_simple_testcases(): return testcases -ADD_IRIS_FILES = True -# ADD_IRIS_FILES = False +# ADD_IRIS_FILES = True +ADD_IRIS_FILES = False @standard_testcases_func diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 1f996d4..92153fa 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -78,6 +78,7 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): temp_direct_savepath, temp_ncdata_savepath, check_dims_order=False, + check_unlimited=False, # TODO: remove this when we fix it suppress_warnings=True, ) assert results == [] From 9fd93fc9ed5643d0bdbc97ca4927825ffb4e3b17 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 29 Apr 2024 01:45:51 +0100 Subject: [PATCH 05/16] More WIP: replace dataclasses, partial datatype testing. --- .../test_compare_nc_datasets__additional.py | 154 ++++++----- ...test_compare_nc_datasets__mainfunctions.py | 240 ++++++++---------- 2 files changed, 176 insertions(+), 218 deletions(-) diff --git a/tests/unit/tests/test_compare_nc_datasets__additional.py b/tests/unit/tests/test_compare_nc_datasets__additional.py index e153db3..c01f929 100644 --- a/tests/unit/tests/test_compare_nc_datasets__additional.py +++ b/tests/unit/tests/test_compare_nc_datasets__additional.py @@ -1,11 +1,13 @@ """ Tests for :mod:`tests.unit.netcdf._compare_nc_files` - -Yes I know, tests of tests. But it seems necessary. +Split in two files ... + * HERE: "additional" tests cover subsidiary routines and the main + API usage modes. + * ( ALSO: "mainfunctions" (q.v.) cover the core functionality + -- which elements are compared and what errors this constructs. ) """ import shutil import warnings -from unittest import mock import netCDF4 as nc import numpy as np @@ -18,50 +20,6 @@ ) from tests.test_samplecode_cdlgen_comparablecdl import ncgen_from_cdl -# CDL to create a reference file with "all" features included. -_base_cdl = """ -netcdf everything { -dimensions: - x = 2 ; - y = 3 ; - strlen = 5 ; -variables: - int x(x) ; - x:name = "var_x" ; - int var_2d(x, y) ; - uint var_u8(x) ; - float var_f4(x) ; - double var_f8(x) ; - char var_str(x, strlen) ; - int other(x) ; - other:attr_int = 1 ; - other:attr_float = 2.0f ; - other:attr_double = 2.0 ; - other:attr_string = "this" ; - int masked_int(y) ; - masked_int:_FillValue = -3 ; - int masked_float(y) ; - masked_float:_FillValue = -4.0 ; - -// global attributes: - :global_attr_1 = "one" ; - :global_attr_2 = 2 ; - -// groups: -group: grp_1 { - dimensions: - y = 7 ; - variables: - int parent_dim(x) ; - int own_dim(y) ; -} -group: grp_2 { - variables: - int grp2_x(x) ; -} -} -""" - _simple_cdl = """ netcdf test { dimensions: @@ -132,44 +90,41 @@ def test_difforder_tolerant_nowarn(self): class Test__compare_attributes: - def test_compare_attributes_namelists(self): + def test_compare_attributes_namelists(self, mocker): # Check that it calls the generic _compare_name_lists routine, passing all the # correct controls - # Mimic 2 objects with NO attributes. - attrs1 = mock.MagicMock() - attrs2 = mock.MagicMock() - # Make the test objects look like real files (not NcData), and ensure that - # obj.ncattrs() is iterable. - obj1 = mock.Mock( - spec="ncattrs", ncattrs=mock.Mock(return_value=attrs1) + # NB make the compared object mimic nc Variables, not NcData + attrnames_1 = ["a", "b"] + attrnames_2 = ["c", "d"] + obj1 = mocker.Mock( + spec=nc.Variable, ncattrs=mocker.Mock(return_value=attrnames_1) ) - obj2 = mock.Mock( - spec="ncattrs", ncattrs=mock.Mock(return_value=attrs2) + obj2 = mocker.Mock( + spec=nc.Variable, ncattrs=mocker.Mock(return_value=attrnames_2) ) - errs = mock.sentinel.errors_list + errs = mocker.sentinel.errors_list elemname = "" - order = mock.sentinel.attrs_order - suppress = mock.sentinel.suppress_warnings + order = mocker.sentinel.attrs_order + suppress = mocker.sentinel.suppress_warnings tgt = "tests._compare_nc_datasets._compare_name_lists" - with mock.patch(tgt) as patch_tgt: - _compare_attributes( - errs=errs, - obj1=obj1, - obj2=obj2, - elemname=elemname, - attrs_order=order, - suppress_warnings=suppress, - ) - assert patch_tgt.call_args_list == [ - mock.call( - errs, - attrs1, - attrs2, - " attribute lists", - order_strict=order, - suppress_warnings=suppress, - ) - ] + patch_tgt = mocker.patch(tgt) + _compare_attributes( + errs=errs, + obj1=obj1, + obj2=obj2, + elemname=elemname, + attrs_order=order, + suppress_warnings=suppress, + ) + (one_call,) = patch_tgt.call_args_list + assert one_call == mocker.call( + errs, + attrnames_1, + attrnames_2, + " attribute lists", + order_strict=order, + suppress_warnings=suppress, + ) class Nc4ObjectWithAttrsMimic: def __init__(self, **attrs): @@ -221,7 +176,7 @@ def test_compare_attributes_values__data_mismatch(self): ' "b" attribute values differ : 2 != -77' ] - def test_compare_attributes_values__dtype_mismatch(self): + def test_compare_attributes_values__dtype_mismatch__length(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.float64(0)) @@ -234,6 +189,45 @@ def test_compare_attributes_values__dtype_mismatch(self): ) ] + def test_compare_attributes_values__dtype_mismatch__signed_unsigned(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.uint32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) + errs = [] + _compare_attributes(errs, obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('uint32') != dtype('int32')" + ) + ] + + def test_compare_attributes_values__dtype_mismatch__float_int(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) + errs = [] + _compare_attributes(errs, obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('float32') != dtype('int32')" + ) + ] + + def test_compare_attributes_values__dtype_mismatch__numeric_string(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a="this") + errs = [] + _compare_attributes(errs, obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('float32') != " + ) + ] + def test_compare_attributes_values__dtype_and_data_mismatch(self): # Attributes of different dtypes, but values != obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index e254fa7..4353ea7 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -1,9 +1,15 @@ -from dataclasses import dataclass - +""" +Tests for :mod:`tests.unit.netcdf._compare_nc_files` +Split in two files ... + * HERE: "mainfunctions" cover the core functionality + -- which elements are compared and what errors this constructs. + * ( ALSO: "additional" tests (q.v.) cover subsidiary routines and the + main API usage modes. ) +""" import numpy as np import pytest -from ncdata import NameMap, NcAttribute, NcData, NcDimension, NcVariable +from ncdata import NcAttribute, NcData, NcDimension, NcVariable from tests._compare_nc_datasets import compare_nc_datasets # from tests.data_testcase_schemas import _Datatype_Sample_Values, data_types @@ -17,8 +23,8 @@ def group_context(request): """ The different contexts of locations in a dataset - In which an element (dimension, group or variable) might be found, and which might - appear different in the mismatch-error messages. + In which an element (dimension, group or variable) might be found, and + which might appear different in the mismatch-error messages. """ return request.param @@ -111,15 +117,8 @@ def dimension_testdata(self, group_context): testdata = put_group_into_context(testdata, group_context) return testdata - @dataclass - class DimsData: - data1: NcData = None - data2: NcData = None - location_string: str = "" - dims: NameMap = None - - @pytest.fixture() - def dimsdata(self, group_context): + @pytest.fixture(autouse=True) + def _dims_data(self, group_context): data1, data2 = [ self.dimension_testdata(group_context) for _ in range(2) ] @@ -127,49 +126,46 @@ def dimsdata(self, group_context): if "group" in group_context: location = location.groups["inner_group"] - dimsdata = self.DimsData( - data1=data1, - data2=data2, - location_string=location_prefix(group_context), - dims=location.dimensions, - ) - return dimsdata + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context) + self.dims = location.dimensions - def test_name(self, dimsdata): - dimsdata.dims.rename("x", "q") - errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) + def test_name(self): + self.dims.rename("x", "q") + errs = compare_nc_datasets(self.data1, self.data2) expected = [ - f"{dimsdata.location_string} dimension lists do not match: " + f"{self.location_string} dimension lists do not match: " "['x', 'y'] != ['q', 'y']" ] check(errs, expected) - def test_size(self, dimsdata): - dimsdata.dims["x"].size = 77 + def test_size(self): + self.dims["x"].size = 77 - errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) + errs = compare_nc_datasets(self.data1, self.data2) expected = [ - f'{dimsdata.location_string} "x" dimensions have different sizes: 2 != 77' + f'{self.location_string} "x" dimensions have different sizes: 2 != 77' ] check(errs, expected) @pytest.mark.parametrize( "check_unlim", ["unlims_checked", "unlims_unchecked"] ) - def test_unlimited(self, dimsdata, check_unlim): - dimsdata.dims["y"].unlimited = True + def test_unlimited(self, check_unlim): + self.dims["y"].unlimited = True do_check_unlims = {"unlims_checked": True, "unlims_unchecked": False}[ check_unlim ] errs = compare_nc_datasets( - dimsdata.data1, dimsdata.data2, check_unlimited=do_check_unlims + self.data1, self.data2, check_unlimited=do_check_unlims ) if do_check_unlims: expected = [ - f'{dimsdata.location_string} "y" dimension has different "unlimited" status : ' + f'{self.location_string} "y" dimension has different "unlimited" status : ' "False != True" ] else: @@ -177,19 +173,19 @@ def test_unlimited(self, dimsdata, check_unlim): check(errs, expected) - def test_ordering(self, dimsdata, order_checking): - all_dims = list(dimsdata.dims.values()) - dimsdata.dims.clear() - dimsdata.dims.addall(all_dims[::-1]) + def test_ordering(self, order_checking): + all_dims = list(self.dims.values()) + self.dims.clear() + self.dims.addall(all_dims[::-1]) do_ordercheck = decode_ordercheck(order_checking) errs = compare_nc_datasets( - dimsdata.data1, dimsdata.data2, check_dims_order=do_ordercheck + self.data1, self.data2, check_dims_order=do_ordercheck ) if do_ordercheck: expected = [ - f"{dimsdata.location_string} dimension lists do not match: " + f"{self.location_string} dimension lists do not match: " "['x', 'y'] != ['y', 'x']" ] else: @@ -197,16 +193,16 @@ def test_ordering(self, dimsdata, order_checking): check(errs, expected) - def test_extra_or_missing(self, dimsdata): - all_dims = list(dimsdata.dims.values()) + def test_extra_or_missing(self): + all_dims = list(self.dims.values()) # Remove the last dimension, so data1 has a dim not present in data2 - dimsdata.dims.clear() - dimsdata.dims.addall(all_dims[:-1]) + self.dims.clear() + self.dims.addall(all_dims[:-1]) - errs = compare_nc_datasets(dimsdata.data1, dimsdata.data2) + errs = compare_nc_datasets(self.data1, self.data2) expected = [ - f"{dimsdata.location_string} dimension lists do not match: " + f"{self.location_string} dimension lists do not match: " "['x', 'y'] != ['x']" ] check(errs, expected) @@ -235,15 +231,8 @@ def attribute_testdata(self, group_context): testdata = put_group_into_context(testdata, group_context) return testdata - @dataclass - class AttrsData: - data1: NcData = None - data2: NcData = None - location_string: str = "" - attrs: NameMap = None - - @pytest.fixture() - def attrsdata(self, group_context, attr_context): + @pytest.fixture(autouse=True) + def _attrs_data(self, group_context, attr_context): data1, data2 = [ self.attribute_testdata(group_context) for _ in range(2) ] @@ -254,87 +243,66 @@ def attrsdata(self, group_context, attr_context): if is_on_var: location = location.variables["vx"] - attrsdata = self.AttrsData( - data1=data1, - data2=data2, - location_string=location_prefix(group_context, attr_context), - attrs=location.attributes, - ) - return attrsdata + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context, attr_context) + self.attrs = location.attributes - def test_name(self, attrsdata): - attrsdata.attrs.rename("att1", "changed") + def test_name(self): + self.attrs.rename("att1", "changed") - errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) + errs = compare_nc_datasets(self.data1, self.data2) expected = [ - f"{attrsdata.location_string} attribute lists do not match: " + f"{self.location_string} attribute lists do not match: " "['att1', 'att2'] != ['changed', 'att2']" ] check(errs, expected) - def test_value(self, attrsdata, attr_context): - attrsdata.attrs["att1"].value = np.array(999) + def test_value(self, attr_context): + self.attrs["att1"].value = np.array(999) - errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) + errs = compare_nc_datasets(self.data1, self.data2) if "variable" in attr_context: value_string = "1" else: value_string = "11" expected = [ - f'{attrsdata.location_string} "att1" attribute values differ : ' + f'{self.location_string} "att1" attribute values differ : ' f"array({value_string}) != array(999)" ] check(errs, expected) - def test_dtype(self): - # TODO: check over various datatype for dtype difference - # N.B. strings behave differently. - assert 0 - # attrsdata.attrs["att1"].value = np.array(999) - # - # errs = compare_nc_datasets(attrsdata.data1, attrsdata.data2) - # - # if "variable" in attr_context: - # value_string = "1" - # else: - # value_string = "11" - # expected = [ - # f'{attrsdata.location_string} "att1" attribute values differ : ' - # f"array({value_string}) != array(999)" - # ] - # check(errs, expected) - - def test_ordering(self, attrsdata, order_checking): + def test_ordering(self, order_checking): do_ordercheck = decode_ordercheck(order_checking) - all_attrs = list(attrsdata.attrs.values()) - attrsdata.attrs.clear() - attrsdata.attrs.addall(all_attrs[::-1]) + all_attrs = list(self.attrs.values()) + self.attrs.clear() + self.attrs.addall(all_attrs[::-1]) errs = compare_nc_datasets( - attrsdata.data1, attrsdata.data2, check_attrs_order=do_ordercheck + self.data1, self.data2, check_attrs_order=do_ordercheck ) if do_ordercheck: expected = [ - f"{attrsdata.location_string} attribute lists do not match: " + f"{self.location_string} attribute lists do not match: " "['att1', 'att2'] != ['att2', 'att1']" ] else: expected = [] check(errs, expected) - def test_extra_or_missing(self, attrsdata, order_checking): + def test_extra_or_missing(self, order_checking): do_ordercheck = decode_ordercheck(order_checking) - del attrsdata.attrs["att1"] + del self.attrs["att1"] errs = compare_nc_datasets( - attrsdata.data1, attrsdata.data2, check_attrs_order=do_ordercheck + self.data1, self.data2, check_attrs_order=do_ordercheck ) expected = [ - f"{attrsdata.location_string} attribute lists do not match: " + f"{self.location_string} attribute lists do not match: " "['att1', 'att2'] != ['att2']" ] check(errs, expected) @@ -367,7 +335,8 @@ def test_fillvalue_anyorder(self, attname): class TestCompareVariables__metadata: - def vars_testdata(self, group_context): + @staticmethod + def _vars_testdata(group_context): def data(): return np.zeros((2, 3)) @@ -382,108 +351,103 @@ def data(): testdata = put_group_into_context(testdata, group_context) return testdata - @dataclass - class VarsData: - data1: NcData = None - data2: NcData = None - location_string: str = "" - vars: NameMap = None - - @pytest.fixture() - def varsdata(self, group_context): - data1, data2 = [self.vars_testdata(group_context) for _ in range(2)] + @pytest.fixture(autouse=True) + def _vars_data(self, group_context): + data1, data2 = [self._vars_testdata(group_context) for _ in range(2)] location = data2 if "group" in group_context: location = location.groups["inner_group"] - varsdata = self.VarsData( - data1=data1, - data2=data2, - location_string=location_prefix(group_context), - vars=location.variables, - ) - return varsdata + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context) + self.vars = location.variables - def test_name(self, varsdata): - varsdata.vars.rename("v2", "q") + def test_vars_names(self): + self.vars.rename("v2", "q") - errs = compare_nc_datasets(varsdata.data1, varsdata.data2) + errs = compare_nc_datasets(self.data1, self.data2) expected = [ - f"{varsdata.location_string} variable lists do not match: " + f"{self.location_string} variable lists do not match: " "['v1', 'v2'] != ['v1', 'q']" ] check(errs, expected) - def test_order(self, varsdata, order_checking): - all_vars = list(varsdata.vars.values()) - varsdata.vars.clear() - varsdata.vars.addall(all_vars[::-1]) + def test_vars_order(self, order_checking): + all_vars = list(self.vars.values()) + self.vars.clear() + self.vars.addall(all_vars[::-1]) do_ordercheck = decode_ordercheck(order_checking) errs = compare_nc_datasets( - varsdata.data1, varsdata.data2, check_vars_order=do_ordercheck + self.data1, self.data2, check_vars_order=do_ordercheck ) if do_ordercheck: expected = [ - f"{varsdata.location_string} variable lists do not match: " + f"{self.location_string} variable lists do not match: " "['v1', 'v2'] != ['v2', 'v1']" ] else: expected = [] check(errs, expected) - def test_extra_or_missing(self, varsdata, order_checking): - do_ordercheck = decode_ordercheck(order_checking) - del varsdata.vars["v1"] + def test_vars_extra_or_missing(self, order_checking): + del self.vars["v1"] do_ordercheck = decode_ordercheck(order_checking) errs = compare_nc_datasets( - varsdata.data1, varsdata.data2, check_vars_order=do_ordercheck + self.data1, self.data2, check_vars_order=do_ordercheck ) expected = [ - f"{varsdata.location_string} variable lists do not match: " + f"{self.location_string} variable lists do not match: " "['v1', 'v2'] != ['v2']" ] check(errs, expected) - def test_dims__reorder(self, varsdata, order_checking): + def test_var_dims__reorder(self, order_checking): # N.B. here we check behaviour of the DIMENSIONS order control, but this does # *not* apply to dimensions order in a variable,which is always significant. - varsdata.vars["v1"].dimensions = varsdata.vars["v1"].dimensions[::-1] + self.vars["v1"].dimensions = self.vars["v1"].dimensions[::-1] # N.B. the data shape doesn't now correspond, but that won't matter as, with # mismatched dimensions, the data won't be checked. do_orderchecks = decode_ordercheck(order_checking) errs = compare_nc_datasets( - varsdata.data1, varsdata.data2, check_dims_order=do_orderchecks + self.data1, self.data2, check_dims_order=do_orderchecks ) expected = [ - f'{varsdata.location_string} variable "v1" dimensions differ : ' + f'{self.location_string} variable "v1" dimensions differ : ' "('y', 'x') != ('x', 'y')" ] check(errs, expected) - def test_dims__extra_or_missing(self, varsdata, order_checking): + def test_var_dims__extra_or_missing(self, order_checking): # N.B. here we check for DIMENSIONS order check control. - varsdata.vars["v1"].dimensions = varsdata.vars["v1"].dimensions[:-1] + self.vars["v1"].dimensions = self.vars["v1"].dimensions[:-1] # N.B. the data shape doesn't now correspond, but that won't matter as, with # mismatched dimensions, the data won't be checked. do_orderchecks = decode_ordercheck(order_checking) errs = compare_nc_datasets( - varsdata.data1, varsdata.data2, check_dims_order=do_orderchecks + self.data1, self.data2, check_dims_order=do_orderchecks ) expected = [ - f'{varsdata.location_string} variable "v1" dimensions differ : ' + f'{self.location_string} variable "v1" dimensions differ : ' "('y', 'x') != ('y',)" ] check(errs, expected) + def test_var_dtype(self): + pass + # PLAN: + # default data has int type (int64?) + # basic dtypes to check : u1/2/4/8, i1/2/4/8, f4/8, string + class TestCompareVariables__data: """ From 0e5bda978786686407a8f4c792000a249f843c69 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 28 May 2024 15:47:04 +0100 Subject: [PATCH 06/16] Factor out variable comparison; add show-n-diffs control. --- tests/_compare_nc_datasets.py | 260 +++++++++++++++++++--------------- 1 file changed, 143 insertions(+), 117 deletions(-) diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py index 8c5a7df..ef1f202 100644 --- a/tests/_compare_nc_datasets.py +++ b/tests/_compare_nc_datasets.py @@ -15,7 +15,7 @@ import netCDF4 as nc import numpy as np -from ncdata import NcData +from ncdata import NcData, NcVariable def compare_nc_datasets( @@ -26,6 +26,7 @@ def compare_nc_datasets( check_attrs_order: bool = True, check_groups_order: bool = True, check_var_data: bool = True, + show_n_first_different: int = 2, suppress_warnings: bool = False, check_names: bool = False, check_unlimited: bool = True, @@ -45,6 +46,9 @@ def compare_nc_datasets( check_var_data : bool, default True If True, all variable data is also checked for equality. If False, only dtype and shape are compared. + NOTE: comparison of large arrays is done in-memory, so may be highly inefficient. + show_n_first_different: int, default 2 + Number of value differences to display. suppress_warnings : bool, default False When False (the default), report changes in content order as Warnings. When True, ignore changes in ordering. @@ -88,6 +92,7 @@ def compare_nc_datasets( suppress_warnings=suppress_warnings, check_names=check_names, check_unlimited=check_unlimited, + show_n_diffs=show_n_first_different, ) finally: if ds1_was_path and ds1: @@ -123,7 +128,7 @@ def _isncdata(obj): return hasattr(obj, "_print_content") -def _array_eq(a1, a2): +def _attribute_arrays_eq(a1, a2): """ Test equality of array values in attributes. @@ -220,7 +225,7 @@ def fix_orders(attrlist): # If datatypes match (only then), compare values # Cast attrs, which might be strings, to arrays for comparison arr, arr2 = [np.asarray(attr) for attr in (attr, attr2)] - if not _array_eq(arr, arr2): + if not _attribute_arrays_eq(arr, arr2): # N.B. special comparison to handle strings and NaNs msg = ( f'{elemname} "{attrname}" attribute values differ : ' @@ -229,6 +234,135 @@ def fix_orders(attrlist): errs.append(msg) +def _compare_variables( + errs: List[str], + v1: NcVariable, + v2: NcVariable, + group_id_string: str, + attrs_order: bool = True, + data_equality: bool = True, + suppress_warnings: bool = False, + show_n_diffs: int = 2, +): + varname = v1.name + assert v2.name == varname + + var_id_string = f'{group_id_string} variable "{varname}"' + + # dimensions + dims, dims2 = [v.dimensions for v in (v1, v2)] + if dims != dims2: + msg = f"{var_id_string} dimensions differ : {dims!r} != {dims2!r}" + errs.append(msg) + + # attributes + _compare_attributes( + errs, + v1, + v2, + var_id_string, + attrs_order=attrs_order, + suppress_warnings=suppress_warnings, + force_first_attrnames=[ + "_FillValue" + ], # for some reason, this doesn't always list consistently + ) + + # dtypes + dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)] + if dtype != dtype2: + msg = f"{var_id_string} datatypes differ : {dtype!r} != {dtype2!r}" + errs.append(msg) + + # data values + is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, dtype2)) + # TODO: is this correct check to allow compare between different dtypes? + if data_equality and dims == dims2 and is_str == is_str2: + # N.B. don't check shapes here: we already checked dimensions. + # NOTE: no attempt to use laziness here. Could be improved. + def getdata(var): + if _isncdata(var): + data = var.data + if hasattr(data, "compute"): + data = data.compute() + else: + # expect var to be an actual netCDF4.Variable + # (check for obscure property NOT provided by mimics) + assert hasattr(var, "use_nc_get_vars") + data = var[:] + # Return 0D as 1D, as this makes results simpler to interpret. + if data.ndim == 0: + data = data.flatten() + assert data.shape == (1,) + return data + + data, data2 = (getdata(v) for v in (v1, v2)) + flatdata, flatdata2 = ( + np.asanyarray(arr).flatten() for arr in (data, data2) + ) + + # For simpler checking, use flat versions + flat_diff_inds = ( + [] + ) # NB *don't* make this an array, it causes problems + + # Work out whether string : N.B. array type does not ALWAYS match the + # variable type, because apparently the scalar content of a *masked* scalar + # string variable has a numeric type (!! yuck !!) + is_string_data = flatdata.dtype.kind in ("S", "U") + if is_string_data: + safe_fill_const = "" + else: + safe_fill_const = np.zeros((1,), dtype=flatdata.dtype)[0] + + # Where data is masked, count mask mismatches and skip those points + if any(np.ma.is_masked(arr) for arr in (data, data2)): + mask, mask2 = ( + np.ma.getmaskarray(array) for array in (flatdata, flatdata2) + ) + flat_diff_inds = list(np.where(mask != mask2)[0]) + # Replace all masked points to exclude them from unmasked-point checks. + either_masked = mask | mask2 + flatdata[either_masked] = safe_fill_const + flatdata2[either_masked] = safe_fill_const + + # Where data has NANs, count mismatches and skip (as for masked) + if not is_string_data: + isnans, isnans2 = (np.isnan(arr) for arr in (flatdata, flatdata2)) + if np.any(isnans) or np.any(isnans2): + nandiffs = np.where(isnans != isnans2)[0] + if nandiffs: + flat_diff_inds += list(nandiffs) + anynans = isnans | isnans2 + flatdata[anynans] = safe_fill_const + flatdata2[anynans] = safe_fill_const + + flat_diff_inds += list(np.where(flatdata != flatdata2)[0]) + # Order the nonmatching indices : We report just the first few ... + flat_diff_inds = sorted(flat_diff_inds) + n_diffs = len(flat_diff_inds) + if n_diffs: + msg = ( + f"{var_id_string} data contents differ, at {n_diffs} points: " + ) + ellps = ", ..." if n_diffs > show_n_diffs else "" + diffinds = flat_diff_inds[:show_n_diffs] + diffinds = [ + np.unravel_index(ind, shape=data.shape) for ind in diffinds + ] + diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds) + inds_str = f"[{diffinds_str}{ellps}]" + points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds) + points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds) + points_lhs_str = f"[{points_lhs_str}{ellps}]" + points_rhs_str = f"[{points_rhs_str}{ellps}]" + msg += ( + f"@INDICES{inds_str}" + f" : LHS={points_lhs_str}, RHS={points_rhs_str}" + ) + errs.append(msg) + + def _compare_nc_groups( errs: List[str], g1: Union[netCDF4.Dataset, netCDF4.Group], @@ -242,6 +376,7 @@ def _compare_nc_groups( suppress_warnings: bool = False, check_names: bool = False, check_unlimited: bool = True, + show_n_diffs: int = 2, ): """ Inner routine to compare either whole datasets or subgroups. @@ -316,127 +451,17 @@ def _compare_nc_groups( if varname not in varnames2: continue v1, v2 = [grp.variables[varname] for grp in (g1, g2)] - - var_id_string = f'{group_id_string} variable "{varname}"' - - # dimensions - dims, dims2 = [v.dimensions for v in (v1, v2)] - if dims != dims2: - msg = f"{var_id_string} dimensions differ : {dims!r} != {dims2!r}" - errs.append(msg) - - # attributes - _compare_attributes( + _compare_variables( errs, v1, v2, - var_id_string, + group_id_string=group_id_string, attrs_order=attrs_order, + data_equality=data_equality, suppress_warnings=suppress_warnings, - force_first_attrnames=[ - "_FillValue" - ], # for some reason, this doesn't always list consistently + show_n_diffs=show_n_diffs, ) - # dtypes - dtype, dtype2 = [ - v.dtype if _isncdata(v) else v.datatype for v in (v1, v2) - ] - if dtype != dtype2: - msg = f"{var_id_string} datatypes differ : {dtype!r} != {dtype2!r}" - errs.append(msg) - - # data values - is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, dtype2)) - # TODO: is this correct check to allow compare between different dtypes? - if data_equality and dims == dims2 and is_str == is_str2: - # N.B. don't check shapes here: we already checked dimensions. - # NOTE: no attempt to use laziness here. Could be improved. - def getdata(var): - if _isncdata(var): - data = var.data - if hasattr(data, "compute"): - data = data.compute() - else: - # expect var to be an actual netCDF4.Variable - # (check for obscure property NOT provided by mimics) - assert hasattr(var, "use_nc_get_vars") - data = var[:] - # Return 0D as 1D, as this makes results simpler to interpret. - if data.ndim == 0: - data = data.flatten() - assert data.shape == (1,) - return data - - data, data2 = (getdata(v) for v in (v1, v2)) - flatdata, flatdata2 = ( - np.asanyarray(arr).flatten() for arr in (data, data2) - ) - - # For simpler checking, use flat versions - flat_diff_inds = ( - [] - ) # NB *don't* make this an array, it causes problems - - # Work out whether string : N.B. array type does not ALWAYS match the - # variable type, because apparently the scalar content of a *masked* scalar - # string variable has a numeric type (!! yuck !!) - is_string_data = flatdata.dtype.kind in ("S", "U") - if is_string_data: - safe_fill_const = "" - else: - safe_fill_const = np.zeros((1,), dtype=flatdata.dtype)[0] - - # Where data is masked, count mask mismatches and skip those points - if any(np.ma.is_masked(arr) for arr in (data, data2)): - mask, mask2 = ( - np.ma.getmaskarray(array) - for array in (flatdata, flatdata2) - ) - flat_diff_inds = list(np.where(mask != mask2)[0]) - # Replace all masked points to exclude them from unmasked-point checks. - either_masked = mask | mask2 - flatdata[either_masked] = safe_fill_const - flatdata2[either_masked] = safe_fill_const - - # Where data has NANs, count mismatches and skip (as for masked) - if not is_string_data: - isnans, isnans2 = ( - np.isnan(arr) for arr in (flatdata, flatdata2) - ) - if np.any(isnans) or np.any(isnans2): - nandiffs = np.where(isnans != isnans2)[0] - if nandiffs: - flat_diff_inds += list(nandiffs) - anynans = isnans | isnans2 - flatdata[anynans] = safe_fill_const - flatdata2[anynans] = safe_fill_const - - flat_diff_inds += list(np.where(flatdata != flatdata2)[0]) - # Order the nonmatching indices : We report just the first few ... - flat_diff_inds = sorted(flat_diff_inds) - n_diffs = len(flat_diff_inds) - if n_diffs: - msg = f"{var_id_string} data contents differ, at {n_diffs} points: " - ellps = ", ..." if n_diffs > 2 else "" - diffinds = flat_diff_inds[:2] - diffinds = [ - np.unravel_index(ind, shape=data.shape) for ind in diffinds - ] - diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds) - inds_str = f"[{diffinds_str}{ellps}]" - points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds) - points_rhs_str = ", ".join( - repr(data2[ind]) for ind in diffinds - ) - points_lhs_str = f"[{points_lhs_str}{ellps}]" - points_rhs_str = f"[{points_rhs_str}{ellps}]" - msg += ( - f"@INDICES{inds_str}" - f" : LHS={points_lhs_str}, RHS={points_rhs_str}" - ) - errs.append(msg) - # Finally, recurse over groups grpnames, grpnames2 = [list(grp.groups.keys()) for grp in (g1, g2)] _compare_name_lists( @@ -462,4 +487,5 @@ def getdata(var): groups_order=groups_order, data_equality=data_equality, check_unlimited=check_unlimited, + show_n_diffs=show_n_diffs, ) From c367d1b793e7940b73a194ed1ea75ae6139b53de Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 29 May 2024 00:57:28 +0100 Subject: [PATCH 07/16] Complete testing for variables and groups. --- tests/_compare_nc_datasets.py | 5 + ...test_compare_nc_datasets__mainfunctions.py | 346 ++++++++++++++++-- 2 files changed, 325 insertions(+), 26 deletions(-) diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py index ef1f202..9307c9a 100644 --- a/tests/_compare_nc_datasets.py +++ b/tests/_compare_nc_datasets.py @@ -384,6 +384,11 @@ def _compare_nc_groups( Note that, rather than returning a list of error strings, it appends them to the passed arg `errs`. This just makes recursive calling easier. """ + ndiffs = int(show_n_diffs) + if ndiffs < 1: + msg = f"'show_n_diffs' must be >=1 : got {show_n_diffs!r}." + raise ValueError(msg) + if check_names: if g1.name != g2.name: errs.append( diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index 4353ea7..de2c52f 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -6,6 +6,7 @@ * ( ALSO: "additional" tests (q.v.) cover subsidiary routines and the main API usage modes. ) """ +import dask.array as da import numpy as np import pytest @@ -363,7 +364,7 @@ def _vars_data(self, group_context): self.location_string = location_prefix(group_context) self.vars = location.variables - def test_vars_names(self): + def test_var_names(self): self.vars.rename("v2", "q") errs = compare_nc_datasets(self.data1, self.data2) @@ -374,7 +375,7 @@ def test_vars_names(self): ] check(errs, expected) - def test_vars_order(self, order_checking): + def test_var_order(self, order_checking): all_vars = list(self.vars.values()) self.vars.clear() self.vars.addall(all_vars[::-1]) @@ -409,7 +410,7 @@ def test_vars_extra_or_missing(self, order_checking): def test_var_dims__reorder(self, order_checking): # N.B. here we check behaviour of the DIMENSIONS order control, but this does - # *not* apply to dimensions order in a variable,which is always significant. + # not apply to dimensions order in a variable,which is *always* significant. self.vars["v1"].dimensions = self.vars["v1"].dimensions[::-1] # N.B. the data shape doesn't now correspond, but that won't matter as, with # mismatched dimensions, the data won't be checked. @@ -442,33 +443,326 @@ def test_var_dims__extra_or_missing(self, order_checking): ] check(errs, expected) - def test_var_dtype(self): - pass - # PLAN: - # default data has int type (int64?) - # basic dtypes to check : u1/2/4/8, i1/2/4/8, f4/8, string +class TestCompareVariables__dtype: + # Note: testing variable comparison via the 'main' public API instead of + # via '_compare_variables'. This makes sense because it is only called + # in one way, from one place. + @staticmethod + def _vars_testdata(): + def data(): + return np.zeros(3) -class TestCompareVariables__data: - """ - TODO: tests for data equivalence checking. - - Check with various dtypes etc. - To consider ... - * masks - * NaNs - * real+lazy - * int/float/string datatypes - * 0/1/N-dimensional - """ + testdata = NcData( + name="dataset_1", + dimensions=[NcDimension("x", 3)], + variables=[ + NcVariable("v1", ("x"), data=data()), + ], + ) + return testdata + + @pytest.fixture(autouse=True) + def _vars_data(self): + self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] + self.testvar = self.data2.variables["v1"] + + def test_numbers_v_strings(self): + # Set a different dtype + # NB this is different from the actual data array, but that doesn't + # matter, as it won't attempt to compare strings with numbers + self.testvar.dtype = np.dtype("S5") + + # Test the comparison + errs = compare_nc_datasets(self.data1, self.data2) + expected = [ + 'Dataset variable "v1" datatypes differ : ' + "dtype('float64') != dtype('S5')" + ] + check(errs, expected) + + @pytest.mark.parametrize("equaldata", [False, True]) + def test_ints_v_floats(self, equaldata): + # In this case, there is also a data comparison to check. + v1 = self.testvar + + new_dtype = np.dtype(np.int32) + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = compare_nc_datasets(self.data1, self.data2) + + expected = [ + 'Dataset variable "v1" datatypes differ : ' + "dtype('float64') != dtype('int32')" + ] + if not equaldata: + expected.append( + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1]" + ) + check(errs, expected) + + @pytest.mark.parametrize("equaldata", [False, True]) + def test_wordlengths(self, equaldata): + # Test floats with wordlength difference -- assume ints are the same + # In this case, there is also a data comparison to check. + v1 = self.testvar + + new_dtype = np.dtype(np.float32) + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = compare_nc_datasets(self.data1, self.data2) + + expected = [ + 'Dataset variable "v1" datatypes differ : ' + "dtype('float64') != dtype('float32')" + ] + if not equaldata: + expected.append( + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" + ) + check(errs, expected) + + +class TestCompareVariables__data__checkcontrols: + # Note: testing variable comparison via the 'main' public API instead of + # via '_compare_variables'. This makes sense because it is only called + # in one way, from one place. + @staticmethod + def _vars_testdata(): + def data(): + return np.arange(6.0).reshape((2, 3)) + + testdata = NcData( + name="dataset_1", + dimensions=[NcDimension("y", 2), NcDimension("x", 3)], + variables=[ + NcVariable("v1", ("x"), data=data()), + ], + ) + return testdata + + @pytest.fixture(autouse=True) + def _vars_data(self): + self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] + self.reference_var = self.data1.variables["v1"] + self.testvar = self.data2.variables["v1"] + + def test_no_values_check(self): + self.testvar.data += 1 + errs = compare_nc_datasets( + self.data1, self.data2, check_var_data=False + ) + check(errs, []) + + def test_print_bad_nprint(self): + msg = "'show_n_diffs' must be >=1 : got 0." + with pytest.raises(ValueError, match=msg): + compare_nc_datasets( + self.data1, self.data2, show_n_first_different=0 + ) + + @pytest.mark.parametrize("ndiffs", [1, 2, 3]) + def test_ndiffs(self, ndiffs): + self.testvar.data.flat[1 : ndiffs + 1] += 1 + errs = compare_nc_datasets(self.data1, self.data2) + detail = { + 1: "[(0, 1)] : LHS=[1.0], RHS=[2.0]", + 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + 3: ( + "[(0, 1), (0, 2), ...] : " + "LHS=[1.0, 2.0, ...], RHS=[2.0, 3.0, ...]" + ), + }[ndiffs] + expected = [ + f'Dataset variable "v1" data contents differ, at {ndiffs} points: ' + f"@INDICES{detail}" + ] + check(errs, expected) + + @pytest.mark.parametrize("nprint", [1, 2, 3]) + def test_show_n_first_different(self, nprint): + self.testvar.data.flat[1:3] += 1 + errs = compare_nc_datasets( + self.data1, self.data2, show_n_first_different=nprint + ) + detail = { + 1: "[(0, 1), ...] : LHS=[1.0, ...], RHS=[2.0, ...]", + 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + 3: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + }[nprint] + expected = [ + f'Dataset variable "v1" data contents differ, at 2 points: ' + f"@INDICES{detail}" + ] + check(errs, expected) + + +class TestCompareVariables__data__diffreports: + # Note: testing variable comparison via the 'main' public API instead of + # via '_compare_variables'. This makes sense because it is only called + # in one way, from one place. + @staticmethod + def _vars_testdata(): + def data(): + return np.arange(4.0) + + testdata = NcData( + name="dataset_1", + dimensions=[NcDimension("x", 4)], + variables=[ + NcVariable("v1", ("x"), data=data()), + ], + ) + return testdata + + @pytest.fixture(autouse=True) + def _vars_data(self): + self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] + self.reference_var = self.data1.variables["v1"] + self.testvar = self.data2.variables["v1"] + + @pytest.mark.parametrize("datavalues", ["same", "different"]) + @pytest.mark.parametrize("masks", ["onemasked", "bothmasked"]) + def test_masked(self, datavalues, masks): + different = datavalues == "different" + bothmasked = masks == "bothmasked" + testvar = self.testvar + testvar.data = np.ma.masked_array(testvar.data) + if different: + testvar.data[1:2] += 1 + testvar.data[1:2] = np.ma.masked + if bothmasked: + self.reference_var.data = np.ma.masked_array( + self.reference_var.data + ) + self.reference_var.data[1:2] = np.ma.masked + errs = compare_nc_datasets(self.data1, self.data2) + if bothmasked: + expected = [] + else: + expected = [ + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[masked]" + ] + check(errs, expected) + + @pytest.mark.parametrize("nans", ["onenans", "bothnans"]) + def test_nans(self, nans): + bothnans = nans == "bothnans" + self.testvar.data[1:2] = np.nan + if bothnans: + self.reference_var.data[1:2] = np.nan + errs = compare_nc_datasets(self.data1, self.data2) + if bothnans: + expected = [] + else: + expected = [ + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[nan]" + ] + check(errs, expected) + + def test_scalar(self): + # Check how a difference of scalar arrays is reported + for value, var in enumerate([self.reference_var, self.testvar]): + var.dimensions = () + var.data = np.array(value, dtype=var.dtype) + errs = compare_nc_datasets(self.data1, self.data2) + expected = [ + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" + ] + check(errs, expected) + + @pytest.mark.parametrize( + "argtypes", ["real_real", "real_lazy", "lazy_lazy"] + ) + def test_real_and_lazy(self, argtypes): + type1, type2 = argtypes[:4], argtypes[-4:] + # fix the testvar to create a difference + self.testvar.data[1:2] += 1 + # setup vars with lazy/real data arrays + for arraytype, var in zip( + [type1, type2], [self.reference_var, self.testvar] + ): + if arraytype == "lazy": + var.data = da.from_array(var.data, chunks=-1) + # compare + check results + errs = compare_nc_datasets(self.data1, self.data2) + # N.B. the result should be the same in all cases + expected = [ + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]" + ] + check(errs, expected) class TestCompareGroups: - def test_names(self): - pass + @staticmethod + def _groups_testdata(): + testdata = NcData( + name="dataset_1", + groups=[ + NcData(name=name, attributes=[NcAttribute("attr_1", 1)]) + for name in ("g1", "g2") + ], + ) + return testdata - def test_order(self): - pass + @pytest.fixture(autouse=True) + def _groups_data(self): + self.data1, self.data2 = [self._groups_testdata() for _ in range(2)] + self.groups = self.data2.groups - def test_extra_or_missing(self): - pass + def test_group_names(self): + self.groups.rename("g2", "q") + + errs = compare_nc_datasets(self.data1, self.data2) + + expected = [ + "Dataset subgroup lists do not match: ['g1', 'g2'] != ['g1', 'q']" + ] + check(errs, expected) + + def test_group_order(self, order_checking): + all_groups = list(self.groups.values()) + self.groups.clear() + self.groups.addall(all_groups[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) + errs = compare_nc_datasets( + self.data1, self.data2, check_groups_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + "Dataset subgroup lists do not match: " + "['g1', 'g2'] != ['g2', 'g1']" + ] + else: + expected = [] + check(errs, expected) + + def test_groups_extra_or_missing(self, order_checking): + del self.groups["g1"] + + do_ordercheck = decode_ordercheck(order_checking) + errs = compare_nc_datasets( + self.data1, self.data2, check_groups_order=do_ordercheck + ) + + # NB since the sets are different, the ordering control has no effect + expected = [ + "Dataset subgroup lists do not match: ['g1', 'g2'] != ['g2']" + ] + check(errs, expected) From e126ba7c22407d0bc2c918d0283caa1ec92d39b0 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 29 May 2024 01:08:15 +0100 Subject: [PATCH 08/16] Fix test. --- tests/unit/netcdf/test_from_nc4.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/netcdf/test_from_nc4.py b/tests/unit/netcdf/test_from_nc4.py index 61c3c19..58fac7c 100644 --- a/tests/unit/netcdf/test_from_nc4.py +++ b/tests/unit/netcdf/test_from_nc4.py @@ -84,7 +84,7 @@ def test_target_types(sourcetype, tmp_path): variables=[ NcVariable( name="x", - dimensions=("xdim"), + dimensions=("xdim",), dtype=np.float32, data=[1.23, 2, 9], ) From 0226576e1ffdc715645f5c1562bd7267d8b1f581 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 29 May 2024 01:12:52 +0100 Subject: [PATCH 09/16] Add pytest-mock to testing dependencies. --- .github/workflows/ci-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index afdaa2b..4455bf1 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -35,7 +35,7 @@ jobs: - name: "Install dependencies" run: | - conda install --yes pytest iris xarray filelock requests + conda install --yes pytest pytest-mock iris xarray filelock requests - name: "Install *latest* Iris" run: | From 51d2e4192d75c8256e5039aa6bdf208006504b41 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 29 May 2024 13:58:35 +0100 Subject: [PATCH 10/16] Add additional testcases. --- .../test_compare_nc_datasets__additional.py | 18 ++++++++++ ...test_compare_nc_datasets__mainfunctions.py | 34 ++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/tests/unit/tests/test_compare_nc_datasets__additional.py b/tests/unit/tests/test_compare_nc_datasets__additional.py index c01f929..fab40f0 100644 --- a/tests/unit/tests/test_compare_nc_datasets__additional.py +++ b/tests/unit/tests/test_compare_nc_datasets__additional.py @@ -166,6 +166,14 @@ def test_compare_attributes_values__allok(self): _compare_attributes(errs, obj1, obj2, "") assert errs == [] + def test_compare_attributes_values__scalar_arrayof1(self): + # Objects with matching attributes + obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) + obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=[2]) + errs = [] + _compare_attributes(errs, obj1, obj2, "") + assert errs == [] + def test_compare_attributes_values__data_mismatch(self): # Attributes of different value (but matching dtype) obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2, c=3) @@ -363,6 +371,16 @@ def test_compare_attributes_values__string_array_mismatch(self): "['a', 'b'] != ['a', 'c']" ] + def test_compare_attributes__ncdata_string_scalar_array(self): + # Attributes of string type (since netCDF4 returns char attributes as string) + from ncdata import NcAttribute, NcData + + obj1 = NcData(attributes=[NcAttribute("x", ["string"])]) + obj2 = NcData(attributes=[NcAttribute("x", "string")]) + errs = [] + _compare_attributes(errs, obj1, obj2, "") + assert errs == [] + @pytest.fixture(autouse=True, scope="module") def temp_ncfiles_dir(tmp_path_factory): diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index de2c52f..2a0b162 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -465,7 +465,9 @@ def data(): @pytest.fixture(autouse=True) def _vars_data(self): self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] - self.testvar = self.data2.variables["v1"] + self.reference_var, self.testvar = ( + ds.variables["v1"] for ds in (self.data1, self.data2) + ) def test_numbers_v_strings(self): # Set a different dtype @@ -532,6 +534,36 @@ def test_wordlengths(self, equaldata): ) check(errs, expected) + @pytest.mark.parametrize("equaldata", [False, True]) + def test_signed_unsigned(self, equaldata): + # Test floats with wordlength difference -- assume ints are the same + # In this case, there is also a data comparison to check. + new_dtype = np.dtype(np.int64) + v0 = self.reference_var + v0.data = v0.data.astype(new_dtype) + v0.dtype = new_dtype + + new_dtype = np.dtype(np.uint64) + v1 = self.testvar + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = compare_nc_datasets(self.data1, self.data2) + + expected = [ + 'Dataset variable "v1" datatypes differ : ' + "dtype('int64') != dtype('uint64')" + ] + if not equaldata: + expected.append( + 'Dataset variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0], RHS=[1]" + ) + check(errs, expected) + class TestCompareVariables__data__checkcontrols: # Note: testing variable comparison via the 'main' public API instead of From 8446685e8ad6024b2907a087e9458d297e437163 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 29 May 2024 14:53:15 +0100 Subject: [PATCH 11/16] Rename + reorganise calling interfaces: variable-compare (still) not public. --- tests/_compare_nc_datasets.py | 62 +++++----- .../ex_ncdata_netcdf_conversion.py | 4 +- .../test_iris_load_and_save_equivalence.py | 6 +- .../test_iris_xarray_roundtrips.py | 8 +- tests/integration/test_netcdf_roundtrips.py | 4 +- .../test_xarray_load_and_save_equivalence.py | 4 +- tests/unit/netcdf/test_from_nc4.py | 6 +- tests/unit/netcdf/test_to_nc4.py | 4 +- .../test_compare_nc_datasets__additional.py | 112 +++++++----------- ...test_compare_nc_datasets__mainfunctions.py | 70 +++++------ 10 files changed, 121 insertions(+), 159 deletions(-) diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py index 9307c9a..b8560b9 100644 --- a/tests/_compare_nc_datasets.py +++ b/tests/_compare_nc_datasets.py @@ -18,7 +18,7 @@ from ncdata import NcData, NcVariable -def compare_nc_datasets( +def dataset_differences( dataset_or_path_1: Union[Path, AnyStr, nc.Dataset, NcData], dataset_or_path_2: Union[Path, AnyStr, nc.Dataset, NcData], check_dims_order: bool = True, @@ -32,7 +32,7 @@ def compare_nc_datasets( check_unlimited: bool = True, ) -> List[str]: r""" - Compare netcdf data. + Compare netcdf data objects. Accepts paths, pathstrings, open :class:`netCDF4.Dataset`\\s or :class:`NcData` objects. @@ -60,7 +60,7 @@ def compare_nc_datasets( Returns ------- errs : list of str - a list of error strings. + A list of "error" strings, describing differences between the inputs. If empty, no differences were found. """ @@ -78,9 +78,7 @@ def compare_nc_datasets( else: ds2 = dataset_or_path_2 - errs = [] - _compare_nc_groups( - errs, + errs = _group_differences( ds1, ds2, group_id_string="Dataset", @@ -103,9 +101,10 @@ def compare_nc_datasets( return errs -def _compare_name_lists( - errslist, l1, l2, elemname, order_strict=True, suppress_warnings=False +def _namelist_differences( + l1, l2, elemname, order_strict=True, suppress_warnings=False ): + errs = [] msg = f"{elemname} do not match: {list(l1)} != {list(l2)}" ok = l1 == l2 ok_except_order = ok @@ -114,9 +113,10 @@ def _compare_name_lists( if not ok: if not ok_except_order or order_strict: - errslist.append(msg) + errs.append(msg) elif ok_except_order and not suppress_warnings: warn("(Ignoring: " + msg + " )", category=UserWarning) + return errs def _isncdata(obj): @@ -148,15 +148,14 @@ def _attribute_arrays_eq(a1, a2): return result -def _compare_attributes( - errs, +def _attribute_differences( obj1, obj2, elemname, attrs_order=True, suppress_warnings=False, force_first_attrnames=None, -): +) -> List[str]: """ Compare attribute name lists. @@ -178,8 +177,7 @@ def fix_orders(attrlist): attrnames = fix_orders(attrnames) attrnames2 = fix_orders(attrnames2) - _compare_name_lists( - errs, + errs = _namelist_differences( attrnames, attrnames2, f"{elemname} attribute lists", @@ -232,10 +230,10 @@ def fix_orders(attrlist): f"{attr!r} != {attr2!r}" ) errs.append(msg) + return errs -def _compare_variables( - errs: List[str], +def _variable_differences( v1: NcVariable, v2: NcVariable, group_id_string: str, @@ -243,7 +241,8 @@ def _compare_variables( data_equality: bool = True, suppress_warnings: bool = False, show_n_diffs: int = 2, -): +) -> List[str]: + errs = [] varname = v1.name assert v2.name == varname @@ -256,8 +255,7 @@ def _compare_variables( errs.append(msg) # attributes - _compare_attributes( - errs, + errs += _attribute_differences( v1, v2, var_id_string, @@ -361,10 +359,10 @@ def getdata(var): f" : LHS={points_lhs_str}, RHS={points_rhs_str}" ) errs.append(msg) + return errs -def _compare_nc_groups( - errs: List[str], +def _group_differences( g1: Union[netCDF4.Dataset, netCDF4.Group], g2: Union[netCDF4.Dataset, netCDF4.Group], group_id_string: str, @@ -377,13 +375,14 @@ def _compare_nc_groups( check_names: bool = False, check_unlimited: bool = True, show_n_diffs: int = 2, -): +) -> List[str]: """ Inner routine to compare either whole datasets or subgroups. Note that, rather than returning a list of error strings, it appends them to the passed arg `errs`. This just makes recursive calling easier. """ + errs = [] ndiffs = int(show_n_diffs) if ndiffs < 1: msg = f"'show_n_diffs' must be >=1 : got {show_n_diffs!r}." @@ -396,8 +395,7 @@ def _compare_nc_groups( ) # Compare lists of dimension names dimnames, dimnames2 = [list(grp.dimensions.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, + errs += _namelist_differences( dimnames, dimnames2, f"{group_id_string} dimension lists", @@ -431,8 +429,7 @@ def _compare_nc_groups( errs.append(msg) # Compare file attributes - _compare_attributes( - errs, + errs += _attribute_differences( g1, g2, group_id_string, @@ -442,8 +439,7 @@ def _compare_nc_groups( # Compare lists of variables varnames, varnames2 = [list(grp.variables.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, + errs += _namelist_differences( varnames, varnames2, f"{group_id_string} variable lists", @@ -456,8 +452,7 @@ def _compare_nc_groups( if varname not in varnames2: continue v1, v2 = [grp.variables[varname] for grp in (g1, g2)] - _compare_variables( - errs, + errs += _variable_differences( v1, v2, group_id_string=group_id_string, @@ -469,8 +464,7 @@ def _compare_nc_groups( # Finally, recurse over groups grpnames, grpnames2 = [list(grp.groups.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, + errs += _namelist_differences( grpnames, grpnames2, f"{group_id_string} subgroup lists", @@ -481,8 +475,7 @@ def _compare_nc_groups( if grpname not in grpnames2: continue grp1, grp2 = [grp.groups[grpname] for grp in (g1, g2)] - _compare_nc_groups( - errs, + errs += _group_differences( grp1, grp2, group_id_string=f"{group_id_string}/{grpname}", @@ -494,3 +487,4 @@ def _compare_nc_groups( check_unlimited=check_unlimited, show_n_diffs=show_n_diffs, ) + return errs diff --git a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py index f1b2355..dd32f7f 100644 --- a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py +++ b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py @@ -13,7 +13,7 @@ from ncdata import NcAttribute, NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4, to_nc4 from tests import testdata_dir -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences def example_nc4_load_save_roundtrip(): # noqa: D103 @@ -28,7 +28,7 @@ def example_nc4_load_save_roundtrip(): # noqa: D103 filepath2 = tempdir_path / "temp_nc_output.nc" to_nc4(ncdata, filepath2) - result = compare_nc_datasets(filepath, filepath2) + result = dataset_differences(filepath, filepath2) equals_result = result == [] print("\nFiles compare? :", equals_result) assert equals_result diff --git a/tests/integration/test_iris_load_and_save_equivalence.py b/tests/integration/test_iris_load_and_save_equivalence.py index 042d8c9..40bdd3b 100644 --- a/tests/integration/test_iris_load_and_save_equivalence.py +++ b/tests/integration/test_iris_load_and_save_equivalence.py @@ -11,7 +11,7 @@ import pytest from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase from tests.integration.equivalence_testing_utils import ( adjust_chunks, @@ -93,7 +93,7 @@ def test_load_direct_vs_viancdata( if not result: # FOR NOW: compare with experimental ncdata comparison. # I know this is a bit circular, but it is useful for debugging, for now ... - result = compare_nc_datasets( + result = dataset_differences( from_iris(iris_cubes), from_iris(iris_ncdata_cubes) ) assert result == [] @@ -144,5 +144,5 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): print(txt) # Check equivalence - results = compare_nc_datasets(temp_iris_savepath, temp_ncdata_savepath) + results = dataset_differences(temp_iris_savepath, temp_ncdata_savepath) assert results == [] diff --git a/tests/integration/test_iris_xarray_roundtrips.py b/tests/integration/test_iris_xarray_roundtrips.py index 160860a..bbc4905 100644 --- a/tests/integration/test_iris_xarray_roundtrips.py +++ b/tests/integration/test_iris_xarray_roundtrips.py @@ -20,7 +20,7 @@ from ncdata.netcdf4 import from_nc4 from ncdata.threadlock_sharing import lockshare_context from ncdata.xarray import from_xarray -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, @@ -172,7 +172,7 @@ def test_roundtrip_ixi(standard_testcase, use_irislock, adjust_chunks): if not result: # FOR NOW: compare with experimental ncdata comparison. # I know this is a bit circular, but it is useful for debugging, for now ... - result = compare_nc_datasets( + result = dataset_differences( from_iris(iris_cubes), from_iris(iris_xr_cubes) ) assert result == [] @@ -299,14 +299,14 @@ def test_roundtrip_xix( "calendar", "standard" ) - result = compare_nc_datasets( + result = dataset_differences( ncds_xr, ncds_xr_iris ) # , check_var_data=False) assert result == [] # TODO: check equivalence, in Xarray terms # xr_result = xrds_iris.equals(xrds) - # ncd_result = compare_nc_datasets( + # ncd_result = dataset_differences( # ncds_xr, ncds_xr_iris # ) # , check_var_data=False) # print("\nDATASET COMPARE RESULTS:\n" + "\n".join(ncd_result)) diff --git a/tests/integration/test_netcdf_roundtrips.py b/tests/integration/test_netcdf_roundtrips.py index 79e258d..a2643e5 100644 --- a/tests/integration/test_netcdf_roundtrips.py +++ b/tests/integration/test_netcdf_roundtrips.py @@ -4,7 +4,7 @@ from subprocess import check_output from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase # Avoid complaints that the imported fixtures are "unused" @@ -38,5 +38,5 @@ def test_basic(standard_testcase, tmp_path): print(txt) # Check that the re-saved file matches the original - results = compare_nc_datasets(source_filepath, intermediate_filepath) + results = dataset_differences(source_filepath, intermediate_filepath) assert results == [] diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 92153fa..8b61830 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -11,7 +11,7 @@ from ncdata.netcdf4 import from_nc4, to_nc4 from ncdata.threadlock_sharing import lockshare_context from ncdata.xarray import from_xarray, to_xarray -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, @@ -74,7 +74,7 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): to_nc4(ncds_fromxr, temp_ncdata_savepath) # Check equivalence - results = compare_nc_datasets( + results = dataset_differences( temp_direct_savepath, temp_ncdata_savepath, check_dims_order=False, diff --git a/tests/unit/netcdf/test_from_nc4.py b/tests/unit/netcdf/test_from_nc4.py index 58fac7c..66265b6 100644 --- a/tests/unit/netcdf/test_from_nc4.py +++ b/tests/unit/netcdf/test_from_nc4.py @@ -16,7 +16,7 @@ from ncdata import NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset @@ -38,7 +38,7 @@ def test_target_types(sourcetype, tmp_path): """Check the various ways of specifying the input data.""" # This testcase is a rather complicated, but we need to test with groups, and we # may as well also test for variables which map dimensions from multiple levels. - # In effect, this is also exercising tricky bits of 'compare_nc_datasets' !! + # In effect, this is also exercising tricky bits of 'dataset_differences' !! test_spec = { "dims": [dict(name="xdim", size=3)], "vars": [ @@ -107,5 +107,5 @@ def test_target_types(sourcetype, tmp_path): if sourcetype == "group": ncdata_expected = ncdata_expected.groups["inner_group"] - diffs = compare_nc_datasets(ncdata, ncdata_expected) + diffs = dataset_differences(ncdata, ncdata_expected) assert diffs == [] diff --git a/tests/unit/netcdf/test_to_nc4.py b/tests/unit/netcdf/test_to_nc4.py index e72fd52..0c34ca8 100644 --- a/tests/unit/netcdf/test_to_nc4.py +++ b/tests/unit/netcdf/test_to_nc4.py @@ -17,7 +17,7 @@ from ncdata import NcData from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset @@ -61,7 +61,7 @@ def test_target_types(targettype, tmp_path): target.close() assert target_path.exists() - assert compare_nc_datasets(target_path, original_path) == [] + assert dataset_differences(target_path, original_path) == [] def fetch_nc_var(nc_file: nc.Dataset, var_path: str or List[str]): diff --git a/tests/unit/tests/test_compare_nc_datasets__additional.py b/tests/unit/tests/test_compare_nc_datasets__additional.py index fab40f0..485cfe4 100644 --- a/tests/unit/tests/test_compare_nc_datasets__additional.py +++ b/tests/unit/tests/test_compare_nc_datasets__additional.py @@ -14,9 +14,9 @@ import pytest from tests._compare_nc_datasets import ( - _compare_attributes, - _compare_name_lists, - compare_nc_datasets, + _attribute_differences, + _namelist_differences, + dataset_differences, ) from tests.test_samplecode_cdlgen_comparablecdl import ncgen_from_cdl @@ -35,38 +35,32 @@ """ -class Test__compare_name_lists: +class Test_namelist_differences: # Test subsidiary routine for checking a list of names def test_empty(self): - errs = [] - _compare_name_lists(errs, [], [], "named-elements") + errs = _namelist_differences([], [], "named-elements") assert errs == [] def test_same(self): tst = ["a", "b"] - errs = [] - _compare_name_lists(errs, tst, tst, "named-elements") + errs = _namelist_differences(tst, tst, "named-elements") assert errs == [] def test_diff(self): - errs = [] - _compare_name_lists(errs, ["a"], [], "named-elements") + errs = _namelist_differences(["a"], [], "named-elements") assert errs == ["named-elements do not match: ['a'] != []"] def test_difforder(self): - errs = [] - _compare_name_lists(errs, ["a", "b"], ["b", "a"], "named-elements") + errs = _namelist_differences(["a", "b"], ["b", "a"], "named-elements") assert errs == [ "named-elements do not match: ['a', 'b'] != ['b', 'a']" ] def test_difforder_tolerant_warns(self): - errs = [] with pytest.warns( UserWarning, match="Ignoring: named-elements do not match" ): - _compare_name_lists( - errs, + errs = _namelist_differences( ["a", "b"], ["b", "a"], "named-elements", @@ -75,11 +69,9 @@ def test_difforder_tolerant_warns(self): assert errs == [] def test_difforder_tolerant_nowarn(self): - errs = [] with warnings.catch_warnings(): warnings.simplefilter("error") - _compare_name_lists( - errs, + errs = _namelist_differences( ["a", "b"], ["b", "a"], "named-elements", @@ -89,9 +81,9 @@ def test_difforder_tolerant_nowarn(self): assert errs == [] -class Test__compare_attributes: +class Test_attribute_differences: def test_compare_attributes_namelists(self, mocker): - # Check that it calls the generic _compare_name_lists routine, passing all the + # Check that it calls the generic _namelist_differences routine, passing all the # correct controls # NB make the compared object mimic nc Variables, not NcData attrnames_1 = ["a", "b"] @@ -102,14 +94,12 @@ def test_compare_attributes_namelists(self, mocker): obj2 = mocker.Mock( spec=nc.Variable, ncattrs=mocker.Mock(return_value=attrnames_2) ) - errs = mocker.sentinel.errors_list elemname = "" order = mocker.sentinel.attrs_order suppress = mocker.sentinel.suppress_warnings - tgt = "tests._compare_nc_datasets._compare_name_lists" + tgt = "tests._compare_nc_datasets._namelist_differences" patch_tgt = mocker.patch(tgt) - _compare_attributes( - errs=errs, + _attribute_differences( obj1=obj1, obj2=obj2, elemname=elemname, @@ -118,7 +108,6 @@ def test_compare_attributes_namelists(self, mocker): ) (one_call,) = patch_tgt.call_args_list assert one_call == mocker.call( - errs, attrnames_1, attrnames_2, " attribute lists", @@ -154,32 +143,28 @@ def test_compare_attributes_empty(self): # Test two objects with no attributes obj1 = self.Nc4ObjectWithAttrsMimic() obj2 = self.Nc4ObjectWithAttrsMimic() - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__allok(self): # Objects with matching attributes obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__scalar_arrayof1(self): # Objects with matching attributes obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=[2]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_mismatch(self): # Attributes of different value (but matching dtype) obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2, c=3) obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=-77, c=3) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "b" attribute values differ : 2 != -77' ] @@ -188,8 +173,7 @@ def test_compare_attributes_values__dtype_mismatch__length(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.float64(0)) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -201,8 +185,7 @@ def test_compare_attributes_values__dtype_mismatch__signed_unsigned(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.uint32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -214,8 +197,7 @@ def test_compare_attributes_values__dtype_mismatch__float_int(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -227,8 +209,7 @@ def test_compare_attributes_values__dtype_mismatch__numeric_string(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a="this") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -240,8 +221,7 @@ def test_compare_attributes_values__dtype_and_data_mismatch(self): # Attributes of different dtypes, but values != obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.float64(1)) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "a" attribute datatypes differ : ' "dtype('float32') != dtype('float64')" @@ -252,8 +232,7 @@ def test_compare_attributes_values__data_arrays_match(self): array = np.arange(3.0) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_arrays_dtype_mismatch(self): @@ -261,8 +240,7 @@ def test_compare_attributes_values__data_arrays_dtype_mismatch(self): array = np.arange(3, dtype="f4") obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array.astype("f8")) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -275,8 +253,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self): array = np.arange(3) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array[:-1]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -290,8 +267,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self): array2 = np.array([1, 2, 777]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array1) obj2 = self.Nc4ObjectWithAttrsMimic(a=array2) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -304,8 +280,7 @@ def test_compare_attributes_values__data_arrays_nans_match(self): array = np.array([1, np.nan, 3]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_arrays_nans_mismatch(self): @@ -314,8 +289,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self): array2 = np.array([1.0, np.nan, 3.0]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array1) obj2 = self.Nc4ObjectWithAttrsMimic(a=array2) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -327,8 +301,7 @@ def test_compare_attributes_values__string_nonstring(self): # Attributes of string and non-string types, since we handle that differently obj1 = self.Nc4ObjectWithAttrsMimic(a=1) obj2 = self.Nc4ObjectWithAttrsMimic(a="1") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "a" attribute datatypes differ : ' "dtype('int64') != " @@ -338,16 +311,14 @@ def test_compare_attributes_values__string_match(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S="this") obj2 = self.Nc4ObjectWithAttrsMimic(S="this") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__string_mismatch(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S="this") obj2 = self.Nc4ObjectWithAttrsMimic(S="that") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ " \"S\" attribute values differ : 'this' != 'that'" ] @@ -356,16 +327,14 @@ def test_compare_attributes_values__string_array_match(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) obj2 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__string_array_mismatch(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) obj2 = self.Nc4ObjectWithAttrsMimic(S=["a", "c"]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "S" attribute values differ : ' "['a', 'b'] != ['a', 'c']" @@ -377,8 +346,7 @@ def test_compare_attributes__ncdata_string_scalar_array(self): obj1 = NcData(attributes=[NcAttribute("x", ["string"])]) obj2 = NcData(attributes=[NcAttribute("x", "string")]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] @@ -417,12 +385,12 @@ def samefiles_bothtypes(samefiles_filesonly, sourcetype): class Test_compare_nc_files__api: def test_identical(self, samefiles_bothtypes): source1, source2 = samefiles_bothtypes - result = compare_nc_datasets(source1, source2) + result = dataset_differences(source1, source2) assert result == [] def test_identical_stringpaths(self, samefiles_filesonly): source1, source2 = samefiles_filesonly - result = compare_nc_datasets(str(source1), str(source2)) + result = dataset_differences(str(source1), str(source2)) assert result == [] def test_identical_datasets(self, samefiles_filesonly, sourcetype): @@ -431,7 +399,7 @@ def test_identical_datasets(self, samefiles_filesonly, sourcetype): try: ds1 = nc.Dataset(source1) ds2 = nc.Dataset(source2) - result = compare_nc_datasets(ds1, ds2) + result = dataset_differences(ds1, ds2) assert result == [] finally: for ds in (ds1, ds2): @@ -454,7 +422,7 @@ def test_small_difference( # Source1/2 are NcData : just modify source2 source2.attributes["extra_global_attr"] = 1 - result = compare_nc_datasets(source1, source2) + result = dataset_differences(source1, source2) assert result == [ "Dataset attribute lists do not match: [] != ['extra_global_attr']" ] @@ -486,7 +454,7 @@ def test_vardata_difference( if ds is not None: ds.close() - result = compare_nc_datasets(source1, source2) + result = dataset_differences(source1, source2) # N.B. ncdata comparison bypasses the masked+scaled view of data, hence the # message differs. Could fix this? mask1 = "masked" if sourcetype == "InputsFile" else "9.96921e+36" diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index 2a0b162..3449523 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -11,7 +11,7 @@ import pytest from ncdata import NcAttribute, NcData, NcDimension, NcVariable -from tests._compare_nc_datasets import compare_nc_datasets +from tests._compare_nc_datasets import dataset_differences # from tests.data_testcase_schemas import _Datatype_Sample_Values, data_types # data_types # avoid 'unused' warning @@ -97,7 +97,7 @@ def test_names(self, namecheck, altname): # Use kwargs just to confirm that the default for name-checking is 'off' kwargs = dict(check_names=True) if do_namecheck else {} - errs = compare_nc_datasets(data1, data2, **kwargs) + errs = dataset_differences(data1, data2, **kwargs) if do_namecheck: expected = [f"Datasets have different names: 'x' != {altname!r}."] @@ -134,7 +134,7 @@ def _dims_data(self, group_context): def test_name(self): self.dims.rename("x", "q") - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ f"{self.location_string} dimension lists do not match: " "['x', 'y'] != ['q', 'y']" @@ -144,7 +144,7 @@ def test_name(self): def test_size(self): self.dims["x"].size = 77 - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ f'{self.location_string} "x" dimensions have different sizes: 2 != 77' @@ -160,7 +160,7 @@ def test_unlimited(self, check_unlim): do_check_unlims = {"unlims_checked": True, "unlims_unchecked": False}[ check_unlim ] - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_unlimited=do_check_unlims ) @@ -180,7 +180,7 @@ def test_ordering(self, order_checking): self.dims.addall(all_dims[::-1]) do_ordercheck = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_dims_order=do_ordercheck ) @@ -200,7 +200,7 @@ def test_extra_or_missing(self): self.dims.clear() self.dims.addall(all_dims[:-1]) - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ f"{self.location_string} dimension lists do not match: " @@ -252,7 +252,7 @@ def _attrs_data(self, group_context, attr_context): def test_name(self): self.attrs.rename("att1", "changed") - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ f"{self.location_string} attribute lists do not match: " @@ -263,7 +263,7 @@ def test_name(self): def test_value(self, attr_context): self.attrs["att1"].value = np.array(999) - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) if "variable" in attr_context: value_string = "1" @@ -281,7 +281,7 @@ def test_ordering(self, order_checking): self.attrs.clear() self.attrs.addall(all_attrs[::-1]) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_attrs_order=do_ordercheck ) @@ -298,7 +298,7 @@ def test_extra_or_missing(self, order_checking): do_ordercheck = decode_ordercheck(order_checking) del self.attrs["att1"] - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_attrs_order=do_ordercheck ) @@ -323,7 +323,7 @@ def test_fillvalue_anyorder(self, attname): for attrs in (attr_pair, attr_pair[::-1]) ] - errs = compare_nc_datasets(data1, data2) + errs = dataset_differences(data1, data2) if "generic" in attname: expected = [ @@ -367,7 +367,7 @@ def _vars_data(self, group_context): def test_var_names(self): self.vars.rename("v2", "q") - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ f"{self.location_string} variable lists do not match: " @@ -381,7 +381,7 @@ def test_var_order(self, order_checking): self.vars.addall(all_vars[::-1]) do_ordercheck = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_vars_order=do_ordercheck ) @@ -398,7 +398,7 @@ def test_vars_extra_or_missing(self, order_checking): del self.vars["v1"] do_ordercheck = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_vars_order=do_ordercheck ) @@ -416,7 +416,7 @@ def test_var_dims__reorder(self, order_checking): # mismatched dimensions, the data won't be checked. do_orderchecks = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_dims_order=do_orderchecks ) @@ -433,7 +433,7 @@ def test_var_dims__extra_or_missing(self, order_checking): # mismatched dimensions, the data won't be checked. do_orderchecks = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_dims_order=do_orderchecks ) @@ -446,7 +446,7 @@ def test_var_dims__extra_or_missing(self, order_checking): class TestCompareVariables__dtype: # Note: testing variable comparison via the 'main' public API instead of - # via '_compare_variables'. This makes sense because it is only called + # via '_variable_differences'. This makes sense because it is only called # in one way, from one place. @staticmethod def _vars_testdata(): @@ -476,7 +476,7 @@ def test_numbers_v_strings(self): self.testvar.dtype = np.dtype("S5") # Test the comparison - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ 'Dataset variable "v1" datatypes differ : ' "dtype('float64') != dtype('S5')" @@ -495,7 +495,7 @@ def test_ints_v_floats(self, equaldata): v1.dtype = new_dtype # Test the comparison - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ 'Dataset variable "v1" datatypes differ : ' @@ -521,7 +521,7 @@ def test_wordlengths(self, equaldata): v1.dtype = new_dtype # Test the comparison - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ 'Dataset variable "v1" datatypes differ : ' @@ -551,7 +551,7 @@ def test_signed_unsigned(self, equaldata): v1.dtype = new_dtype # Test the comparison - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ 'Dataset variable "v1" datatypes differ : ' @@ -567,7 +567,7 @@ def test_signed_unsigned(self, equaldata): class TestCompareVariables__data__checkcontrols: # Note: testing variable comparison via the 'main' public API instead of - # via '_compare_variables'. This makes sense because it is only called + # via '_variable_differences'. This makes sense because it is only called # in one way, from one place. @staticmethod def _vars_testdata(): @@ -591,7 +591,7 @@ def _vars_data(self): def test_no_values_check(self): self.testvar.data += 1 - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_var_data=False ) check(errs, []) @@ -599,14 +599,14 @@ def test_no_values_check(self): def test_print_bad_nprint(self): msg = "'show_n_diffs' must be >=1 : got 0." with pytest.raises(ValueError, match=msg): - compare_nc_datasets( + dataset_differences( self.data1, self.data2, show_n_first_different=0 ) @pytest.mark.parametrize("ndiffs", [1, 2, 3]) def test_ndiffs(self, ndiffs): self.testvar.data.flat[1 : ndiffs + 1] += 1 - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) detail = { 1: "[(0, 1)] : LHS=[1.0], RHS=[2.0]", 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", @@ -624,7 +624,7 @@ def test_ndiffs(self, ndiffs): @pytest.mark.parametrize("nprint", [1, 2, 3]) def test_show_n_first_different(self, nprint): self.testvar.data.flat[1:3] += 1 - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, show_n_first_different=nprint ) detail = { @@ -641,7 +641,7 @@ def test_show_n_first_different(self, nprint): class TestCompareVariables__data__diffreports: # Note: testing variable comparison via the 'main' public API instead of - # via '_compare_variables'. This makes sense because it is only called + # via '_variable_differences'. This makes sense because it is only called # in one way, from one place. @staticmethod def _vars_testdata(): @@ -678,7 +678,7 @@ def test_masked(self, datavalues, masks): self.reference_var.data ) self.reference_var.data[1:2] = np.ma.masked - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) if bothmasked: expected = [] else: @@ -694,7 +694,7 @@ def test_nans(self, nans): self.testvar.data[1:2] = np.nan if bothnans: self.reference_var.data[1:2] = np.nan - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) if bothnans: expected = [] else: @@ -709,7 +709,7 @@ def test_scalar(self): for value, var in enumerate([self.reference_var, self.testvar]): var.dimensions = () var.data = np.array(value, dtype=var.dtype) - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ 'Dataset variable "v1" data contents differ, at 1 points: ' "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" @@ -730,7 +730,7 @@ def test_real_and_lazy(self, argtypes): if arraytype == "lazy": var.data = da.from_array(var.data, chunks=-1) # compare + check results - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) # N.B. the result should be the same in all cases expected = [ 'Dataset variable "v1" data contents differ, at 1 points: ' @@ -759,7 +759,7 @@ def _groups_data(self): def test_group_names(self): self.groups.rename("g2", "q") - errs = compare_nc_datasets(self.data1, self.data2) + errs = dataset_differences(self.data1, self.data2) expected = [ "Dataset subgroup lists do not match: ['g1', 'g2'] != ['g1', 'q']" @@ -772,7 +772,7 @@ def test_group_order(self, order_checking): self.groups.addall(all_groups[::-1]) do_ordercheck = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_groups_order=do_ordercheck ) @@ -789,7 +789,7 @@ def test_groups_extra_or_missing(self, order_checking): del self.groups["g1"] do_ordercheck = decode_ordercheck(order_checking) - errs = compare_nc_datasets( + errs = dataset_differences( self.data1, self.data2, check_groups_order=do_ordercheck ) From 6b16c1b7edf3a220930cbd6f4d237e15536d9d2e Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 31 May 2024 09:47:20 +0100 Subject: [PATCH 12/16] Move dataset-difference into utils (public). --- lib/ncdata/utils/__init__.py | 3 ++- {tests => lib/ncdata/utils}/_compare_nc_datasets.py | 0 .../example_scripts/ex_ncdata_netcdf_conversion.py | 2 +- tests/integration/test_iris_load_and_save_equivalence.py | 2 +- tests/integration/test_iris_xarray_roundtrips.py | 2 +- tests/integration/test_netcdf_roundtrips.py | 2 +- tests/integration/test_xarray_load_and_save_equivalence.py | 2 +- tests/unit/netcdf/test_from_nc4.py | 2 +- tests/unit/netcdf/test_to_nc4.py | 2 +- tests/unit/tests/test_compare_nc_datasets__additional.py | 4 ++-- tests/unit/tests/test_compare_nc_datasets__mainfunctions.py | 2 +- 11 files changed, 12 insertions(+), 11 deletions(-) rename {tests => lib/ncdata/utils}/_compare_nc_datasets.py (100%) diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py index 1dd4138..a3405ce 100644 --- a/lib/ncdata/utils/__init__.py +++ b/lib/ncdata/utils/__init__.py @@ -1,5 +1,6 @@ """General user utility functions.""" +from ._compare_nc_datasets import dataset_differences from ._save_errors import save_errors -__all__ = ["save_errors"] +__all__ = ["dataset_differences", "save_errors"] diff --git a/tests/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py similarity index 100% rename from tests/_compare_nc_datasets.py rename to lib/ncdata/utils/_compare_nc_datasets.py diff --git a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py index dd32f7f..4468829 100644 --- a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py +++ b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py @@ -12,8 +12,8 @@ from ncdata import NcAttribute, NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4, to_nc4 +from ncdata.utils import dataset_differences from tests import testdata_dir -from tests._compare_nc_datasets import dataset_differences def example_nc4_load_save_roundtrip(): # noqa: D103 diff --git a/tests/integration/test_iris_load_and_save_equivalence.py b/tests/integration/test_iris_load_and_save_equivalence.py index 40bdd3b..93579c8 100644 --- a/tests/integration/test_iris_load_and_save_equivalence.py +++ b/tests/integration/test_iris_load_and_save_equivalence.py @@ -11,7 +11,7 @@ import pytest from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import dataset_differences +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase from tests.integration.equivalence_testing_utils import ( adjust_chunks, diff --git a/tests/integration/test_iris_xarray_roundtrips.py b/tests/integration/test_iris_xarray_roundtrips.py index bbc4905..638cae5 100644 --- a/tests/integration/test_iris_xarray_roundtrips.py +++ b/tests/integration/test_iris_xarray_roundtrips.py @@ -19,8 +19,8 @@ from ncdata.iris_xarray import cubes_to_xarray from ncdata.netcdf4 import from_nc4 from ncdata.threadlock_sharing import lockshare_context +from ncdata.utils import dataset_differences from ncdata.xarray import from_xarray -from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, diff --git a/tests/integration/test_netcdf_roundtrips.py b/tests/integration/test_netcdf_roundtrips.py index a2643e5..6fe635d 100644 --- a/tests/integration/test_netcdf_roundtrips.py +++ b/tests/integration/test_netcdf_roundtrips.py @@ -4,7 +4,7 @@ from subprocess import check_output from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import dataset_differences +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase # Avoid complaints that the imported fixtures are "unused" diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 8b61830..5b8c384 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -10,8 +10,8 @@ from ncdata.netcdf4 import from_nc4, to_nc4 from ncdata.threadlock_sharing import lockshare_context +from ncdata.utils import dataset_differences from ncdata.xarray import from_xarray, to_xarray -from tests._compare_nc_datasets import dataset_differences from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, diff --git a/tests/unit/netcdf/test_from_nc4.py b/tests/unit/netcdf/test_from_nc4.py index 66265b6..ea61291 100644 --- a/tests/unit/netcdf/test_from_nc4.py +++ b/tests/unit/netcdf/test_from_nc4.py @@ -16,7 +16,7 @@ from ncdata import NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4 -from tests._compare_nc_datasets import dataset_differences +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset diff --git a/tests/unit/netcdf/test_to_nc4.py b/tests/unit/netcdf/test_to_nc4.py index 0c34ca8..8f2934a 100644 --- a/tests/unit/netcdf/test_to_nc4.py +++ b/tests/unit/netcdf/test_to_nc4.py @@ -17,7 +17,7 @@ from ncdata import NcData from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import dataset_differences +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset diff --git a/tests/unit/tests/test_compare_nc_datasets__additional.py b/tests/unit/tests/test_compare_nc_datasets__additional.py index 485cfe4..1c3e20e 100644 --- a/tests/unit/tests/test_compare_nc_datasets__additional.py +++ b/tests/unit/tests/test_compare_nc_datasets__additional.py @@ -13,7 +13,7 @@ import numpy as np import pytest -from tests._compare_nc_datasets import ( +from ncdata.utils._compare_nc_datasets import ( _attribute_differences, _namelist_differences, dataset_differences, @@ -97,7 +97,7 @@ def test_compare_attributes_namelists(self, mocker): elemname = "" order = mocker.sentinel.attrs_order suppress = mocker.sentinel.suppress_warnings - tgt = "tests._compare_nc_datasets._namelist_differences" + tgt = "ncdata.utils._compare_nc_datasets._namelist_differences" patch_tgt = mocker.patch(tgt) _attribute_differences( obj1=obj1, diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py index 3449523..9e75b88 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py @@ -11,7 +11,7 @@ import pytest from ncdata import NcAttribute, NcData, NcDimension, NcVariable -from tests._compare_nc_datasets import dataset_differences +from ncdata.utils import dataset_differences # from tests.data_testcase_schemas import _Datatype_Sample_Values, data_types # data_types # avoid 'unused' warning From 794c659eb890115025bfcce0d4be8ee1f0c8f2a4 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 31 May 2024 10:41:20 +0100 Subject: [PATCH 13/16] Move variable-differences testing into own testfile. --- lib/ncdata/utils/_compare_nc_datasets.py | 27 +- .../utils/compare_nc_datasets/__init__.py | 1 + .../test_dataset_differences__additional.py} | 0 ...est_dataset_differences__mainfunctions.py} | 331 ------------------ .../test_variable_differences.py | 303 ++++++++++++++++ 5 files changed, 323 insertions(+), 339 deletions(-) create mode 100644 tests/unit/utils/compare_nc_datasets/__init__.py rename tests/unit/{tests/test_compare_nc_datasets__additional.py => utils/compare_nc_datasets/test_dataset_differences__additional.py} (100%) rename tests/unit/{tests/test_compare_nc_datasets__mainfunctions.py => utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py} (54%) create mode 100644 tests/unit/utils/compare_nc_datasets/test_variable_differences.py diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index b8560b9..090e654 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -236,17 +236,32 @@ def fix_orders(attrlist): def _variable_differences( v1: NcVariable, v2: NcVariable, - group_id_string: str, + group_id_string: str = None, attrs_order: bool = True, data_equality: bool = True, suppress_warnings: bool = False, show_n_diffs: int = 2, ) -> List[str]: errs = [] - varname = v1.name - assert v2.name == varname - var_id_string = f'{group_id_string} variable "{varname}"' + show_n_diffs = int(show_n_diffs) + if show_n_diffs < 1: + msg = f"'show_n_diffs' must be >=1 : got {show_n_diffs!r}." + raise ValueError(msg) + + if v1.name == v2.name: + varname = v1.name + else: + varname = f"{v1.name} / {v2.name}" + + if group_id_string: + var_id_string = f'{group_id_string} variable "{varname}"' + else: + var_id_string = f'Variable "{varname}"' + + if v1.name != v2.name: + msg = f"{var_id_string} names differ : {v1.name!r} != {v2.name!r}" + errs.append(msg) # dimensions dims, dims2 = [v.dimensions for v in (v1, v2)] @@ -383,10 +398,6 @@ def _group_differences( passed arg `errs`. This just makes recursive calling easier. """ errs = [] - ndiffs = int(show_n_diffs) - if ndiffs < 1: - msg = f"'show_n_diffs' must be >=1 : got {show_n_diffs!r}." - raise ValueError(msg) if check_names: if g1.name != g2.name: diff --git a/tests/unit/utils/compare_nc_datasets/__init__.py b/tests/unit/utils/compare_nc_datasets/__init__.py new file mode 100644 index 0000000..7f699aa --- /dev/null +++ b/tests/unit/utils/compare_nc_datasets/__init__.py @@ -0,0 +1 @@ +"""Unit tests for :mod:`ncdata.utils._compare_nc_datasets`.""" diff --git a/tests/unit/tests/test_compare_nc_datasets__additional.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py similarity index 100% rename from tests/unit/tests/test_compare_nc_datasets__additional.py rename to tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py diff --git a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py similarity index 54% rename from tests/unit/tests/test_compare_nc_datasets__mainfunctions.py rename to tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py index 9e75b88..0bf52f7 100644 --- a/tests/unit/tests/test_compare_nc_datasets__mainfunctions.py +++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py @@ -6,7 +6,6 @@ * ( ALSO: "additional" tests (q.v.) cover subsidiary routines and the main API usage modes. ) """ -import dask.array as da import numpy as np import pytest @@ -408,336 +407,6 @@ def test_vars_extra_or_missing(self, order_checking): ] check(errs, expected) - def test_var_dims__reorder(self, order_checking): - # N.B. here we check behaviour of the DIMENSIONS order control, but this does - # not apply to dimensions order in a variable,which is *always* significant. - self.vars["v1"].dimensions = self.vars["v1"].dimensions[::-1] - # N.B. the data shape doesn't now correspond, but that won't matter as, with - # mismatched dimensions, the data won't be checked. - - do_orderchecks = decode_ordercheck(order_checking) - errs = dataset_differences( - self.data1, self.data2, check_dims_order=do_orderchecks - ) - - expected = [ - f'{self.location_string} variable "v1" dimensions differ : ' - "('y', 'x') != ('x', 'y')" - ] - check(errs, expected) - - def test_var_dims__extra_or_missing(self, order_checking): - # N.B. here we check for DIMENSIONS order check control. - self.vars["v1"].dimensions = self.vars["v1"].dimensions[:-1] - # N.B. the data shape doesn't now correspond, but that won't matter as, with - # mismatched dimensions, the data won't be checked. - - do_orderchecks = decode_ordercheck(order_checking) - errs = dataset_differences( - self.data1, self.data2, check_dims_order=do_orderchecks - ) - - expected = [ - f'{self.location_string} variable "v1" dimensions differ : ' - "('y', 'x') != ('y',)" - ] - check(errs, expected) - - -class TestCompareVariables__dtype: - # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called - # in one way, from one place. - @staticmethod - def _vars_testdata(): - def data(): - return np.zeros(3) - - testdata = NcData( - name="dataset_1", - dimensions=[NcDimension("x", 3)], - variables=[ - NcVariable("v1", ("x"), data=data()), - ], - ) - return testdata - - @pytest.fixture(autouse=True) - def _vars_data(self): - self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] - self.reference_var, self.testvar = ( - ds.variables["v1"] for ds in (self.data1, self.data2) - ) - - def test_numbers_v_strings(self): - # Set a different dtype - # NB this is different from the actual data array, but that doesn't - # matter, as it won't attempt to compare strings with numbers - self.testvar.dtype = np.dtype("S5") - - # Test the comparison - errs = dataset_differences(self.data1, self.data2) - expected = [ - 'Dataset variable "v1" datatypes differ : ' - "dtype('float64') != dtype('S5')" - ] - check(errs, expected) - - @pytest.mark.parametrize("equaldata", [False, True]) - def test_ints_v_floats(self, equaldata): - # In this case, there is also a data comparison to check. - v1 = self.testvar - - new_dtype = np.dtype(np.int32) - v1.data = v1.data.astype(new_dtype) - if not equaldata: - v1.data.flat[0] += 1 - v1.dtype = new_dtype - - # Test the comparison - errs = dataset_differences(self.data1, self.data2) - - expected = [ - 'Dataset variable "v1" datatypes differ : ' - "dtype('float64') != dtype('int32')" - ] - if not equaldata: - expected.append( - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(0,)] : LHS=[0.0], RHS=[1]" - ) - check(errs, expected) - - @pytest.mark.parametrize("equaldata", [False, True]) - def test_wordlengths(self, equaldata): - # Test floats with wordlength difference -- assume ints are the same - # In this case, there is also a data comparison to check. - v1 = self.testvar - - new_dtype = np.dtype(np.float32) - v1.data = v1.data.astype(new_dtype) - if not equaldata: - v1.data.flat[0] += 1 - v1.dtype = new_dtype - - # Test the comparison - errs = dataset_differences(self.data1, self.data2) - - expected = [ - 'Dataset variable "v1" datatypes differ : ' - "dtype('float64') != dtype('float32')" - ] - if not equaldata: - expected.append( - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" - ) - check(errs, expected) - - @pytest.mark.parametrize("equaldata", [False, True]) - def test_signed_unsigned(self, equaldata): - # Test floats with wordlength difference -- assume ints are the same - # In this case, there is also a data comparison to check. - new_dtype = np.dtype(np.int64) - v0 = self.reference_var - v0.data = v0.data.astype(new_dtype) - v0.dtype = new_dtype - - new_dtype = np.dtype(np.uint64) - v1 = self.testvar - v1.data = v1.data.astype(new_dtype) - if not equaldata: - v1.data.flat[0] += 1 - v1.dtype = new_dtype - - # Test the comparison - errs = dataset_differences(self.data1, self.data2) - - expected = [ - 'Dataset variable "v1" datatypes differ : ' - "dtype('int64') != dtype('uint64')" - ] - if not equaldata: - expected.append( - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(0,)] : LHS=[0], RHS=[1]" - ) - check(errs, expected) - - -class TestCompareVariables__data__checkcontrols: - # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called - # in one way, from one place. - @staticmethod - def _vars_testdata(): - def data(): - return np.arange(6.0).reshape((2, 3)) - - testdata = NcData( - name="dataset_1", - dimensions=[NcDimension("y", 2), NcDimension("x", 3)], - variables=[ - NcVariable("v1", ("x"), data=data()), - ], - ) - return testdata - - @pytest.fixture(autouse=True) - def _vars_data(self): - self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] - self.reference_var = self.data1.variables["v1"] - self.testvar = self.data2.variables["v1"] - - def test_no_values_check(self): - self.testvar.data += 1 - errs = dataset_differences( - self.data1, self.data2, check_var_data=False - ) - check(errs, []) - - def test_print_bad_nprint(self): - msg = "'show_n_diffs' must be >=1 : got 0." - with pytest.raises(ValueError, match=msg): - dataset_differences( - self.data1, self.data2, show_n_first_different=0 - ) - - @pytest.mark.parametrize("ndiffs", [1, 2, 3]) - def test_ndiffs(self, ndiffs): - self.testvar.data.flat[1 : ndiffs + 1] += 1 - errs = dataset_differences(self.data1, self.data2) - detail = { - 1: "[(0, 1)] : LHS=[1.0], RHS=[2.0]", - 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", - 3: ( - "[(0, 1), (0, 2), ...] : " - "LHS=[1.0, 2.0, ...], RHS=[2.0, 3.0, ...]" - ), - }[ndiffs] - expected = [ - f'Dataset variable "v1" data contents differ, at {ndiffs} points: ' - f"@INDICES{detail}" - ] - check(errs, expected) - - @pytest.mark.parametrize("nprint", [1, 2, 3]) - def test_show_n_first_different(self, nprint): - self.testvar.data.flat[1:3] += 1 - errs = dataset_differences( - self.data1, self.data2, show_n_first_different=nprint - ) - detail = { - 1: "[(0, 1), ...] : LHS=[1.0, ...], RHS=[2.0, ...]", - 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", - 3: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", - }[nprint] - expected = [ - f'Dataset variable "v1" data contents differ, at 2 points: ' - f"@INDICES{detail}" - ] - check(errs, expected) - - -class TestCompareVariables__data__diffreports: - # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called - # in one way, from one place. - @staticmethod - def _vars_testdata(): - def data(): - return np.arange(4.0) - - testdata = NcData( - name="dataset_1", - dimensions=[NcDimension("x", 4)], - variables=[ - NcVariable("v1", ("x"), data=data()), - ], - ) - return testdata - - @pytest.fixture(autouse=True) - def _vars_data(self): - self.data1, self.data2 = [self._vars_testdata() for _ in range(2)] - self.reference_var = self.data1.variables["v1"] - self.testvar = self.data2.variables["v1"] - - @pytest.mark.parametrize("datavalues", ["same", "different"]) - @pytest.mark.parametrize("masks", ["onemasked", "bothmasked"]) - def test_masked(self, datavalues, masks): - different = datavalues == "different" - bothmasked = masks == "bothmasked" - testvar = self.testvar - testvar.data = np.ma.masked_array(testvar.data) - if different: - testvar.data[1:2] += 1 - testvar.data[1:2] = np.ma.masked - if bothmasked: - self.reference_var.data = np.ma.masked_array( - self.reference_var.data - ) - self.reference_var.data[1:2] = np.ma.masked - errs = dataset_differences(self.data1, self.data2) - if bothmasked: - expected = [] - else: - expected = [ - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(1,)] : LHS=[1.0], RHS=[masked]" - ] - check(errs, expected) - - @pytest.mark.parametrize("nans", ["onenans", "bothnans"]) - def test_nans(self, nans): - bothnans = nans == "bothnans" - self.testvar.data[1:2] = np.nan - if bothnans: - self.reference_var.data[1:2] = np.nan - errs = dataset_differences(self.data1, self.data2) - if bothnans: - expected = [] - else: - expected = [ - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(1,)] : LHS=[1.0], RHS=[nan]" - ] - check(errs, expected) - - def test_scalar(self): - # Check how a difference of scalar arrays is reported - for value, var in enumerate([self.reference_var, self.testvar]): - var.dimensions = () - var.data = np.array(value, dtype=var.dtype) - errs = dataset_differences(self.data1, self.data2) - expected = [ - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" - ] - check(errs, expected) - - @pytest.mark.parametrize( - "argtypes", ["real_real", "real_lazy", "lazy_lazy"] - ) - def test_real_and_lazy(self, argtypes): - type1, type2 = argtypes[:4], argtypes[-4:] - # fix the testvar to create a difference - self.testvar.data[1:2] += 1 - # setup vars with lazy/real data arrays - for arraytype, var in zip( - [type1, type2], [self.reference_var, self.testvar] - ): - if arraytype == "lazy": - var.data = da.from_array(var.data, chunks=-1) - # compare + check results - errs = dataset_differences(self.data1, self.data2) - # N.B. the result should be the same in all cases - expected = [ - 'Dataset variable "v1" data contents differ, at 1 points: ' - "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]" - ] - check(errs, expected) - class TestCompareGroups: @staticmethod diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py new file mode 100644 index 0000000..c4b2066 --- /dev/null +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -0,0 +1,303 @@ +import dask.array as da +import numpy as np +import pytest + +from ncdata import NcVariable +from ncdata.utils._compare_nc_datasets import ( + _variable_differences as variable_differences, +) + +_DEBUG_RESULTS = True +# _DEBUG_RESULTS = True + + +def check(results, expected): + if _DEBUG_RESULTS: + print("\nResult messages:") + for msg in results: + print(" ", msg) + assert results == expected + + +class TestSimpleProperties: + @pytest.fixture(autouse=True) + def _vars_data(self): + self.var1, self.var2 = [ + NcVariable("v1", ("y", "x"), data=np.zeros((2, 3))) + for _ in range(2) + ] + + def test_var_names(self): + self.var2.name = "q" + + errs = variable_differences(self.var1, self.var2) + expected = ['Variable "v1 / q" names differ : ' "'v1' != 'q'"] + check(errs, expected) + + def test_var_dims__reorder(self): + # N.B. here we check behaviour of the DIMENSIONS order control, but this does + # not apply to dimensions order in a variable,which is *always* significant. + self.var2.dimensions = self.var2.dimensions[::-1] + # N.B. the data shape doesn't now correspond, but that won't matter as, with + # mismatched dimensions, the data won't be checked. + + errs = variable_differences(self.var1, self.var2) + + expected = [ + 'Variable "v1" dimensions differ : ' "('y', 'x') != ('x', 'y')" + ] + check(errs, expected) + + def test_var_dims__extra_or_missing(self): + # N.B. here we check for DIMENSIONS order check control. + self.var2.dimensions = self.var2.dimensions[:-1] + # N.B. the data shape doesn't now correspond, but that won't matter as, with + # mismatched dimensions, the data won't be checked. + + errs = variable_differences(self.var1, self.var2) + + expected = ["Variable \"v1\" dimensions differ : ('y', 'x') != ('y',)"] + check(errs, expected) + + +class TestDtypes: + # Note: testing variable comparison via the 'main' public API instead of + # via '_variable_differences'. This makes sense because it is only called + # in one way, from one place. + @pytest.fixture(autouse=True) + def _vars_data(self): + self.var1, self.var2 = [ + NcVariable("v1", ("x"), data=np.zeros(3)) for _ in range(2) + ] + + def test_numbers_v_strings(self): + # Set a different dtype + # NB this is different from the actual data array, but that doesn't + # matter, as it won't attempt to compare strings with numbers + self.var2.dtype = np.dtype("S5") + + # Test the comparison + errs = variable_differences(self.var1, self.var2) + expected = [ + 'Variable "v1" datatypes differ : ' + "dtype('float64') != dtype('S5')" + ] + check(errs, expected) + + @pytest.mark.parametrize("equaldata", [False, True]) + def test_ints_v_floats(self, equaldata): + # In this case, there is also a data comparison to check. + v1 = self.var2 + + new_dtype = np.dtype(np.int32) + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = variable_differences(self.var1, self.var2) + + expected = [ + 'Variable "v1" datatypes differ : ' + "dtype('float64') != dtype('int32')" + ] + if not equaldata: + expected.append( + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1]" + ) + check(errs, expected) + + @pytest.mark.parametrize("equaldata", [False, True]) + def test_wordlengths(self, equaldata): + # Test floats with wordlength difference -- assume ints are the same + # In this case, there is also a data comparison to check. + v1 = self.var2 + + new_dtype = np.dtype(np.float32) + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = variable_differences(self.var1, self.var2) + + expected = [ + 'Variable "v1" datatypes differ : ' + "dtype('float64') != dtype('float32')" + ] + if not equaldata: + expected.append( + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" + ) + check(errs, expected) + + @pytest.mark.parametrize("equaldata", [False, True]) + def test_signed_unsigned(self, equaldata): + # Test floats with wordlength difference -- assume ints are the same + # In this case, there is also a data comparison to check. + new_dtype = np.dtype(np.int64) + v0 = self.var1 + v0.data = v0.data.astype(new_dtype) + v0.dtype = new_dtype + + new_dtype = np.dtype(np.uint64) + v1 = self.var2 + v1.data = v1.data.astype(new_dtype) + if not equaldata: + v1.data.flat[0] += 1 + v1.dtype = new_dtype + + # Test the comparison + errs = variable_differences(self.var1, self.var2) + + expected = [ + 'Variable "v1" datatypes differ : ' + "dtype('int64') != dtype('uint64')" + ] + if not equaldata: + expected.append( + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0], RHS=[1]" + ) + check(errs, expected) + + +class TestDataCheck__controls: + # Note: testing variable comparison via the 'main' public API instead of + # via '_variable_differences'. This makes sense because it is only called + # in one way, from one place. + @pytest.fixture(autouse=True) + def _vars_data(self): + self.var1, self.var2 = [ + NcVariable("v1", ("x"), data=np.arange(6.0).reshape((2, 3))) + for _ in range(2) + ] + + def test_no_values_check(self): + self.var2.data += 1 + errs = variable_differences(self.var1, self.var2, data_equality=False) + check(errs, []) + + def test_print_bad_nprint(self): + msg = "'show_n_diffs' must be >=1 : got 0." + with pytest.raises(ValueError, match=msg): + variable_differences(self.var1, self.var2, show_n_diffs=0) + + @pytest.mark.parametrize("ndiffs", [1, 2, 3]) + def test_ndiffs(self, ndiffs): + self.var2.data.flat[1 : ndiffs + 1] += 1 + errs = variable_differences(self.var1, self.var2) + detail = { + 1: "[(0, 1)] : LHS=[1.0], RHS=[2.0]", + 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + 3: ( + "[(0, 1), (0, 2), ...] : " + "LHS=[1.0, 2.0, ...], RHS=[2.0, 3.0, ...]" + ), + }[ndiffs] + expected = [ + f'Variable "v1" data contents differ, at {ndiffs} points: ' + f"@INDICES{detail}" + ] + check(errs, expected) + + @pytest.mark.parametrize("nprint", [1, 2, 3]) + def test_show_n_first_different(self, nprint): + self.var2.data.flat[1:3] += 1 + errs = variable_differences(self.var1, self.var2, show_n_diffs=nprint) + detail = { + 1: "[(0, 1), ...] : LHS=[1.0, ...], RHS=[2.0, ...]", + 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + 3: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", + }[nprint] + expected = [ + f'Variable "v1" data contents differ, at 2 points: ' + f"@INDICES{detail}" + ] + check(errs, expected) + + +class TestDataCheck__difference_reports: + # Note: testing variable comparison via the 'main' public API instead of + # via '_variable_differences'. This makes sense because it is only called + # in one way, from one place. + @pytest.fixture(autouse=True) + def _vars_data(self): + self.var1, self.var2 = [ + NcVariable("v1", ("x"), data=np.arange(4.0)) for _ in range(2) + ] + + @pytest.mark.parametrize("datavalues", ["same", "different"]) + @pytest.mark.parametrize("masks", ["onemasked", "bothmasked"]) + def test_masked(self, datavalues, masks): + different = datavalues == "different" + bothmasked = masks == "bothmasked" + testvar = self.var2 + testvar.data = np.ma.masked_array(testvar.data) + if different: + testvar.data[1:2] += 1 + testvar.data[1:2] = np.ma.masked + if bothmasked: + self.var1.data = np.ma.masked_array(self.var1.data) + self.var1.data[1:2] = np.ma.masked + errs = variable_differences(self.var1, self.var2) + if bothmasked: + expected = [] + else: + expected = [ + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[masked]" + ] + check(errs, expected) + + @pytest.mark.parametrize("nans", ["onenans", "bothnans"]) + def test_nans(self, nans): + bothnans = nans == "bothnans" + self.var2.data[1:2] = np.nan + if bothnans: + self.var1.data[1:2] = np.nan + errs = variable_differences(self.var1, self.var2) + if bothnans: + expected = [] + else: + expected = [ + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[nan]" + ] + check(errs, expected) + + def test_scalar(self): + # Check how a difference of scalar arrays is reported + for value, var in enumerate([self.var1, self.var2]): + var.dimensions = () + var.data = np.array(value, dtype=var.dtype) + errs = variable_differences(self.var1, self.var2) + expected = [ + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]" + ] + check(errs, expected) + + @pytest.mark.parametrize( + "argtypes", ["real_real", "real_lazy", "lazy_lazy"] + ) + def test_real_and_lazy(self, argtypes): + type1, type2 = argtypes[:4], argtypes[-4:] + # fix the testvar to create a difference + self.var2.data[1:2] += 1 + # setup vars with lazy/real data arrays + for arraytype, var in zip([type1, type2], [self.var1, self.var2]): + if arraytype == "lazy": + var.data = da.from_array(var.data, chunks=-1) + # compare + check results + errs = variable_differences(self.var1, self.var2) + # N.B. the result should be the same in all cases + expected = [ + 'Variable "v1" data contents differ, at 1 points: ' + "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]" + ] + check(errs, expected) From da2266d7fdccfd868276f83bd3a389670675e5c0 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 31 May 2024 11:37:09 +0100 Subject: [PATCH 14/16] Revise comparison API; make variable-differences public. --- lib/ncdata/utils/__init__.py | 8 +- lib/ncdata/utils/_compare_nc_datasets.py | 81 +++++++++++++------ .../test_xarray_load_and_save_equivalence.py | 2 +- ...test_dataset_differences__mainfunctions.py | 14 +++- .../test_variable_differences.py | 20 ++--- 5 files changed, 85 insertions(+), 40 deletions(-) diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py index a3405ce..033c542 100644 --- a/lib/ncdata/utils/__init__.py +++ b/lib/ncdata/utils/__init__.py @@ -1,6 +1,10 @@ """General user utility functions.""" -from ._compare_nc_datasets import dataset_differences +from ._compare_nc_datasets import dataset_differences, variable_differences from ._save_errors import save_errors -__all__ = ["dataset_differences", "save_errors"] +__all__ = [ + "dataset_differences", + "save_errors", + "variable_differences", +] diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index 090e654..affd6c1 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -21,15 +21,15 @@ def dataset_differences( dataset_or_path_1: Union[Path, AnyStr, nc.Dataset, NcData], dataset_or_path_2: Union[Path, AnyStr, nc.Dataset, NcData], + check_names: bool = False, check_dims_order: bool = True, + check_dims_unlimited: bool = True, check_vars_order: bool = True, check_attrs_order: bool = True, check_groups_order: bool = True, check_var_data: bool = True, show_n_first_different: int = 2, suppress_warnings: bool = False, - check_names: bool = False, - check_unlimited: bool = True, ) -> List[str]: r""" Compare netcdf data objects. @@ -43,6 +43,10 @@ def dataset_differences( check_dims_order, check_vars_order, check_attrs_order, check_groups_order : bool, default True If False, no error results from the same contents in a different order, however unless `suppress_warnings` is True, the error string is issued as a warning. + check_names: bool, default False + Whether to warn if the names of the top-level datasets are different + check_dims_unlimited: bool, default True + Whether to compare the 'unlimited' status of dimensions check_var_data : bool, default True If True, all variable data is also checked for equality. If False, only dtype and shape are compared. @@ -52,10 +56,6 @@ def dataset_differences( suppress_warnings : bool, default False When False (the default), report changes in content order as Warnings. When True, ignore changes in ordering. - check_names: bool, default False - Whether to warn if the names of the top-level datasets are different - check_unlimited: bool, default True - Whether to compare the 'unlimited' status of dimensions Returns ------- @@ -89,7 +89,7 @@ def dataset_differences( data_equality=check_var_data, suppress_warnings=suppress_warnings, check_names=check_names, - check_unlimited=check_unlimited, + check_unlimited=check_dims_unlimited, show_n_diffs=show_n_first_different, ) finally: @@ -233,20 +233,49 @@ def fix_orders(attrlist): return errs -def _variable_differences( +def variable_differences( v1: NcVariable, v2: NcVariable, - group_id_string: str = None, - attrs_order: bool = True, - data_equality: bool = True, + check_attrs_order: bool = True, + check_var_data: bool = True, + show_n_first_different: int = 2, suppress_warnings: bool = False, - show_n_diffs: int = 2, + _group_id_string: str = None, ) -> List[str]: + r""" + Compare variables. + + Parameters + ---------- + v1, v2 : NcVariable + variables to compare + check_attrs_order : bool, default True + If False, no error results from the same contents in a different order, + however unless `suppress_warnings` is True, the error string is issued as a warning. + check_var_data : bool, default True + If True, all variable data is also checked for equality. + If False, only dtype and shape are compared. + NOTE: comparison of large arrays is done in-memory, so may be highly inefficient. + show_n_first_different: int, default 2 + Number of value differences to display. + suppress_warnings : bool, default False + When False (the default), report changes in content order as Warnings. + When True, ignore changes in ordering entirely. + _group_id_string : str + (internal use only) + + Returns + ------- + errs : list of str + A list of "error" strings, describing differences between the inputs. + If empty, no differences were found. + + """ errs = [] - show_n_diffs = int(show_n_diffs) - if show_n_diffs < 1: - msg = f"'show_n_diffs' must be >=1 : got {show_n_diffs!r}." + show_n_first_different = int(show_n_first_different) + if show_n_first_different < 1: + msg = f"'show_n_diffs' must be >=1 : got {show_n_first_different!r}." raise ValueError(msg) if v1.name == v2.name: @@ -254,8 +283,8 @@ def _variable_differences( else: varname = f"{v1.name} / {v2.name}" - if group_id_string: - var_id_string = f'{group_id_string} variable "{varname}"' + if _group_id_string: + var_id_string = f'{_group_id_string} variable "{varname}"' else: var_id_string = f'Variable "{varname}"' @@ -274,7 +303,7 @@ def _variable_differences( v1, v2, var_id_string, - attrs_order=attrs_order, + attrs_order=check_attrs_order, suppress_warnings=suppress_warnings, force_first_attrnames=[ "_FillValue" @@ -290,7 +319,7 @@ def _variable_differences( # data values is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, dtype2)) # TODO: is this correct check to allow compare between different dtypes? - if data_equality and dims == dims2 and is_str == is_str2: + if check_var_data and dims == dims2 and is_str == is_str2: # N.B. don't check shapes here: we already checked dimensions. # NOTE: no attempt to use laziness here. Could be improved. def getdata(var): @@ -358,8 +387,8 @@ def getdata(var): msg = ( f"{var_id_string} data contents differ, at {n_diffs} points: " ) - ellps = ", ..." if n_diffs > show_n_diffs else "" - diffinds = flat_diff_inds[:show_n_diffs] + ellps = ", ..." if n_diffs > show_n_first_different else "" + diffinds = flat_diff_inds[:show_n_first_different] diffinds = [ np.unravel_index(ind, shape=data.shape) for ind in diffinds ] @@ -463,14 +492,14 @@ def _group_differences( if varname not in varnames2: continue v1, v2 = [grp.variables[varname] for grp in (g1, g2)] - errs += _variable_differences( + errs += variable_differences( v1, v2, - group_id_string=group_id_string, - attrs_order=attrs_order, - data_equality=data_equality, + check_attrs_order=attrs_order, + check_var_data=data_equality, + show_n_first_different=show_n_diffs, suppress_warnings=suppress_warnings, - show_n_diffs=show_n_diffs, + _group_id_string=group_id_string, ) # Finally, recurse over groups diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 5b8c384..d7fb316 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -78,7 +78,7 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): temp_direct_savepath, temp_ncdata_savepath, check_dims_order=False, - check_unlimited=False, # TODO: remove this when we fix it + check_dims_unlimited=False, # TODO: remove this when we fix it suppress_warnings=True, ) assert results == [] diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py index 0bf52f7..1e03e9f 100644 --- a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py +++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py @@ -160,7 +160,7 @@ def test_unlimited(self, check_unlim): check_unlim ] errs = dataset_differences( - self.data1, self.data2, check_unlimited=do_check_unlims + self.data1, self.data2, check_dims_unlimited=do_check_unlims ) if do_check_unlims: @@ -334,7 +334,17 @@ def test_fillvalue_anyorder(self, attname): check(errs, expected) -class TestCompareVariables__metadata: +class TestCompareVariables: + """ + Test variable comparison. + + Mostly, this is about comparison of the variable contents of a dataset + or group, since variable-to-variable comparison is done by + variable_differences, which is tested independently elsewhere. + This includes testing the generation of the variable identity strings in + various contexts (by parametrising over group_context). + """ + @staticmethod def _vars_testdata(group_context): def data(): diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py index c4b2066..986008b 100644 --- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -3,9 +3,7 @@ import pytest from ncdata import NcVariable -from ncdata.utils._compare_nc_datasets import ( - _variable_differences as variable_differences, -) +from ncdata.utils import variable_differences _DEBUG_RESULTS = True # _DEBUG_RESULTS = True @@ -62,7 +60,7 @@ def test_var_dims__extra_or_missing(self): class TestDtypes: # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called + # via 'variable_differences'. This makes sense because it is only called # in one way, from one place. @pytest.fixture(autouse=True) def _vars_data(self): @@ -168,7 +166,7 @@ def test_signed_unsigned(self, equaldata): class TestDataCheck__controls: # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called + # via 'variable_differences'. This makes sense because it is only called # in one way, from one place. @pytest.fixture(autouse=True) def _vars_data(self): @@ -179,13 +177,15 @@ def _vars_data(self): def test_no_values_check(self): self.var2.data += 1 - errs = variable_differences(self.var1, self.var2, data_equality=False) + errs = variable_differences(self.var1, self.var2, check_var_data=False) check(errs, []) def test_print_bad_nprint(self): msg = "'show_n_diffs' must be >=1 : got 0." with pytest.raises(ValueError, match=msg): - variable_differences(self.var1, self.var2, show_n_diffs=0) + variable_differences( + self.var1, self.var2, show_n_first_different=0 + ) @pytest.mark.parametrize("ndiffs", [1, 2, 3]) def test_ndiffs(self, ndiffs): @@ -208,7 +208,9 @@ def test_ndiffs(self, ndiffs): @pytest.mark.parametrize("nprint", [1, 2, 3]) def test_show_n_first_different(self, nprint): self.var2.data.flat[1:3] += 1 - errs = variable_differences(self.var1, self.var2, show_n_diffs=nprint) + errs = variable_differences( + self.var1, self.var2, show_n_first_different=nprint + ) detail = { 1: "[(0, 1), ...] : LHS=[1.0, ...], RHS=[2.0, ...]", 2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]", @@ -223,7 +225,7 @@ def test_show_n_first_different(self, nprint): class TestDataCheck__difference_reports: # Note: testing variable comparison via the 'main' public API instead of - # via '_variable_differences'. This makes sense because it is only called + # via 'variable_differences'. This makes sense because it is only called # in one way, from one place. @pytest.fixture(autouse=True) def _vars_data(self): From 558245ad37e867cf5db8eb69b0abfbe658b7aae7 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sat, 8 Jun 2024 00:49:00 +0100 Subject: [PATCH 15/16] Fix order of presentation of utils routines. --- lib/ncdata/utils/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py index 033c542..dd3dc8c 100644 --- a/lib/ncdata/utils/__init__.py +++ b/lib/ncdata/utils/__init__.py @@ -4,7 +4,7 @@ from ._save_errors import save_errors __all__ = [ - "dataset_differences", "save_errors", + "dataset_differences", "variable_differences", ] From 43c09ef9391b3497fba7a2ea7261b096b0d23414 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 10 Jun 2024 15:02:12 +0100 Subject: [PATCH 16/16] Add change note --- docs/change_log.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/change_log.rst b/docs/change_log.rst index 8a64708..f44b29e 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -22,6 +22,9 @@ Unreleased ^^^^^^^^^^ TODO: highlights +* `@pp-mo`_ dataset comparison routines now a public utility. + (`PR#70 `_). + * `@pp-mo`_ initial Sphinx documentation (`PR#76 `_).