From 1ffdc7526149cc5b5bf3f3a27be2e4b3914fd76b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 6 Mar 2025 12:26:22 +0000 Subject: [PATCH 1/3] Fix error in dataset comparison. --- lib/ncdata/utils/_compare_nc_datasets.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index b9892a3..95b634f 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -387,6 +387,14 @@ def variable_differences( ], # for some reason, this doesn't always list consistently ) + # shapes + shape, shape2 = [ + v.data.shape if _isncdata(v) else v.shape for v in (v1, v2) + ] + if shape != shape2: + msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}" + errs.append(msg) + # dtypes dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)] if dtype != dtype2: @@ -403,8 +411,12 @@ def _is_strtype(dt): is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2)) # TODO: is this correct check to allow compare between different dtypes? - if check_var_data and dims == dims2 and is_str == is_str2: - # N.B. don't check shapes here: we already checked dimensions. + if ( + check_var_data + and dims == dims2 + and shape == shape2 + and is_str == is_str2 + ): # NOTE: no attempt to use laziness here. Could be improved. def getdata(var): if _isncdata(var): From 1d4ca3f9d1b22c6a4de0e8467f1e1bd874e4baf4 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 9 Apr 2025 15:38:37 +0100 Subject: [PATCH 2/3] Fix dataset/variable difference for no-data variables. --- lib/ncdata/utils/_compare_nc_datasets.py | 17 ++++++-- .../test_variable_differences.py | 43 +++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index 95b634f..f70003f 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -388,9 +388,20 @@ def variable_differences( ) # shapes - shape, shape2 = [ - v.data.shape if _isncdata(v) else v.shape for v in (v1, v2) - ] + def safe_varshape(var): + if _isncdata(var): + # NcVariable passed + if var.data is None: + # Allow for NcVariable.data to be empty + shape = None + else: + shape = var.data.shape + else: + # netCDF4.Variable passed + shape = var.shape + return shape + + shape, shape2 = [safe_varshape(v) for v in (v1, v2)] if shape != shape2: msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}" errs.append(msg) diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py index ddb6011..30be947 100644 --- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -162,6 +162,49 @@ def test_signed_unsigned(self, equaldata): ) check(errs, expected) + @pytest.mark.parametrize("given", ["nodata", "data", "dtype"]) + def test_nodata_nodtype(self, given): + # Check that we can correctly compare a variable with NO specified data or dtype, + # with one that may have either. + # N.B. this omits comparing 2 variables with dtype only. See following. + v1 = NcVariable("x") + + kwargs = {} + if given == "data": + kwargs["data"] = [1, 2] + expected = [ + 'Variable "x" shapes differ : None != (2,)', + 'Variable "x" datatypes differ : None != dtype(\'int64\')', + ] + elif given == "dtype": + kwargs["dtype"] = np.float32 + expected = ['Variable "x" datatypes differ : None != dtype(\'float32\')'] + elif given == "nodata": + expected = [] + else: + raise ValueError(f"unrecognised 'given' param : {given!s}") + + v2 = NcVariable("x", **kwargs) + errs = variable_differences(v1, v2) + check(errs, expected) + + @pytest.mark.parametrize("equality", ["same", "different"]) + def test_nodata_withdtype(self, equality): + # Check that we can correctly compare variables which have dtype but no data. + # N.B. the other possibilities are all covered in the "nodata_nodtype" test. + dtype = np.int16 + v1 = NcVariable("x", dtype=dtype) + expected = [] + if equality == "different": + dtype = np.float16 + expected = [ + 'Variable "x" datatypes differ : dtype(\'int16\') != dtype(\'float16\')' + ] + + v2 = NcVariable("x", dtype=dtype) + errs = variable_differences(v1, v2) + check(errs, expected) + class TestDataCheck__controls: # Note: testing variable comparison via the 'main' public API instead of From 9825b43fcbfab278e3284923dda4341d5adb8122 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:15:17 +0000 Subject: [PATCH 3/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../compare_nc_datasets/test_variable_differences.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py index 30be947..f0f553c 100644 --- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -171,14 +171,16 @@ def test_nodata_nodtype(self, given): kwargs = {} if given == "data": - kwargs["data"] = [1, 2] + kwargs["data"] = [1, 2] expected = [ 'Variable "x" shapes differ : None != (2,)', - 'Variable "x" datatypes differ : None != dtype(\'int64\')', + "Variable \"x\" datatypes differ : None != dtype('int64')", ] elif given == "dtype": kwargs["dtype"] = np.float32 - expected = ['Variable "x" datatypes differ : None != dtype(\'float32\')'] + expected = [ + "Variable \"x\" datatypes differ : None != dtype('float32')" + ] elif given == "nodata": expected = [] else: @@ -198,7 +200,7 @@ def test_nodata_withdtype(self, equality): if equality == "different": dtype = np.float16 expected = [ - 'Variable "x" datatypes differ : dtype(\'int16\') != dtype(\'float16\')' + "Variable \"x\" datatypes differ : dtype('int16') != dtype('float16')" ] v2 = NcVariable("x", dtype=dtype)