diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index afdaa2b..4455bf1 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -35,7 +35,7 @@ jobs: - name: "Install dependencies" run: | - conda install --yes pytest iris xarray filelock requests + conda install --yes pytest pytest-mock iris xarray filelock requests - name: "Install *latest* Iris" run: | diff --git a/docs/change_log.rst b/docs/change_log.rst index 8a64708..f44b29e 100644 --- a/docs/change_log.rst +++ b/docs/change_log.rst @@ -22,6 +22,9 @@ Unreleased ^^^^^^^^^^ TODO: highlights +* `@pp-mo`_ dataset comparison routines now a public utility. + (`PR#70 `_). + * `@pp-mo`_ initial Sphinx documentation (`PR#76 `_). diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py index 1dd4138..dd3dc8c 100644 --- a/lib/ncdata/utils/__init__.py +++ b/lib/ncdata/utils/__init__.py @@ -1,5 +1,10 @@ """General user utility functions.""" +from ._compare_nc_datasets import dataset_differences, variable_differences from ._save_errors import save_errors -__all__ = ["save_errors"] +__all__ = [ + "save_errors", + "dataset_differences", + "variable_differences", +] diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py new file mode 100644 index 0000000..affd6c1 --- /dev/null +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -0,0 +1,530 @@ +""" +Utility for comparing 2 netcdf datasets. + +Works with file-specs, netCDF4.Datasets *or* NcData. + +For purposes of testing ncdata.netcdf4 behaviour. +TODO: one day might be public ? 
+""" + +from pathlib import Path +from typing import AnyStr, List, Union +from warnings import warn + +import netCDF4 +import netCDF4 as nc +import numpy as np + +from ncdata import NcData, NcVariable + + +def dataset_differences( + dataset_or_path_1: Union[Path, AnyStr, nc.Dataset, NcData], + dataset_or_path_2: Union[Path, AnyStr, nc.Dataset, NcData], + check_names: bool = False, + check_dims_order: bool = True, + check_dims_unlimited: bool = True, + check_vars_order: bool = True, + check_attrs_order: bool = True, + check_groups_order: bool = True, + check_var_data: bool = True, + show_n_first_different: int = 2, + suppress_warnings: bool = False, +) -> List[str]: + r""" + Compare netcdf data objects. + + Accepts paths, pathstrings, open :class:`netCDF4.Dataset`\\s or :class:`NcData` objects. + + Parameters + ---------- + dataset_or_path_1, dataset_or_path_2 : str or Path or netCDF4.Dataset or NcData + two datasets to compare, either NcData or netCDF4 + check_dims_order, check_vars_order, check_attrs_order, check_groups_order : bool, default True + If False, no error results from the same contents in a different order, + however unless `suppress_warnings` is True, the error string is issued as a warning. + check_names: bool, default False + Whether to warn if the names of the top-level datasets are different + check_dims_unlimited: bool, default True + Whether to compare the 'unlimited' status of dimensions + check_var_data : bool, default True + If True, all variable data is also checked for equality. + If False, only dtype and shape are compared. + NOTE: comparison of large arrays is done in-memory, so may be highly inefficient. + show_n_first_different: int, default 2 + Number of value differences to display. + suppress_warnings : bool, default False + When False (the default), report changes in content order as Warnings. + When True, ignore changes in ordering. 
+ + Returns + ------- + errs : list of str + A list of "error" strings, describing differences between the inputs. + If empty, no differences were found. + + """ + ds1_was_path = not hasattr(dataset_or_path_1, "variables") + ds2_was_path = not hasattr(dataset_or_path_2, "variables") + ds1, ds2 = None, None + try: + if ds1_was_path: + ds1 = nc.Dataset(dataset_or_path_1) + else: + ds1 = dataset_or_path_1 + + if ds2_was_path: + ds2 = nc.Dataset(dataset_or_path_2) + else: + ds2 = dataset_or_path_2 + + errs = _group_differences( + ds1, + ds2, + group_id_string="Dataset", + dims_order=check_dims_order, + vars_order=check_vars_order, + attrs_order=check_attrs_order, + groups_order=check_groups_order, + data_equality=check_var_data, + suppress_warnings=suppress_warnings, + check_names=check_names, + check_unlimited=check_dims_unlimited, + show_n_diffs=show_n_first_different, + ) + finally: + if ds1_was_path and ds1: + ds1.close() + if ds2_was_path and ds2: + ds2.close() + + return errs + + +def _namelist_differences( + l1, l2, elemname, order_strict=True, suppress_warnings=False +): + errs = [] + msg = f"{elemname} do not match: {list(l1)} != {list(l2)}" + ok = l1 == l2 + ok_except_order = ok + if not ok: + ok_except_order = sorted(l1) == sorted(l2) + + if not ok: + if not ok_except_order or order_strict: + errs.append(msg) + elif ok_except_order and not suppress_warnings: + warn("(Ignoring: " + msg + " )", category=UserWarning) + return errs + + +def _isncdata(obj): + """ + Distinguish NcData objects from similar netCDF4 ones. + + A crude test, used to support comparisons on either type of data. + """ + return hasattr(obj, "_print_content") + + +def _attribute_arrays_eq(a1, a2): + """ + Test equality of array values in attributes. + + Assumes values (attributes) are presented as numpy arrays (not lazy). + Matches any NaNs. + Does *NOT* handle masked data -- which does not occur in attributes. 
+ """ + result = True + result &= a1.shape == a2.shape + result &= a1.dtype == a2.dtype + if result: + if a1.dtype.kind in ("S", "U", "b"): + result = np.all(a1 == a2) + else: + # array_equal handles possible NaN cases + result = np.array_equal(a1, a2, equal_nan=True) + return result + + +def _attribute_differences( + obj1, + obj2, + elemname, + attrs_order=True, + suppress_warnings=False, + force_first_attrnames=None, +) -> List[str]: + """ + Compare attribute name lists. + + Does not return results, but appends error messages to 'errs'. + """ + attrnames, attrnames2 = [ + list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs()) + for obj in (obj1, obj2) + ] + if attrs_order and force_first_attrnames: + # In order to ignore the order of appearance of *specific* attributes, move + # all those ones to the front in a known order. + def fix_orders(attrlist): + for name in force_first_attrnames[::-1]: + if name in attrlist: + attrlist = [name] + [n for n in attrlist if n != name] + return attrlist + + attrnames = fix_orders(attrnames) + attrnames2 = fix_orders(attrnames2) + + errs = _namelist_differences( + attrnames, + attrnames2, + f"{elemname} attribute lists", + order_strict=attrs_order, + suppress_warnings=suppress_warnings, + ) + + # Compare the attributes themselves (dtypes and values) + for attrname in attrnames: + if attrname not in attrnames2: + # Only compare attributes existing on both inputs. + continue + + attr, attr2 = [ + ( + obj.attributes[attrname].as_python_value() + if _isncdata(obj) + else obj.getncattr(attrname) + ) + for obj in (obj1, obj2) + ] + + # TODO: this still doesn't work well for strings : for those, we should ignore + # exact "type" (including length), and just compare the content. + # TODO: get a good testcase going to check this behaviour + dtype, dtype2 = [ + # Get x.dtype, or fallback on type(x) -- basically, for strings. 
+ getattr(attr, "dtype", type(attr)) + for attr in (attr, attr2) + ] + if all( + isinstance(dt, np.dtype) and dt.kind in "SUb" + for dt in (dtype, dtype2) + ): + dtype = dtype2 = "string" + if dtype != dtype2: + msg = ( + f'{elemname} "{attrname}" attribute datatypes differ : ' + f"{dtype!r} != {dtype2!r}" + ) + errs.append(msg) + else: + # If datatypes match (only then), compare values + # Cast attrs, which might be strings, to arrays for comparison + arr, arr2 = [np.asarray(attr) for attr in (attr, attr2)] + if not _attribute_arrays_eq(arr, arr2): + # N.B. special comparison to handle strings and NaNs + msg = ( + f'{elemname} "{attrname}" attribute values differ : ' + f"{attr!r} != {attr2!r}" + ) + errs.append(msg) + return errs + + +def variable_differences( + v1: NcVariable, + v2: NcVariable, + check_attrs_order: bool = True, + check_var_data: bool = True, + show_n_first_different: int = 2, + suppress_warnings: bool = False, + _group_id_string: str = None, +) -> List[str]: + r""" + Compare variables. + + Parameters + ---------- + v1, v2 : NcVariable + variables to compare + check_attrs_order : bool, default True + If False, no error results from the same contents in a different order, + however unless `suppress_warnings` is True, the error string is issued as a warning. + check_var_data : bool, default True + If True, all variable data is also checked for equality. + If False, only dtype and shape are compared. + NOTE: comparison of large arrays is done in-memory, so may be highly inefficient. + show_n_first_different: int, default 2 + Number of value differences to display. + suppress_warnings : bool, default False + When False (the default), report changes in content order as Warnings. + When True, ignore changes in ordering entirely. + _group_id_string : str + (internal use only) + + Returns + ------- + errs : list of str + A list of "error" strings, describing differences between the inputs. + If empty, no differences were found. 
+ + """ + errs = [] + + show_n_first_different = int(show_n_first_different) + if show_n_first_different < 1: + msg = f"'show_n_diffs' must be >=1 : got {show_n_first_different!r}." + raise ValueError(msg) + + if v1.name == v2.name: + varname = v1.name + else: + varname = f"{v1.name} / {v2.name}" + + if _group_id_string: + var_id_string = f'{_group_id_string} variable "{varname}"' + else: + var_id_string = f'Variable "{varname}"' + + if v1.name != v2.name: + msg = f"{var_id_string} names differ : {v1.name!r} != {v2.name!r}" + errs.append(msg) + + # dimensions + dims, dims2 = [v.dimensions for v in (v1, v2)] + if dims != dims2: + msg = f"{var_id_string} dimensions differ : {dims!r} != {dims2!r}" + errs.append(msg) + + # attributes + errs += _attribute_differences( + v1, + v2, + var_id_string, + attrs_order=check_attrs_order, + suppress_warnings=suppress_warnings, + force_first_attrnames=[ + "_FillValue" + ], # for some reason, this doesn't always list consistently + ) + + # dtypes + dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)] + if dtype != dtype2: + msg = f"{var_id_string} datatypes differ : {dtype!r} != {dtype2!r}" + errs.append(msg) + + # data values + is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, dtype2)) + # TODO: is this correct check to allow compare between different dtypes? + if check_var_data and dims == dims2 and is_str == is_str2: + # N.B. don't check shapes here: we already checked dimensions. + # NOTE: no attempt to use laziness here. Could be improved. + def getdata(var): + if _isncdata(var): + data = var.data + if hasattr(data, "compute"): + data = data.compute() + else: + # expect var to be an actual netCDF4.Variable + # (check for obscure property NOT provided by mimics) + assert hasattr(var, "use_nc_get_vars") + data = var[:] + # Return 0D as 1D, as this makes results simpler to interpret. 
+ if data.ndim == 0: + data = data.flatten() + assert data.shape == (1,) + return data + + data, data2 = (getdata(v) for v in (v1, v2)) + flatdata, flatdata2 = ( + np.asanyarray(arr).flatten() for arr in (data, data2) + ) + + # For simpler checking, use flat versions + flat_diff_inds = ( + [] + ) # NB *don't* make this an array, it causes problems + + # Work out whether string : N.B. array type does not ALWAYS match the + # variable type, because apparently the scalar content of a *masked* scalar + # string variable has a numeric type (!! yuck !!) + is_string_data = flatdata.dtype.kind in ("S", "U") + if is_string_data: + safe_fill_const = "" + else: + safe_fill_const = np.zeros((1,), dtype=flatdata.dtype)[0] + + # Where data is masked, count mask mismatches and skip those points + if any(np.ma.is_masked(arr) for arr in (data, data2)): + mask, mask2 = ( + np.ma.getmaskarray(array) for array in (flatdata, flatdata2) + ) + flat_diff_inds = list(np.where(mask != mask2)[0]) + # Replace all masked points to exclude them from unmasked-point checks. + either_masked = mask | mask2 + flatdata[either_masked] = safe_fill_const + flatdata2[either_masked] = safe_fill_const + + # Where data has NANs, count mismatches and skip (as for masked) + if not is_string_data: + isnans, isnans2 = (np.isnan(arr) for arr in (flatdata, flatdata2)) + if np.any(isnans) or np.any(isnans2): + nandiffs = np.where(isnans != isnans2)[0] + if nandiffs: + flat_diff_inds += list(nandiffs) + anynans = isnans | isnans2 + flatdata[anynans] = safe_fill_const + flatdata2[anynans] = safe_fill_const + + flat_diff_inds += list(np.where(flatdata != flatdata2)[0]) + # Order the nonmatching indices : We report just the first few ... + flat_diff_inds = sorted(flat_diff_inds) + n_diffs = len(flat_diff_inds) + if n_diffs: + msg = ( + f"{var_id_string} data contents differ, at {n_diffs} points: " + ) + ellps = ", ..." 
if n_diffs > show_n_first_different else ""
+                diffinds = flat_diff_inds[:show_n_first_different]
+                diffinds = [
+                    np.unravel_index(ind, shape=data.shape) for ind in diffinds
+                ]
+                diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds)
+                inds_str = f"[{diffinds_str}{ellps}]"
+                points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds)
+                points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds)
+                points_lhs_str = f"[{points_lhs_str}{ellps}]"
+                points_rhs_str = f"[{points_rhs_str}{ellps}]"
+                msg += (
+                    f"@INDICES{inds_str}"
+                    f" : LHS={points_lhs_str}, RHS={points_rhs_str}"
+                )
+                errs.append(msg)
+    return errs
+
+
+def _group_differences(
+    g1: Union[netCDF4.Dataset, netCDF4.Group],
+    g2: Union[netCDF4.Dataset, netCDF4.Group],
+    group_id_string: str,
+    dims_order: bool = True,
+    vars_order: bool = True,
+    attrs_order: bool = True,
+    groups_order: bool = True,
+    data_equality: bool = True,
+    suppress_warnings: bool = False,
+    check_names: bool = False,
+    check_unlimited: bool = True,
+    show_n_diffs: int = 2,
+) -> List[str]:
+    """
+    Inner routine to compare either whole datasets or subgroups.
+
+    Builds and returns a list of error strings, accumulating the lists
+    returned by recursive calls on any subgroups.
+    """
+    errs = []
+
+    if check_names:
+        if g1.name != g2.name:
+            errs.append(
+                f"Datasets have different names: {g1.name!r} != {g2.name!r}."
+ ) + # Compare lists of dimension names + dimnames, dimnames2 = [list(grp.dimensions.keys()) for grp in (g1, g2)] + errs += _namelist_differences( + dimnames, + dimnames2, + f"{group_id_string} dimension lists", + order_strict=dims_order, + suppress_warnings=suppress_warnings, + ) + + # Compare the dimensions themselves + for dimname in dimnames: + if dimname not in dimnames2: + continue + d1, d2 = [grp.dimensions[dimname] for grp in (g1, g2)] + dimlen, dimlen2 = [dim.size for dim in (d1, d2)] + if dimlen != dimlen2: + msg = ( + f'{group_id_string} "{dimname}" dimensions ' + f"have different sizes: {dimlen} != {dimlen2}" + ) + errs.append(msg) + + if check_unlimited: + unlim1, unlim2 = [ + dim.unlimited if _isncdata(dim) else dim.isunlimited() + for dim in (d1, d2) + ] + if unlim1 != unlim2: + msg = ( + f'{group_id_string} "{dimname}" dimension ' + f'has different "unlimited" status : {unlim1} != {unlim2}' + ) + errs.append(msg) + + # Compare file attributes + errs += _attribute_differences( + g1, + g2, + group_id_string, + attrs_order=attrs_order, + suppress_warnings=suppress_warnings, + ) + + # Compare lists of variables + varnames, varnames2 = [list(grp.variables.keys()) for grp in (g1, g2)] + errs += _namelist_differences( + varnames, + varnames2, + f"{group_id_string} variable lists", + order_strict=vars_order, + suppress_warnings=suppress_warnings, + ) + + # Compare the variables themselves + for varname in varnames: + if varname not in varnames2: + continue + v1, v2 = [grp.variables[varname] for grp in (g1, g2)] + errs += variable_differences( + v1, + v2, + check_attrs_order=attrs_order, + check_var_data=data_equality, + show_n_first_different=show_n_diffs, + suppress_warnings=suppress_warnings, + _group_id_string=group_id_string, + ) + + # Finally, recurse over groups + grpnames, grpnames2 = [list(grp.groups.keys()) for grp in (g1, g2)] + errs += _namelist_differences( + grpnames, + grpnames2, + f"{group_id_string} subgroup lists", + 
order_strict=groups_order,
+        suppress_warnings=suppress_warnings,
+    )
+    for grpname in grpnames:
+        if grpname not in grpnames2:
+            continue
+        grp1, grp2 = [grp.groups[grpname] for grp in (g1, g2)]
+        errs += _group_differences(
+            grp1, grp2,
+            group_id_string=f"{group_id_string}/{grpname}",
+            dims_order=dims_order,
+            vars_order=vars_order,
+            attrs_order=attrs_order,
+            groups_order=groups_order,
+            data_equality=data_equality,
+            # N.B. propagate suppression into subgroups: previously omitted,
+            suppress_warnings=suppress_warnings,
+            check_unlimited=check_unlimited,
+            show_n_diffs=show_n_diffs,
+        )
+    return errs
diff --git a/tests/_compare_nc_datasets.py b/tests/_compare_nc_datasets.py
deleted file mode 100644
index 21ea635..0000000
--- a/tests/_compare_nc_datasets.py
+++ /dev/null
@@ -1,459 +0,0 @@
-"""
-Utility for comparing 2 netcdf datasets.
-
-Works with file-specs, netCDF4.Datasets *or* NcData.
-
-For purposes of testing ncdata.netcdf4 behaviour.
-TODO: one day might be public ?
-"""
-
-from pathlib import Path
-from typing import AnyStr, List, Union
-from warnings import warn
-
-import netCDF4
-import netCDF4 as nc
-import numpy as np
-
-from ncdata import NcData
-
-
-def compare_nc_datasets(
-    dataset_or_path_1: Union[Path, AnyStr, nc.Dataset, NcData],
-    dataset_or_path_2: Union[Path, AnyStr, nc.Dataset, NcData],
-    check_dims_order: bool = True,
-    check_vars_order: bool = True,
-    check_attrs_order: bool = True,
-    check_groups_order: bool = True,
-    check_var_data: bool = True,
-    suppress_warnings: bool = False,
-) -> List[str]:
-    r"""
-    Compare netcdf data.
-
-    Accepts paths, pathstrings, open :class:`netCDF4.Dataset`\\s or :class:`NcData` objects.
-
-    Parameters
-    ----------
-    dataset_or_path_1, dataset_or_path_2 : str or Path or netCDF4.Dataset or NcData
-        two datasets to compare, either NcData or netCDF4
-    check_dims_order, check_vars_order, check_attrs_order, check_groups_order : bool, default True
-        If False, no error results from the same contents in a different order,
-        however unless `suppress_warnings` is True, the error string is issued as a warning.
- check_var_data : bool, default True - If True, all variable data is also checked for equality. - If False, only dtype and shape are compared. - suppress_warnings : bool, default False - When False (the default), report changes in content order as Warnings. - When True, ignore changes in ordering. - - Returns - ------- - errs : list of str - a list of error strings. - If empty, no differences were found. - - """ - ds1_was_path = not hasattr(dataset_or_path_1, "variables") - ds2_was_path = not hasattr(dataset_or_path_2, "variables") - ds1, ds2 = None, None - try: - if ds1_was_path: - ds1 = nc.Dataset(dataset_or_path_1) - else: - ds1 = dataset_or_path_1 - - if ds2_was_path: - ds2 = nc.Dataset(dataset_or_path_2) - else: - ds2 = dataset_or_path_2 - - errs = [] - _compare_nc_groups( - errs, - ds1, - ds2, - group_id_string="Dataset", - dims_order=check_dims_order, - vars_order=check_vars_order, - attrs_order=check_attrs_order, - groups_order=check_groups_order, - data_equality=check_var_data, - suppress_warnings=suppress_warnings, - ) - finally: - if ds1_was_path and ds1: - ds1.close() - if ds2_was_path and ds2: - ds2.close() - - return errs - - -def _compare_name_lists( - errslist, l1, l2, elemname, order_strict=True, suppress_warnings=False -): - msg = f"{elemname} do not match: {list(l1)} != {list(l2)}" - ok = l1 == l2 - ok_except_order = ok - if not ok: - ok_except_order = sorted(l1) == sorted(l2) - - if not ok: - if not ok_except_order or order_strict: - errslist.append(msg) - elif ok_except_order and not suppress_warnings: - warn("(Ignoring: " + msg + " )", category=UserWarning) - - -def _isncdata(obj): - """ - Distinguish NcData objects from similar netCDF4 ones. - - A crude test, used to support comparisons on either type of data. - """ - return hasattr(obj, "_print_content") - - -def _array_eq(a1, a2): - """ - Test equality of array values in attributes. - - Assumes values (attributes) are presented as numpy arrays (not lazy). - Matches any NaNs. 
- Does *NOT* handle masked data -- which does not occur in attributes. - """ - result = True - result &= a1.shape == a2.shape - result &= a1.dtype == a2.dtype - if result: - if a1.dtype.kind in ("S", "U", "b"): - result = np.all(a1 == a2) - else: - # array_equal handles possible NaN cases - result = np.array_equal(a1, a2, equal_nan=True) - return result - - -def _compare_attributes( - errs, - obj1, - obj2, - elemname, - attrs_order=True, - suppress_warnings=False, - force_first_attrnames=None, -): - """ - Compare attribute name lists. - - Does not return results, but appends error messages to 'errs'. - """ - attrnames, attrnames2 = [ - obj.attributes.keys() if _isncdata(obj) else obj.ncattrs() - for obj in (obj1, obj2) - ] - if attrs_order and force_first_attrnames: - - def fix_orders(attrlist): - for name in force_first_attrnames[::-1]: - if name in attrlist: - attrlist = [name] + [n for n in attrlist if n != name] - return attrlist - - attrnames = fix_orders(attrnames) - attrnames2 = fix_orders(attrnames2) - - _compare_name_lists( - errs, - attrnames, - attrnames2, - f"{elemname} attribute lists", - order_strict=attrs_order, - suppress_warnings=suppress_warnings, - ) - - # Compare the attributes themselves (dtypes and values) - for attrname in attrnames: - if attrname not in attrnames2: - # Only compare attributes existing on both inputs. - continue - - attr, attr2 = [ - ( - obj.attributes[attrname].as_python_value() - if _isncdata(obj) - else obj.getncattr(attrname) - ) - for obj in (obj1, obj2) - ] - - # TODO: this still doesn't work well for strings : for those, we should ignore - # exact "type" (including length), and just compare the content. - # TODO: get a good testcase going to check this behaviour - dtype, dtype2 = [ - # Get x.dtype, or fallback on type(x) -- basically, for strings. 
- getattr(attr, "dtype", type(attr)) - for attr in (attr, attr2) - ] - if all( - isinstance(dt, np.dtype) and dt.kind in "SUb" - for dt in (dtype, dtype2) - ): - dtype = dtype2 = "string" - if dtype != dtype2: - msg = ( - f'{elemname} "{attrname}" attribute datatypes differ : ' - f"{dtype!r} != {dtype2!r}" - ) - errs.append(msg) - else: - # If datatypes match (only then), compare values - # Cast attrs, which might be strings, to arrays for comparison - arr, arr2 = [np.asarray(attr) for attr in (attr, attr2)] - if not _array_eq(arr, arr2): - # N.B. special comparison to handle strings and NaNs - msg = ( - f'{elemname} "{attrname}" attribute values differ : ' - f"{attr!r} != {attr2!r}" - ) - errs.append(msg) - - -def _compare_nc_groups( - errs: List[str], - g1: Union[netCDF4.Dataset, netCDF4.Group], - g2: Union[netCDF4.Dataset, netCDF4.Group], - group_id_string: str, - dims_order: bool = True, - vars_order: bool = True, - attrs_order: bool = True, - groups_order: bool = True, - data_equality: bool = True, - suppress_warnings: bool = False, -): - """ - Inner routine to compare either whole datasets or subgroups. - - Note that, rather than returning a list of error strings, it appends them to the - passed arg `errs`. This just makes recursive calling easier. 
- """ - # Compare lists of dimension names - dimnames, dimnames2 = [list(grp.dimensions.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, - dimnames, - dimnames2, - f"{group_id_string} dimension lists", - order_strict=dims_order, - suppress_warnings=suppress_warnings, - ) - - # Compare the dimensions themselves - for dimname in dimnames: - if dimname not in dimnames2: - continue - d1, d2 = [grp.dimensions[dimname] for grp in (g1, g2)] - dimlen, dimlen2 = [dim.size for dim in (d1, d2)] - if dimlen != dimlen2: - msg = ( - f'{group_id_string} "{dimname}" dimensions ' - f"have different sizes: {dimlen} != {dimlen2}" - ) - errs.append(msg) - - # Compare file attributes - _compare_attributes( - errs, - g1, - g2, - group_id_string, - attrs_order=attrs_order, - suppress_warnings=suppress_warnings, - ) - - # Compare lists of variables - varnames, varnames2 = [list(grp.variables.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, - varnames, - varnames2, - f"{group_id_string} variable lists", - order_strict=dims_order, - suppress_warnings=suppress_warnings, - ) - - # Compare the variables themselves - for varname in varnames: - if varname not in varnames2: - continue - v1, v2 = [grp.variables[varname] for grp in (g1, g2)] - - var_id_string = f'{group_id_string} variable "{varname}"' - - # dimensions - dims, dims2 = [v.dimensions for v in (v1, v2)] - if dims != dims2: - msg = f"{var_id_string} dimensions differ : {dims!r} != {dims2!r}" - - # attributes - _compare_attributes( - errs, - v1, - v2, - var_id_string, - attrs_order=attrs_order, - suppress_warnings=suppress_warnings, - force_first_attrnames=[ - "_FillValue" - ], # for some reason, this doesn't always list consistently - ) - - # dtypes - dtype, dtype2 = [ - v.dtype if _isncdata(v) else v.datatype for v in (v1, v2) - ] - if dtype != dtype2: - msg = f"{var_id_string} datatypes differ : {dtype!r} != {dtype2!r}" - errs.append(msg) - - # data values - is_str, is_str2 = (dt.kind in "SUb" for dt in (dtype, 
dtype2)) - # TODO: is this correct check to allow compare between different dtypes? - if data_equality and dims == dims2 and is_str == is_str2: - # N.B. don't check shapes here: we already checked dimensions. - # NOTE: no attempt to use laziness here. Could be improved. - def getdata(var): - if _isncdata(var): - data = var.data - if hasattr(data, "compute"): - data = data.compute() - else: - # expect var to be an actual netCDF4.Variable - # (check for obscure property NOT provided by mimics) - assert hasattr(var, "use_nc_get_vars") - data = var[:] - # Return 0D as 1D, as this makes results simpler to interpret. - if data.ndim == 0: - data = data.flatten() - assert data.shape == (1,) - return data - - data, data2 = (getdata(v) for v in (v1, v2)) - flatdata, flatdata2 = ( - np.asanyarray(arr).flatten() for arr in (data, data2) - ) - - # For simpler checking, use flat versions - flat_diff_inds = ( - [] - ) # NB *don't* make this an array, it causes problems - - # Work out whether string : N.B. array type does not ALWAYS match the - # variable type, because apparently the scalar content of a *masked* scalar - # string variable has a numeric type (!! yuck !!) - is_string_data = flatdata.dtype.kind in ("S", "U") - if is_string_data: - safe_fill_const = "" - else: - safe_fill_const = np.zeros((1,), dtype=flatdata.dtype)[0] - - # Where data is masked, count mask mismatches and skip those points - if any(np.ma.is_masked(arr) for arr in (data, data2)): - mask, mask2 = ( - np.ma.getmaskarray(array) - for array in (flatdata, flatdata2) - ) - flat_diff_inds = list(np.where(mask != mask2)[0]) - # Replace all masked points to exclude them from unmasked-point checks. 
- either_masked = mask | mask2 - flatdata[either_masked] = safe_fill_const - flatdata2[either_masked] = safe_fill_const - - # Where data has NANs, count mismatches and skip (as for masked) - if not is_string_data: - isnans, isnans2 = ( - np.isnan(arr) for arr in (flatdata, flatdata2) - ) - if np.any(isnans) or np.any(isnans2): - nandiffs = np.where(isnans != isnans2)[0] - if nandiffs: - flat_diff_inds += list(nandiffs) - anynans = isnans | isnans2 - flatdata[anynans] = safe_fill_const - flatdata2[anynans] = safe_fill_const - - flat_diff_inds += list(np.where(flatdata != flatdata2)[0]) - # Order the nonmatching indices : We report just the first few ... - flat_diff_inds = sorted(flat_diff_inds) - n_diffs = len(flat_diff_inds) - if n_diffs: - msg = f"{var_id_string} data contents differ, at {n_diffs} points: " - ellps = ", ..." if n_diffs > 2 else "" - diffinds = flat_diff_inds[:2] - diffinds = [ - np.unravel_index(ind, shape=data.shape) for ind in diffinds - ] - diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds) - inds_str = f"[{diffinds_str}{ellps}]" - points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds) - points_rhs_str = ", ".join( - repr(data2[ind]) for ind in diffinds - ) - points_lhs_str = f"[{points_lhs_str}{ellps}]" - points_rhs_str = f"[{points_rhs_str}{ellps}]" - msg += ( - f"@INDICES{inds_str}" - f" : LHS={points_lhs_str}, RHS={points_rhs_str}" - ) - errs.append(msg) - - # Finally, recurse over groups - grpnames, grpnames2 = [list(grp.groups.keys()) for grp in (g1, g2)] - _compare_name_lists( - errs, - grpnames, - grpnames2, - f"{group_id_string} subgroup lists", - order_strict=groups_order, - suppress_warnings=suppress_warnings, - ) - for grpname in grpnames: - if grpname not in grpnames2: - continue - grp1, grp2 = [grp.groups[grpname] for grp in (g1, g2)] - _compare_nc_groups( - errs, - grp1, - grp2, - group_id_string=f"{group_id_string}/{grpname}", - dims_order=dims_order, - vars_order=vars_order, - attrs_order=attrs_order, - 
groups_order=groups_order, - data_equality=data_equality, - ) - - -if __name__ == "__main__": - fps = [ - "/home/h05/itpp/tmp.nc", - "/home/h05/itpp/tmp2.nc", - "/home/h05/itpp/mask.nc", - "/home/h05/itpp/tmps.nc", - "/home/h05/itpp/tmps2.nc", - ] - fp1, fp2, fp3, fp4, fp5 = fps - pairs = [ - [fp1, fp1], - [fp1, fp2], - [fp1, fp3], - [fp4, fp5], - ] - for p1, p2 in pairs: - errs = compare_nc_datasets(p1, p2, check_attrs_order=False) - print("") - print(f"Compare {p1} with {p2} : {len(errs)} errors ") - for err in errs: - print(" ", err) - print("-ends-") diff --git a/tests/data_testcase_schemas.py b/tests/data_testcase_schemas.py index 26f8c7e..19d44a1 100644 --- a/tests/data_testcase_schemas.py +++ b/tests/data_testcase_schemas.py @@ -330,8 +330,8 @@ def _define_simple_testcases(): return testcases -ADD_IRIS_FILES = True -# ADD_IRIS_FILES = False +# ADD_IRIS_FILES = True +ADD_IRIS_FILES = False @standard_testcases_func diff --git a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py index f1b2355..4468829 100644 --- a/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py +++ b/tests/integration/example_scripts/ex_ncdata_netcdf_conversion.py @@ -12,8 +12,8 @@ from ncdata import NcAttribute, NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4, to_nc4 +from ncdata.utils import dataset_differences from tests import testdata_dir -from tests._compare_nc_datasets import compare_nc_datasets def example_nc4_load_save_roundtrip(): # noqa: D103 @@ -28,7 +28,7 @@ def example_nc4_load_save_roundtrip(): # noqa: D103 filepath2 = tempdir_path / "temp_nc_output.nc" to_nc4(ncdata, filepath2) - result = compare_nc_datasets(filepath, filepath2) + result = dataset_differences(filepath, filepath2) equals_result = result == [] print("\nFiles compare? 
:", equals_result) assert equals_result diff --git a/tests/integration/test_iris_load_and_save_equivalence.py b/tests/integration/test_iris_load_and_save_equivalence.py index 042d8c9..93579c8 100644 --- a/tests/integration/test_iris_load_and_save_equivalence.py +++ b/tests/integration/test_iris_load_and_save_equivalence.py @@ -11,7 +11,7 @@ import pytest from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase from tests.integration.equivalence_testing_utils import ( adjust_chunks, @@ -93,7 +93,7 @@ def test_load_direct_vs_viancdata( if not result: # FOR NOW: compare with experimental ncdata comparison. # I know this is a bit circular, but it is useful for debugging, for now ... - result = compare_nc_datasets( + result = dataset_differences( from_iris(iris_cubes), from_iris(iris_ncdata_cubes) ) assert result == [] @@ -144,5 +144,5 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): print(txt) # Check equivalence - results = compare_nc_datasets(temp_iris_savepath, temp_ncdata_savepath) + results = dataset_differences(temp_iris_savepath, temp_ncdata_savepath) assert results == [] diff --git a/tests/integration/test_iris_xarray_roundtrips.py b/tests/integration/test_iris_xarray_roundtrips.py index 160860a..638cae5 100644 --- a/tests/integration/test_iris_xarray_roundtrips.py +++ b/tests/integration/test_iris_xarray_roundtrips.py @@ -19,8 +19,8 @@ from ncdata.iris_xarray import cubes_to_xarray from ncdata.netcdf4 import from_nc4 from ncdata.threadlock_sharing import lockshare_context +from ncdata.utils import dataset_differences from ncdata.xarray import from_xarray -from tests._compare_nc_datasets import compare_nc_datasets from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, @@ -172,7 +172,7 @@ def test_roundtrip_ixi(standard_testcase, use_irislock, 
adjust_chunks): if not result: # FOR NOW: compare with experimental ncdata comparison. # I know this is a bit circular, but it is useful for debugging, for now ... - result = compare_nc_datasets( + result = dataset_differences( from_iris(iris_cubes), from_iris(iris_xr_cubes) ) assert result == [] @@ -299,14 +299,14 @@ def test_roundtrip_xix( "calendar", "standard" ) - result = compare_nc_datasets( + result = dataset_differences( ncds_xr, ncds_xr_iris ) # , check_var_data=False) assert result == [] # TODO: check equivalence, in Xarray terms # xr_result = xrds_iris.equals(xrds) - # ncd_result = compare_nc_datasets( + # ncd_result = dataset_differences( # ncds_xr, ncds_xr_iris # ) # , check_var_data=False) # print("\nDATASET COMPARE RESULTS:\n" + "\n".join(ncd_result)) diff --git a/tests/integration/test_netcdf_roundtrips.py b/tests/integration/test_netcdf_roundtrips.py index 79e258d..6fe635d 100644 --- a/tests/integration/test_netcdf_roundtrips.py +++ b/tests/integration/test_netcdf_roundtrips.py @@ -4,7 +4,7 @@ from subprocess import check_output from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import session_testdir, standard_testcase # Avoid complaints that the imported fixtures are "unused" @@ -38,5 +38,5 @@ def test_basic(standard_testcase, tmp_path): print(txt) # Check that the re-saved file matches the original - results = compare_nc_datasets(source_filepath, intermediate_filepath) + results = dataset_differences(source_filepath, intermediate_filepath) assert results == [] diff --git a/tests/integration/test_xarray_load_and_save_equivalence.py b/tests/integration/test_xarray_load_and_save_equivalence.py index 1f996d4..d7fb316 100644 --- a/tests/integration/test_xarray_load_and_save_equivalence.py +++ b/tests/integration/test_xarray_load_and_save_equivalence.py @@ -10,8 +10,8 @@ from ncdata.netcdf4 import from_nc4, to_nc4 from 
ncdata.threadlock_sharing import lockshare_context +from ncdata.utils import dataset_differences from ncdata.xarray import from_xarray, to_xarray -from tests._compare_nc_datasets import compare_nc_datasets from tests.data_testcase_schemas import ( BAD_LOADSAVE_TESTCASES, session_testdir, @@ -74,10 +74,11 @@ def test_save_direct_vs_viancdata(standard_testcase, tmp_path): to_nc4(ncds_fromxr, temp_ncdata_savepath) # Check equivalence - results = compare_nc_datasets( + results = dataset_differences( temp_direct_savepath, temp_ncdata_savepath, check_dims_order=False, + check_dims_unlimited=False, # TODO: remove this when we fix it suppress_warnings=True, ) assert results == [] diff --git a/tests/unit/netcdf/test_from_nc4.py b/tests/unit/netcdf/test_from_nc4.py index 61c3c19..ea61291 100644 --- a/tests/unit/netcdf/test_from_nc4.py +++ b/tests/unit/netcdf/test_from_nc4.py @@ -16,7 +16,7 @@ from ncdata import NcData, NcDimension, NcVariable from ncdata.netcdf4 import from_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset @@ -38,7 +38,7 @@ def test_target_types(sourcetype, tmp_path): """Check the various ways of specifying the input data.""" # This testcase is a rather complicated, but we need to test with groups, and we # may as well also test for variables which map dimensions from multiple levels. - # In effect, this is also exercising tricky bits of 'compare_nc_datasets' !! + # In effect, this is also exercising tricky bits of 'dataset_differences' !! 
test_spec = { "dims": [dict(name="xdim", size=3)], "vars": [ @@ -84,7 +84,7 @@ def test_target_types(sourcetype, tmp_path): variables=[ NcVariable( name="x", - dimensions=("xdim"), + dimensions=("xdim",), dtype=np.float32, data=[1.23, 2, 9], ) @@ -107,5 +107,5 @@ def test_target_types(sourcetype, tmp_path): if sourcetype == "group": ncdata_expected = ncdata_expected.groups["inner_group"] - diffs = compare_nc_datasets(ncdata, ncdata_expected) + diffs = dataset_differences(ncdata, ncdata_expected) assert diffs == [] diff --git a/tests/unit/netcdf/test_to_nc4.py b/tests/unit/netcdf/test_to_nc4.py index e72fd52..8f2934a 100644 --- a/tests/unit/netcdf/test_to_nc4.py +++ b/tests/unit/netcdf/test_to_nc4.py @@ -17,7 +17,7 @@ from ncdata import NcData from ncdata.netcdf4 import from_nc4, to_nc4 -from tests._compare_nc_datasets import compare_nc_datasets +from ncdata.utils import dataset_differences from tests.data_testcase_schemas import make_testcase_dataset @@ -61,7 +61,7 @@ def test_target_types(targettype, tmp_path): target.close() assert target_path.exists() - assert compare_nc_datasets(target_path, original_path) == [] + assert dataset_differences(target_path, original_path) == [] def fetch_nc_var(nc_file: nc.Dataset, var_path: str or List[str]): diff --git a/tests/unit/tests/unit/__init__.py b/tests/unit/tests/unit/__init__.py deleted file mode 100644 index a8038cb..0000000 --- a/tests/unit/tests/unit/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -""" -Unit tests for :mod:`tests.unit`. - -Yes I know, tests of tests. But it seems necessary. 
-""" diff --git a/tests/unit/utils/compare_nc_datasets/__init__.py b/tests/unit/utils/compare_nc_datasets/__init__.py new file mode 100644 index 0000000..7f699aa --- /dev/null +++ b/tests/unit/utils/compare_nc_datasets/__init__.py @@ -0,0 +1 @@ +"""Unit tests for :mod:`ncdata.utils._compare_nc_datasets`.""" diff --git a/tests/unit/tests/test_compare_nc_datasets.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py similarity index 68% rename from tests/unit/tests/test_compare_nc_datasets.py rename to tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py index e153db3..1c3e20e 100644 --- a/tests/unit/tests/test_compare_nc_datasets.py +++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py @@ -1,67 +1,25 @@ """ Tests for :mod:`tests.unit.netcdf._compare_nc_files` - -Yes I know, tests of tests. But it seems necessary. +Split in two files ... + * HERE: "additional" tests cover subsidiary routines and the main + API usage modes. + * ( ALSO: "mainfunctions" (q.v.) cover the core functionality + -- which elements are compared and what errors this constructs. ) """ import shutil import warnings -from unittest import mock import netCDF4 as nc import numpy as np import pytest -from tests._compare_nc_datasets import ( - _compare_attributes, - _compare_name_lists, - compare_nc_datasets, +from ncdata.utils._compare_nc_datasets import ( + _attribute_differences, + _namelist_differences, + dataset_differences, ) from tests.test_samplecode_cdlgen_comparablecdl import ncgen_from_cdl -# CDL to create a reference file with "all" features included. 
-_base_cdl = """ -netcdf everything { -dimensions: - x = 2 ; - y = 3 ; - strlen = 5 ; -variables: - int x(x) ; - x:name = "var_x" ; - int var_2d(x, y) ; - uint var_u8(x) ; - float var_f4(x) ; - double var_f8(x) ; - char var_str(x, strlen) ; - int other(x) ; - other:attr_int = 1 ; - other:attr_float = 2.0f ; - other:attr_double = 2.0 ; - other:attr_string = "this" ; - int masked_int(y) ; - masked_int:_FillValue = -3 ; - int masked_float(y) ; - masked_float:_FillValue = -4.0 ; - -// global attributes: - :global_attr_1 = "one" ; - :global_attr_2 = 2 ; - -// groups: -group: grp_1 { - dimensions: - y = 7 ; - variables: - int parent_dim(x) ; - int own_dim(y) ; -} -group: grp_2 { - variables: - int grp2_x(x) ; -} -} -""" - _simple_cdl = """ netcdf test { dimensions: @@ -77,38 +35,32 @@ """ -class Test__compare_name_lists: +class Test_namelist_differences: # Test subsidiary routine for checking a list of names def test_empty(self): - errs = [] - _compare_name_lists(errs, [], [], "named-elements") + errs = _namelist_differences([], [], "named-elements") assert errs == [] def test_same(self): tst = ["a", "b"] - errs = [] - _compare_name_lists(errs, tst, tst, "named-elements") + errs = _namelist_differences(tst, tst, "named-elements") assert errs == [] def test_diff(self): - errs = [] - _compare_name_lists(errs, ["a"], [], "named-elements") + errs = _namelist_differences(["a"], [], "named-elements") assert errs == ["named-elements do not match: ['a'] != []"] def test_difforder(self): - errs = [] - _compare_name_lists(errs, ["a", "b"], ["b", "a"], "named-elements") + errs = _namelist_differences(["a", "b"], ["b", "a"], "named-elements") assert errs == [ "named-elements do not match: ['a', 'b'] != ['b', 'a']" ] def test_difforder_tolerant_warns(self): - errs = [] with pytest.warns( UserWarning, match="Ignoring: named-elements do not match" ): - _compare_name_lists( - errs, + errs = _namelist_differences( ["a", "b"], ["b", "a"], "named-elements", @@ -117,11 +69,9 @@ def 
test_difforder_tolerant_warns(self): assert errs == [] def test_difforder_tolerant_nowarn(self): - errs = [] with warnings.catch_warnings(): warnings.simplefilter("error") - _compare_name_lists( - errs, + errs = _namelist_differences( ["a", "b"], ["b", "a"], "named-elements", @@ -131,45 +81,39 @@ def test_difforder_tolerant_nowarn(self): assert errs == [] -class Test__compare_attributes: - def test_compare_attributes_namelists(self): - # Check that it calls the generic _compare_name_lists routine, passing all the +class Test_attribute_differences: + def test_compare_attributes_namelists(self, mocker): + # Check that it calls the generic _namelist_differences routine, passing all the # correct controls - # Mimic 2 objects with NO attributes. - attrs1 = mock.MagicMock() - attrs2 = mock.MagicMock() - # Make the test objects look like real files (not NcData), and ensure that - # obj.ncattrs() is iterable. - obj1 = mock.Mock( - spec="ncattrs", ncattrs=mock.Mock(return_value=attrs1) + # NB make the compared object mimic nc Variables, not NcData + attrnames_1 = ["a", "b"] + attrnames_2 = ["c", "d"] + obj1 = mocker.Mock( + spec=nc.Variable, ncattrs=mocker.Mock(return_value=attrnames_1) ) - obj2 = mock.Mock( - spec="ncattrs", ncattrs=mock.Mock(return_value=attrs2) + obj2 = mocker.Mock( + spec=nc.Variable, ncattrs=mocker.Mock(return_value=attrnames_2) ) - errs = mock.sentinel.errors_list elemname = "" - order = mock.sentinel.attrs_order - suppress = mock.sentinel.suppress_warnings - tgt = "tests._compare_nc_datasets._compare_name_lists" - with mock.patch(tgt) as patch_tgt: - _compare_attributes( - errs=errs, - obj1=obj1, - obj2=obj2, - elemname=elemname, - attrs_order=order, - suppress_warnings=suppress, - ) - assert patch_tgt.call_args_list == [ - mock.call( - errs, - attrs1, - attrs2, - " attribute lists", - order_strict=order, - suppress_warnings=suppress, - ) - ] + order = mocker.sentinel.attrs_order + suppress = mocker.sentinel.suppress_warnings + tgt = 
"ncdata.utils._compare_nc_datasets._namelist_differences" + patch_tgt = mocker.patch(tgt) + _attribute_differences( + obj1=obj1, + obj2=obj2, + elemname=elemname, + attrs_order=order, + suppress_warnings=suppress, + ) + (one_call,) = patch_tgt.call_args_list + assert one_call == mocker.call( + attrnames_1, + attrnames_2, + " attribute lists", + order_strict=order, + suppress_warnings=suppress, + ) class Nc4ObjectWithAttrsMimic: def __init__(self, **attrs): @@ -199,34 +143,37 @@ def test_compare_attributes_empty(self): # Test two objects with no attributes obj1 = self.Nc4ObjectWithAttrsMimic() obj2 = self.Nc4ObjectWithAttrsMimic() - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__allok(self): # Objects with matching attributes obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") + assert errs == [] + + def test_compare_attributes_values__scalar_arrayof1(self): + # Objects with matching attributes + obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2) + obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=[2]) + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_mismatch(self): # Attributes of different value (but matching dtype) obj1 = self.Nc4ObjectWithAttrsMimic(a=1, b=2, c=3) obj2 = self.Nc4ObjectWithAttrsMimic(a=1, b=-77, c=3) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "b" attribute values differ : 2 != -77' ] - def test_compare_attributes_values__dtype_mismatch(self): + def test_compare_attributes_values__dtype_mismatch__length(self): # Attributes of different dtypes, even though values == obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.float64(0)) - errs 
= [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -234,12 +181,47 @@ def test_compare_attributes_values__dtype_mismatch(self): ) ] + def test_compare_attributes_values__dtype_mismatch__signed_unsigned(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.uint32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) + errs = _attribute_differences(obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('uint32') != dtype('int32')" + ) + ] + + def test_compare_attributes_values__dtype_mismatch__float_int(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a=np.int32(0)) + errs = _attribute_differences(obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('float32') != dtype('int32')" + ) + ] + + def test_compare_attributes_values__dtype_mismatch__numeric_string(self): + # Attributes of different dtypes, even though values == + obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) + obj2 = self.Nc4ObjectWithAttrsMimic(a="this") + errs = _attribute_differences(obj1, obj2, "") + assert errs == [ + ( + ' "a" attribute datatypes differ : ' + "dtype('float32') != " + ) + ] + def test_compare_attributes_values__dtype_and_data_mismatch(self): # Attributes of different dtypes, but values != obj1 = self.Nc4ObjectWithAttrsMimic(a=np.float32(0)) obj2 = self.Nc4ObjectWithAttrsMimic(a=np.float64(1)) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "a" attribute datatypes differ : ' "dtype('float32') != dtype('float64')" @@ -250,8 +232,7 @@ def test_compare_attributes_values__data_arrays_match(self): array = np.arange(3.0) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = 
self.Nc4ObjectWithAttrsMimic(a=array) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_arrays_dtype_mismatch(self): @@ -259,8 +240,7 @@ def test_compare_attributes_values__data_arrays_dtype_mismatch(self): array = np.arange(3, dtype="f4") obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array.astype("f8")) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute datatypes differ : ' @@ -273,8 +253,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self): array = np.arange(3) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array[:-1]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -288,8 +267,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self): array2 = np.array([1, 2, 777]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array1) obj2 = self.Nc4ObjectWithAttrsMimic(a=array2) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -302,8 +280,7 @@ def test_compare_attributes_values__data_arrays_nans_match(self): array = np.array([1, np.nan, 3]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array) obj2 = self.Nc4ObjectWithAttrsMimic(a=array) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__data_arrays_nans_mismatch(self): @@ -312,8 +289,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self): array2 = np.array([1.0, np.nan, 3.0]) obj1 = self.Nc4ObjectWithAttrsMimic(a=array1) obj2 = self.Nc4ObjectWithAttrsMimic(a=array2) - errs = [] - 
_compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ( ' "a" attribute values differ : ' @@ -325,8 +301,7 @@ def test_compare_attributes_values__string_nonstring(self): # Attributes of string and non-string types, since we handle that differently obj1 = self.Nc4ObjectWithAttrsMimic(a=1) obj2 = self.Nc4ObjectWithAttrsMimic(a="1") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "a" attribute datatypes differ : ' "dtype('int64') != " @@ -336,16 +311,14 @@ def test_compare_attributes_values__string_match(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S="this") obj2 = self.Nc4ObjectWithAttrsMimic(S="this") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__string_mismatch(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S="this") obj2 = self.Nc4ObjectWithAttrsMimic(S="that") - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ " \"S\" attribute values differ : 'this' != 'that'" ] @@ -354,21 +327,28 @@ def test_compare_attributes_values__string_array_match(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) obj2 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) - errs = [] - _compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [] def test_compare_attributes_values__string_array_mismatch(self): # Attributes of string type (since netCDF4 returns char attributes as string) obj1 = self.Nc4ObjectWithAttrsMimic(S=["a", "b"]) obj2 = self.Nc4ObjectWithAttrsMimic(S=["a", "c"]) - errs = [] - 
_compare_attributes(errs, obj1, obj2, "") + errs = _attribute_differences(obj1, obj2, "") assert errs == [ ' "S" attribute values differ : ' "['a', 'b'] != ['a', 'c']" ] + def test_compare_attributes__ncdata_string_scalar_array(self): + # Attributes of string type (since netCDF4 returns char attributes as string) + from ncdata import NcAttribute, NcData + + obj1 = NcData(attributes=[NcAttribute("x", ["string"])]) + obj2 = NcData(attributes=[NcAttribute("x", "string")]) + errs = _attribute_differences(obj1, obj2, "") + assert errs == [] + @pytest.fixture(autouse=True, scope="module") def temp_ncfiles_dir(tmp_path_factory): @@ -405,12 +385,12 @@ def samefiles_bothtypes(samefiles_filesonly, sourcetype): class Test_compare_nc_files__api: def test_identical(self, samefiles_bothtypes): source1, source2 = samefiles_bothtypes - result = compare_nc_datasets(source1, source2) + result = dataset_differences(source1, source2) assert result == [] def test_identical_stringpaths(self, samefiles_filesonly): source1, source2 = samefiles_filesonly - result = compare_nc_datasets(str(source1), str(source2)) + result = dataset_differences(str(source1), str(source2)) assert result == [] def test_identical_datasets(self, samefiles_filesonly, sourcetype): @@ -419,7 +399,7 @@ def test_identical_datasets(self, samefiles_filesonly, sourcetype): try: ds1 = nc.Dataset(source1) ds2 = nc.Dataset(source2) - result = compare_nc_datasets(ds1, ds2) + result = dataset_differences(ds1, ds2) assert result == [] finally: for ds in (ds1, ds2): @@ -442,7 +422,7 @@ def test_small_difference( # Source1/2 are NcData : just modify source2 source2.attributes["extra_global_attr"] = 1 - result = compare_nc_datasets(source1, source2) + result = dataset_differences(source1, source2) assert result == [ "Dataset attribute lists do not match: [] != ['extra_global_attr']" ] @@ -474,7 +454,7 @@ def test_vardata_difference( if ds is not None: ds.close() - result = compare_nc_datasets(source1, source2) + result = 
dataset_differences(source1, source2) # N.B. ncdata comparison bypasses the masked+scaled view of data, hence the # message differs. Could fix this? mask1 = "masked" if sourcetype == "InputsFile" else "9.96921e+36" diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py new file mode 100644 index 0000000..1e03e9f --- /dev/null +++ b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py @@ -0,0 +1,479 @@ +""" +Tests for :mod:`ncdata.utils._compare_nc_datasets` +Split in two files ... + * HERE: "mainfunctions" cover the core functionality + -- which elements are compared and what errors this constructs. + * ( ALSO: "additional" tests (q.v.) cover subsidiary routines and the + main API usage modes. ) +""" +import numpy as np +import pytest + +from ncdata import NcAttribute, NcData, NcDimension, NcVariable +from ncdata.utils import dataset_differences + +# from tests.data_testcase_schemas import _Datatype_Sample_Values, data_types +# data_types # avoid 'unused' warning + + +@pytest.fixture( + params=["in_named", "in_unnamed", "in_namedgroup", "in_unnamedgroup"] +) +def group_context(request): + """ + The different contexts of locations in a dataset + + In which an element (dimension, group or variable) might be found, and + which might appear different in the mismatch-error messages. 
+ """ + return request.param + + +@pytest.fixture(params=["on_group", "on_variable"]) +def attr_context(request): + """The different contexts for an attribute in a dataset.""" + return request.param + + +@pytest.fixture(params=["ordered", "unordered"]) +def order_checking(request): + """Whether to test with order checking or not.""" + return request.param + + +def decode_ordercheck(order_checking): + return {"ordered": True, "unordered": False}[order_checking] + + +def location_prefix(group_context, attr_context="on_group"): + prefix = "Dataset" + if "namedgroup" in group_context: + prefix += "/inner_group" + if "variable" in attr_context: + prefix += ' variable "vx"' + return prefix + + +def put_group_into_context(testdata, group_context): + if group_context == "in_named": + pass + elif group_context == "in_unnamed": + testdata.name = None + elif "group" in group_context: + testdata.name = "inner_group" + testdata = NcData(name="outer_dataset", groups=[testdata]) + if group_context == "in_namedgroup": + pass + elif group_context == "in_unnamedgroup": + testdata.name = None + else: + raise ValueError(f"unknown group_context: {group_context!r}") + else: + raise ValueError(f"unknown group_context: {group_context!r}") + + return testdata + + +_DEBUG_RESULTS = True +# _DEBUG_RESULTS = True + + +def check(results, expected): + if _DEBUG_RESULTS: + print("\nResult messages:") + for msg in results: + print(" ", msg) + assert results == expected + + +class TestCompareDatasets: + @pytest.mark.parametrize("namecheck", ["withnames", "withoutnames"]) + @pytest.mark.parametrize("altname", ["named_y", "named_none"]) + def test_names(self, namecheck, altname): + do_namecheck = namecheck == "withnames" + altname = {"named_y": "y", "named_none": None}[altname] + data1, data2 = NcData(name="x"), NcData(name=altname) + + # Use kwargs just to confirm that the default for name-checking is 'off' + kwargs = dict(check_names=True) if do_namecheck else {} + errs = 
dataset_differences(data1, data2, **kwargs) + + if do_namecheck: + expected = [f"Datasets have different names: 'x' != {altname!r}."] + else: + expected = [] + check(errs, expected) + + +class TestCompareDimensions: + def dimension_testdata(self, group_context): + testdata = NcData( + name="dataset_1", + dimensions=[ + NcDimension("x", 2, unlimited=True), + NcDimension("y", 3, unlimited=False), + ], + ) + testdata = put_group_into_context(testdata, group_context) + return testdata + + @pytest.fixture(autouse=True) + def _dims_data(self, group_context): + data1, data2 = [ + self.dimension_testdata(group_context) for _ in range(2) + ] + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context) + self.dims = location.dimensions + + def test_name(self): + self.dims.rename("x", "q") + errs = dataset_differences(self.data1, self.data2) + expected = [ + f"{self.location_string} dimension lists do not match: " + "['x', 'y'] != ['q', 'y']" + ] + check(errs, expected) + + def test_size(self): + self.dims["x"].size = 77 + + errs = dataset_differences(self.data1, self.data2) + + expected = [ + f'{self.location_string} "x" dimensions have different sizes: 2 != 77' + ] + check(errs, expected) + + @pytest.mark.parametrize( + "check_unlim", ["unlims_checked", "unlims_unchecked"] + ) + def test_unlimited(self, check_unlim): + self.dims["y"].unlimited = True + + do_check_unlims = {"unlims_checked": True, "unlims_unchecked": False}[ + check_unlim + ] + errs = dataset_differences( + self.data1, self.data2, check_dims_unlimited=do_check_unlims + ) + + if do_check_unlims: + expected = [ + f'{self.location_string} "y" dimension has different "unlimited" status : ' + "False != True" + ] + else: + expected = [] + + check(errs, expected) + + def test_ordering(self, order_checking): + all_dims = list(self.dims.values()) + self.dims.clear() + 
self.dims.addall(all_dims[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) + errs = dataset_differences( + self.data1, self.data2, check_dims_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + f"{self.location_string} dimension lists do not match: " + "['x', 'y'] != ['y', 'x']" + ] + else: + expected = [] + + check(errs, expected) + + def test_extra_or_missing(self): + all_dims = list(self.dims.values()) + # Remove the last dimension, so data1 has a dim not present in data2 + self.dims.clear() + self.dims.addall(all_dims[:-1]) + + errs = dataset_differences(self.data1, self.data2) + + expected = [ + f"{self.location_string} dimension lists do not match: " + "['x', 'y'] != ['x']" + ] + check(errs, expected) + + +class TestCompareAttributes: + def attribute_testdata(self, group_context): + testdata = NcData( + name="dataset_1", + variables=[ + NcVariable( + "vx", + dimensions=[], + data=np.array(1.0), + attributes=[ + NcAttribute("att1", 1), + NcAttribute("att2", 2), + ], + ) + ], + attributes=[ + NcAttribute("att1", 11), + NcAttribute("att2", 12), + ], + ) + testdata = put_group_into_context(testdata, group_context) + return testdata + + @pytest.fixture(autouse=True) + def _attrs_data(self, group_context, attr_context): + data1, data2 = [ + self.attribute_testdata(group_context) for _ in range(2) + ] + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + is_on_var = {"on_group": False, "on_variable": True}[attr_context] + if is_on_var: + location = location.variables["vx"] + + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context, attr_context) + self.attrs = location.attributes + + def test_name(self): + self.attrs.rename("att1", "changed") + + errs = dataset_differences(self.data1, self.data2) + + expected = [ + f"{self.location_string} attribute lists do not match: " + "['att1', 'att2'] != ['changed', 'att2']" + ] + check(errs, expected) + + def 
test_value(self, attr_context): + self.attrs["att1"].value = np.array(999) + + errs = dataset_differences(self.data1, self.data2) + + if "variable" in attr_context: + value_string = "1" + else: + value_string = "11" + expected = [ + f'{self.location_string} "att1" attribute values differ : ' + f"array({value_string}) != array(999)" + ] + check(errs, expected) + + def test_ordering(self, order_checking): + do_ordercheck = decode_ordercheck(order_checking) + all_attrs = list(self.attrs.values()) + self.attrs.clear() + self.attrs.addall(all_attrs[::-1]) + + errs = dataset_differences( + self.data1, self.data2, check_attrs_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + f"{self.location_string} attribute lists do not match: " + "['att1', 'att2'] != ['att2', 'att1']" + ] + else: + expected = [] + check(errs, expected) + + def test_extra_or_missing(self, order_checking): + do_ordercheck = decode_ordercheck(order_checking) + del self.attrs["att1"] + + errs = dataset_differences( + self.data1, self.data2, check_attrs_order=do_ordercheck + ) + + expected = [ + f"{self.location_string} attribute lists do not match: " + "['att1', 'att2'] != ['att2']" + ] + check(errs, expected) + + @pytest.mark.parametrize("attname", ["fillvalue", "generic"]) + def test_fillvalue_anyorder(self, attname): + """The order of "_FillValue" attributes is specially ignored.""" + name = {"fillvalue": "_FillValue", "generic": "anyold"}[attname] + # data1, data2 have attrs in the other order + attr_pair = [NcAttribute(name, 1), NcAttribute("x", 1)] + data1, data2 = [ + NcData( + variables=[ + NcVariable("vx", (), data=np.array(0.0), attributes=attrs) + ] + ) + for attrs in (attr_pair, attr_pair[::-1]) + ] + + errs = dataset_differences(data1, data2) + + if "generic" in attname: + expected = [ + 'Dataset variable "vx" attribute lists do not match: ' + "['anyold', 'x'] != ['x', 'anyold']" + ] + else: + expected = [] + check(errs, expected) + + +class TestCompareVariables: + """ + Test 
variable comparison. + + Mostly, this is about comparison of the variable contents of a dataset + or group, since variable-to-variable comparison is done by + variable_differences, which is tested independently elsewhere. + This includes testing the generation of the variable identity strings in + various contexts (by parametrising over group_context). + """ + + @staticmethod + def _vars_testdata(group_context): + def data(): + return np.zeros((2, 3)) + + testdata = NcData( + name="dataset_1", + dimensions=[NcDimension("y", 2), NcDimension("x", 3)], + variables=[ + NcVariable("v1", ("y", "x"), data=data()), + NcVariable("v2", ("y", "x"), data=data()), + ], + ) + testdata = put_group_into_context(testdata, group_context) + return testdata + + @pytest.fixture(autouse=True) + def _vars_data(self, group_context): + data1, data2 = [self._vars_testdata(group_context) for _ in range(2)] + location = data2 + if "group" in group_context: + location = location.groups["inner_group"] + + self.data1 = data1 + self.data2 = data2 + self.location_string = location_prefix(group_context) + self.vars = location.variables + + def test_var_names(self): + self.vars.rename("v2", "q") + + errs = dataset_differences(self.data1, self.data2) + + expected = [ + f"{self.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v1', 'q']" + ] + check(errs, expected) + + def test_var_order(self, order_checking): + all_vars = list(self.vars.values()) + self.vars.clear() + self.vars.addall(all_vars[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) + errs = dataset_differences( + self.data1, self.data2, check_vars_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + f"{self.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v2', 'v1']" + ] + else: + expected = [] + check(errs, expected) + + def test_vars_extra_or_missing(self, order_checking): + del self.vars["v1"] + + do_ordercheck = decode_ordercheck(order_checking) + errs = dataset_differences( + 
self.data1, self.data2, check_vars_order=do_ordercheck + ) + + expected = [ + f"{self.location_string} variable lists do not match: " + "['v1', 'v2'] != ['v2']" + ] + check(errs, expected) + + +class TestCompareGroups: + @staticmethod + def _groups_testdata(): + testdata = NcData( + name="dataset_1", + groups=[ + NcData(name=name, attributes=[NcAttribute("attr_1", 1)]) + for name in ("g1", "g2") + ], + ) + return testdata + + @pytest.fixture(autouse=True) + def _groups_data(self): + self.data1, self.data2 = [self._groups_testdata() for _ in range(2)] + self.groups = self.data2.groups + + def test_group_names(self): + self.groups.rename("g2", "q") + + errs = dataset_differences(self.data1, self.data2) + + expected = [ + "Dataset subgroup lists do not match: ['g1', 'g2'] != ['g1', 'q']" + ] + check(errs, expected) + + def test_group_order(self, order_checking): + all_groups = list(self.groups.values()) + self.groups.clear() + self.groups.addall(all_groups[::-1]) + + do_ordercheck = decode_ordercheck(order_checking) + errs = dataset_differences( + self.data1, self.data2, check_groups_order=do_ordercheck + ) + + if do_ordercheck: + expected = [ + "Dataset subgroup lists do not match: " + "['g1', 'g2'] != ['g2', 'g1']" + ] + else: + expected = [] + check(errs, expected) + + def test_groups_extra_or_missing(self, order_checking): + del self.groups["g1"] + + do_ordercheck = decode_ordercheck(order_checking) + errs = dataset_differences( + self.data1, self.data2, check_groups_order=do_ordercheck + ) + + # NB since the sets are different, the ordering control has no effect + expected = [ + "Dataset subgroup lists do not match: ['g1', 'g2'] != ['g2']" + ] + check(errs, expected) diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py new file mode 100644 index 0000000..986008b --- /dev/null +++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py @@ -0,0 +1,305 @@ 
"""
Tests for :func:`ncdata.utils.variable_differences`.

Covers reporting of name, dimension, dtype and data differences between two
variables, including masked, NaN, scalar and lazy (dask) data arrays.
"""
import dask.array as da
import numpy as np
import pytest

from ncdata import NcVariable
from ncdata.utils import variable_differences

# Debug switch: when True, print all result messages to help diagnose test
# failures.  Keep False in committed code so test output stays clean.
_DEBUG_RESULTS = False
# _DEBUG_RESULTS = True


def check(results, expected):
    """Assert that 'results' == 'expected', optionally printing them first."""
    if _DEBUG_RESULTS:
        print("\nResult messages:")
        for msg in results:
            print(" ", msg)
    assert results == expected


class TestSimpleProperties:
    """Check reporting of variable name and dimension differences."""

    @pytest.fixture(autouse=True)
    def _vars_data(self):
        # Two initially-identical variables, selectively modified per-test.
        self.var1, self.var2 = [
            NcVariable("v1", ("y", "x"), data=np.zeros((2, 3)))
            for _ in range(2)
        ]

    def test_var_names(self):
        self.var2.name = "q"

        errs = variable_differences(self.var1, self.var2)
        expected = ['Variable "v1 / q" names differ : ' "'v1' != 'q'"]
        check(errs, expected)

    def test_var_dims__reorder(self):
        # N.B. dimension order within a variable is *always* significant :
        # the DIMENSIONS order control does not apply here.
        self.var2.dimensions = self.var2.dimensions[::-1]
        # N.B. the data shape doesn't now correspond, but that won't matter
        # as, with mismatched dimensions, the data won't be checked.

        errs = variable_differences(self.var1, self.var2)

        expected = [
            'Variable "v1" dimensions differ : ' "('y', 'x') != ('x', 'y')"
        ]
        check(errs, expected)

    def test_var_dims__extra_or_missing(self):
        # Remove a dimension : reported as a dimensions difference.
        self.var2.dimensions = self.var2.dimensions[:-1]
        # N.B. the data shape doesn't now correspond, but that won't matter
        # as, with mismatched dimensions, the data won't be checked.

        errs = variable_differences(self.var1, self.var2)

        expected = ["Variable \"v1\" dimensions differ : ('y', 'x') != ('y',)"]
        check(errs, expected)


class TestDtypes:
    """
    Check reporting of dtype differences.

    Where the dtypes differ but are both numeric, the data values are still
    compared, so those cases also check the combined message output.
    """

    @pytest.fixture(autouse=True)
    def _vars_data(self):
        self.var1, self.var2 = [
            NcVariable("v1", ("x",), data=np.zeros(3)) for _ in range(2)
        ]

    def test_numbers_v_strings(self):
        # Set a different dtype.
        # NB this disagrees with the actual data array, but that doesn't
        # matter, as it won't attempt to compare strings with numbers.
        self.var2.dtype = np.dtype("S5")

        # Test the comparison
        errs = variable_differences(self.var1, self.var2)
        expected = [
            'Variable "v1" datatypes differ : '
            "dtype('float64') != dtype('S5')"
        ]
        check(errs, expected)

    @pytest.mark.parametrize("equaldata", [False, True])
    def test_ints_v_floats(self, equaldata):
        # In this case, there is also a data comparison to check.
        var2 = self.var2
        new_dtype = np.dtype(np.int32)
        var2.data = var2.data.astype(new_dtype)
        if not equaldata:
            var2.data.flat[0] += 1
        var2.dtype = new_dtype

        # Test the comparison
        errs = variable_differences(self.var1, self.var2)

        expected = [
            'Variable "v1" datatypes differ : '
            "dtype('float64') != dtype('int32')"
        ]
        if not equaldata:
            expected.append(
                'Variable "v1" data contents differ, at 1 points: '
                "@INDICES[(0,)] : LHS=[0.0], RHS=[1]"
            )
        check(errs, expected)

    @pytest.mark.parametrize("equaldata", [False, True])
    def test_wordlengths(self, equaldata):
        # Test floats with wordlength difference -- assume ints are the same.
        # In this case, there is also a data comparison to check.
        var2 = self.var2
        new_dtype = np.dtype(np.float32)
        var2.data = var2.data.astype(new_dtype)
        if not equaldata:
            var2.data.flat[0] += 1
        var2.dtype = new_dtype

        # Test the comparison
        errs = variable_differences(self.var1, self.var2)

        expected = [
            'Variable "v1" datatypes differ : '
            "dtype('float64') != dtype('float32')"
        ]
        if not equaldata:
            expected.append(
                'Variable "v1" data contents differ, at 1 points: '
                "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]"
            )
        check(errs, expected)

    @pytest.mark.parametrize("equaldata", [False, True])
    def test_signed_unsigned(self, equaldata):
        # Test ints with a signed/unsigned difference.
        # In this case, there is also a data comparison to check.
        var1 = self.var1
        var1.data = var1.data.astype(np.dtype(np.int64))
        var1.dtype = np.dtype(np.int64)

        var2 = self.var2
        var2.data = var2.data.astype(np.dtype(np.uint64))
        if not equaldata:
            var2.data.flat[0] += 1
        var2.dtype = np.dtype(np.uint64)

        # Test the comparison
        errs = variable_differences(self.var1, self.var2)

        expected = [
            'Variable "v1" datatypes differ : '
            "dtype('int64') != dtype('uint64')"
        ]
        if not equaldata:
            expected.append(
                'Variable "v1" data contents differ, at 1 points: '
                "@INDICES[(0,)] : LHS=[0], RHS=[1]"
            )
        check(errs, expected)


class TestDataCheck__controls:
    """Check the keywords which control data-value checking and reporting."""

    @pytest.fixture(autouse=True)
    def _vars_data(self):
        # Two dimensions, matching the 2-D test data.
        self.var1, self.var2 = [
            NcVariable("v1", ("y", "x"), data=np.arange(6.0).reshape((2, 3)))
            for _ in range(2)
        ]

    def test_no_values_check(self):
        # With check_var_data=False, differing values produce no errors.
        self.var2.data += 1
        errs = variable_differences(self.var1, self.var2, check_var_data=False)
        check(errs, [])

    def test_print_bad_nprint(self):
        # An out-of-range 'show_n_first_different' is rejected.
        msg = "'show_n_diffs' must be >=1 : got 0."
        with pytest.raises(ValueError, match=msg):
            variable_differences(
                self.var1, self.var2, show_n_first_different=0
            )

    @pytest.mark.parametrize("ndiffs", [1, 2, 3])
    def test_ndiffs(self, ndiffs):
        # With the default show_n_first_different (=2), excess differences
        # are elided with "...".
        self.var2.data.flat[1 : ndiffs + 1] += 1
        errs = variable_differences(self.var1, self.var2)
        detail = {
            1: "[(0, 1)] : LHS=[1.0], RHS=[2.0]",
            2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]",
            3: (
                "[(0, 1), (0, 2), ...] : "
                "LHS=[1.0, 2.0, ...], RHS=[2.0, 3.0, ...]"
            ),
        }[ndiffs]
        expected = [
            f'Variable "v1" data contents differ, at {ndiffs} points: '
            f"@INDICES{detail}"
        ]
        check(errs, expected)

    @pytest.mark.parametrize("nprint", [1, 2, 3])
    def test_show_n_first_different(self, nprint):
        # With a fixed 2 differences, vary how many are displayed.
        self.var2.data.flat[1:3] += 1
        errs = variable_differences(
            self.var1, self.var2, show_n_first_different=nprint
        )
        detail = {
            1: "[(0, 1), ...] : LHS=[1.0, ...], RHS=[2.0, ...]",
            2: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]",
            3: "[(0, 1), (0, 2)] : LHS=[1.0, 2.0], RHS=[2.0, 3.0]",
        }[nprint]
        expected = [
            f'Variable "v1" data contents differ, at 2 points: '
            f"@INDICES{detail}"
        ]
        check(errs, expected)


class TestDataCheck__difference_reports:
    """Check data-difference reporting for masked, NaN, scalar + lazy data."""

    @pytest.fixture(autouse=True)
    def _vars_data(self):
        self.var1, self.var2 = [
            NcVariable("v1", ("x",), data=np.arange(4.0)) for _ in range(2)
        ]

    @pytest.mark.parametrize("datavalues", ["same", "different"])
    @pytest.mark.parametrize("masks", ["onemasked", "bothmasked"])
    def test_masked(self, datavalues, masks):
        # Masked points compare equal only when masked in *both* variables.
        different = datavalues == "different"
        bothmasked = masks == "bothmasked"
        testvar = self.var2
        testvar.data = np.ma.masked_array(testvar.data)
        if different:
            testvar.data[1:2] += 1
        testvar.data[1:2] = np.ma.masked
        if bothmasked:
            self.var1.data = np.ma.masked_array(self.var1.data)
            self.var1.data[1:2] = np.ma.masked
        errs = variable_differences(self.var1, self.var2)
        if bothmasked:
            expected = []
        else:
            expected = [
                'Variable "v1" data contents differ, at 1 points: '
                "@INDICES[(1,)] : LHS=[1.0], RHS=[masked]"
            ]
        check(errs, expected)

    @pytest.mark.parametrize("nans", ["onenans", "bothnans"])
    def test_nans(self, nans):
        # NaN points compare equal only when NaN in *both* variables.
        bothnans = nans == "bothnans"
        self.var2.data[1:2] = np.nan
        if bothnans:
            self.var1.data[1:2] = np.nan
        errs = variable_differences(self.var1, self.var2)
        if bothnans:
            expected = []
        else:
            expected = [
                'Variable "v1" data contents differ, at 1 points: '
                "@INDICES[(1,)] : LHS=[1.0], RHS=[nan]"
            ]
        check(errs, expected)

    def test_scalar(self):
        # Check how a difference of scalar arrays is reported.
        for value, var in enumerate([self.var1, self.var2]):
            var.dimensions = ()
            var.data = np.array(value, dtype=var.dtype)
        errs = variable_differences(self.var1, self.var2)
        expected = [
            'Variable "v1" data contents differ, at 1 points: '
            "@INDICES[(0,)] : LHS=[0.0], RHS=[1.0]"
        ]
        check(errs, expected)

    @pytest.mark.parametrize(
        "argtypes", ["real_real", "real_lazy", "lazy_lazy"]
    )
    def test_real_and_lazy(self, argtypes):
        type1, type2 = argtypes.split("_")
        # fix the testvar to create a difference
        self.var2.data[1:2] += 1
        # setup vars with lazy/real data arrays
        for arraytype, var in zip([type1, type2], [self.var1, self.var2]):
            if arraytype == "lazy":
                var.data = da.from_array(var.data, chunks=-1)
        # compare + check results
        errs = variable_differences(self.var1, self.var2)
        # N.B. the result should be the same in all cases
        expected = [
            'Variable "v1" data contents differ, at 1 points: '
            "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]"
        ]
        check(errs, expected)