diff --git a/src/borg/archive.py b/src/borg/archive.py index b87fa35afd..218d0ea721 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1044,7 +1044,7 @@ def chunk_decref(id, stats, part=False): logger.warning('borg check --repair is required to free all space.') @staticmethod - def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False): + def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False): """ Yields tuples with a path and an ItemDiff instance describing changes/indicating equality. @@ -1073,7 +1073,8 @@ def compare_items(item1, item2): return ItemDiff(item1, item2, archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]), archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]), - can_compare_chunk_ids=can_compare_chunk_ids) + can_compare_chunk_ids=can_compare_chunk_ids, + content_only=content_only) def defer_if_necessary(item1, item2): """Adds item tuple to deferred if necessary and returns True, if items were deferred""" @@ -1124,7 +1125,8 @@ def defer_if_necessary(item1, item2): for item1, item2 in deferred: assert hardlink_master_seen(item1) assert hardlink_master_seen(item2) - yield (path, compare_items(item1, item2)) + assert item1.path == item2.path, "Deferred items have different paths" + yield (item1.path, compare_items(item1, item2)) class MetadataCollector: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 8cc3a7b09d..ff006dcd35 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -76,6 +76,7 @@ from .helpers import sig_int, ignore_sigint from .helpers import iter_separated from .helpers import get_tar_filter + from .helpers.parseformat import BorgJsonEncoder from .nanorst import rst_to_terminal from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern from .patterns import PatternMatcher @@ -1117,7 +1118,7 @@ def do_diff(self, args, 
repository, manifest, key, archive): """Diff contents of two archives""" def print_json_output(diff, path): - print(json.dumps({"path": path, "changes": [j for j, str in diff]})) + print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder)) def print_text_output(diff, path): print("{:<19} {}".format(' '.join([str for j, str in diff]), path)) @@ -1146,7 +1147,7 @@ def print_text_output(diff, path): matcher = self.build_matcher(args.patterns, args.paths) - diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids) + diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only) # Conversion to string and filtering for diff.equal to save memory if sorting diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal) @@ -3908,6 +3909,11 @@ def define_borg_mount(parser): help='Override check of chunker parameters.') subparser.add_argument('--sort', dest='sort', action='store_true', help='Sort the output lines by file path.') + subparser.add_argument( + "--content-only", + action="store_true", + help="Only compare differences in content (exclude metadata differences)", + ) subparser.add_argument('--json-lines', action='store_true', help='Format output as JSON Lines. 
') subparser.add_argument('location', metavar='REPO::ARCHIVE1', diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 9cf6b37418..087c718bb1 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -648,7 +648,7 @@ def __init__(self, format, repository, manifest, key, *, json=False, iec=False): self.item_data = static_keys def format_item_json(self, item): - return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + '\n' + return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + '\n' def get_item_data(self, archive_info): self.name = archive_info.name diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 119be6a824..126329f336 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -6,6 +6,7 @@ from .helpers import safe_encode, safe_decode from .helpers import bigint_to_int, int_to_bigint from .helpers import StableDict from .helpers import format_file_size +from .helpers.time import OutputTimestamp, safe_timestamp cdef extern from "_item.c": object _object_to_optr(object obj) @@ -421,9 +422,10 @@ class ItemDiff: It does not include extended or time attributes in the comparison. 
""" - def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False): + def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False): self._item1 = item1 self._item2 = item2 + self._content_only = content_only self._numeric_ids = numeric_ids self._can_compare_chunk_ids = can_compare_chunk_ids self.equal = self._equal(chunk_iterator1, chunk_iterator2) @@ -447,9 +449,10 @@ class ItemDiff: if self._item1.is_fifo() or self._item2.is_fifo(): changes.append(self._presence_diff('fifo')) - if not (self._item1.get('deleted') or self._item2.get('deleted')): + if not (self._item1.get('deleted') or self._item2.get('deleted')) and not self._content_only: changes.append(self._owner_diff()) changes.append(self._mode_diff()) + changes.extend(self._time_diffs()) # filter out empty changes self._changes = [ch for ch in changes if ch] @@ -467,8 +470,11 @@ class ItemDiff: if self._item1.get('deleted') and self._item2.get('deleted'): return True - attr_list = ['deleted', 'mode', 'source'] - attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group'] + attr_list = ['deleted', 'source'] + if not self._content_only: + attr_list += ['mode', 'ctime', 'mtime'] + attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group'] + for attr in attr_list: if self._item1.get(attr) != self._item2.get(attr): return False @@ -531,6 +537,16 @@ class ItemDiff: mode2 = stat.filemode(self._item2.mode) return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2)) + def _time_diffs(self): + changes = [] + attrs = ["ctime", "mtime"] + for attr in attrs: + if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr): + ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr))) + ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr))) + changes.append(({"type": attr, f"old_{attr}": ts1, 
f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2))) + return changes + def _content_equal(self, chunk_iterator1, chunk_iterator2): if self._can_compare_chunk_ids: return self._item1.chunks == self._item2.chunks diff --git a/src/borg/testsuite/__init__.py b/src/borg/testsuite/__init__.py index b1db8aa95d..9bef452b3d 100644 --- a/src/borg/testsuite/__init__.py +++ b/src/borg/testsuite/__init__.py @@ -7,6 +7,7 @@ except ImportError: posix = None +import re import stat import sys import sysconfig @@ -180,6 +181,9 @@ def assert_dirs_equal(self, dir1, dir2, **kwargs): diff = filecmp.dircmp(dir1, dir2) self._assert_dirs_equal_cmp(diff, **kwargs) + def assert_line_exists(self, lines, expected_regexpr): + assert any(re.search(expected_regexpr, line) for line in lines), f"no match for {expected_regexpr} in {lines}" + def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False): self.assert_equal(diff.left_only, []) self.assert_equal(diff.right_only, []) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 22cf71754e..7e6efb1939 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -56,7 +56,7 @@ from . import has_lchflags, llfuse from . import BaseTestCase, changedir, environment_variable, no_selinux from . import are_symlinks_supported, are_hardlinks_supported, are_fifos_supported, is_utime_fully_supported, is_birthtime_fully_supported -from .platform import fakeroot_detected, is_darwin +from .platform import fakeroot_detected, is_darwin, is_win32 from .upgrader import make_attic_repo from . 
import key @@ -4174,18 +4174,19 @@ def test_basic_functionality(self): self.cmd('create', self.repository_location + '::test1a', 'input') self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input') - def do_asserts(output, can_compare_ids): + def do_asserts(output, can_compare_ids, content_only=False): # File contents changed (deleted and replaced with a new file) change = 'B' if can_compare_ids else '{:<19}'.format('modified') + lines = output.splitlines() assert 'file_replaced' in output # added to debug #3494 - assert f'{change} input/file_replaced' in output + self.assert_line_exists(lines, f"{change}.*input/file_replaced") # File unchanged assert 'input/file_unchanged' not in output # Directory replaced with a regular file - if 'BORG_TESTS_IGNORE_MODES' not in os.environ: - assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output + if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only: + self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file") # Basic directory cases assert 'added directory input/dir_added' in output @@ -4193,13 +4194,13 @@ def do_asserts(output, can_compare_ids): if are_symlinks_supported(): # Basic symlink cases - assert 'changed link input/link_changed' in output - assert 'added link input/link_added' in output - assert 'removed link input/link_removed' in output + self.assert_line_exists(lines, "changed link.*input/link_changed") + self.assert_line_exists(lines, "added link.*input/link_added") + self.assert_line_exists(lines, "removed link.*input/link_removed") # Symlink replacing or being replaced - assert '] input/dir_replaced_with_link' in output - assert '] input/link_replaced_by_file' in output + assert 'input/dir_replaced_with_link' in output + assert 'input/link_replaced_by_file' in output # Symlink target removed. Should not affect the symlink at all. 
assert 'input/link_target_removed' not in output @@ -4208,9 +4209,10 @@ def do_asserts(output, can_compare_ids): # should notice the changes in both links. However, the symlink # pointing to the file is not changed. change = '0 B' if can_compare_ids else '{:<19}'.format('modified') - assert f'{change} input/empty' in output + self.assert_line_exists(lines, f"{change}.*input/empty") + if are_hardlinks_supported(): - assert f'{change} input/hardlink_contents_changed' in output + self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed") if are_symlinks_supported(): assert 'input/link_target_contents_changed' not in output @@ -4229,18 +4231,18 @@ def do_asserts(output, can_compare_ids): if are_hardlinks_supported(): assert 'removed 256 B input/hardlink_removed' in output - # Another link (marked previously as the source in borg) to the - # same inode was removed. This should not change this link at all. - if are_hardlinks_supported(): - assert 'input/hardlink_target_removed' not in output + if are_hardlinks_supported() and content_only: + # Another link (marked previously as the source in borg) to the + # same inode was removed. This should only change the ctime since removing + # the link would result in the decrementation of the inode's hard-link count. + assert "input/hardlink_target_removed" not in output - # Another link (marked previously as the source in borg) to the - # same inode was replaced with a new regular file. This should not - # change this link at all. - if are_hardlinks_supported(): - assert 'input/hardlink_target_replaced' not in output + # Another link (marked previously as the source in borg) to the + # same inode was replaced with a new regular file. This should only change + # its ctime. 
This should not be reflected in the output if content-only is set + assert "input/hardlink_target_replaced" not in output - def do_json_asserts(output, can_compare_ids): + def do_json_asserts(output, can_compare_ids, content_only=False): def get_changes(filename, data): chgsets = [j['changes'] for j in data if j['path'] == filename] assert len(chgsets) < 2 @@ -4258,7 +4260,7 @@ def get_changes(filename, data): assert not any(get_changes('input/file_unchanged', joutput)) # Directory replaced with a regular file - if 'BORG_TESTS_IGNORE_MODES' not in os.environ: + if 'BORG_TESTS_IGNORE_MODES' not in os.environ and not content_only: assert {'type': 'mode', 'old_mode': 'drwxr-xr-x', 'new_mode': '-rwxr-xr-x'} in \ get_changes('input/dir_replaced_with_file', joutput) @@ -4273,10 +4275,15 @@ def get_changes(filename, data): assert {'type': 'removed link'} in get_changes('input/link_removed', joutput) # Symlink replacing or being replaced - assert any(chg['type'] == 'mode' and chg['new_mode'].startswith('l') for chg in - get_changes('input/dir_replaced_with_link', joutput)) - assert any(chg['type'] == 'mode' and chg['old_mode'].startswith('l') for chg in - get_changes('input/link_replaced_by_file', joutput)) + if not content_only: + assert any( + chg["type"] == "mode" and chg["new_mode"].startswith("l") + for chg in get_changes("input/dir_replaced_with_link", joutput) + ), get_changes("input/dir_replaced_with_link", joutput) + assert any( + chg["type"] == "mode" and chg["old_mode"].startswith("l") + for chg in get_changes("input/link_replaced_by_file", joutput) + ), get_changes("input/link_replaced_by_file", joutput) # Symlink target removed. Should not affect the symlink at all. 
assert not any(get_changes('input/link_target_removed', joutput)) @@ -4306,21 +4313,27 @@ def get_changes(filename, data): if are_hardlinks_supported(): assert {'type': 'removed', 'size': 256} in get_changes('input/hardlink_removed', joutput) - # Another link (marked previously as the source in borg) to the - # same inode was removed. This should not change this link at all. - if are_hardlinks_supported(): - assert not any(get_changes('input/hardlink_target_removed', joutput)) + if are_hardlinks_supported() and content_only: + # Another link (marked previously as the source in borg) to the + # same inode was removed. This should only change the ctime since removing + # the link would result in the decrementation of the inode's hard-link count. + assert not any(get_changes("input/hardlink_target_removed", joutput)) - # Another link (marked previously as the source in borg) to the - # same inode was replaced with a new regular file. This should not - # change this link at all. - if are_hardlinks_supported(): - assert not any(get_changes('input/hardlink_target_replaced', joutput)) + # Another link (marked previously as the source in borg) to the + # same inode was replaced with a new regular file. This should only change + # its ctime. 
This should not be reflected in the output if content-only is set + assert not any(get_changes("input/hardlink_target_replaced", joutput)) + + output = self.cmd("diff", self.repository_location + "::test0", "test1a") + do_asserts(output, True) + output = self.cmd("diff", self.repository_location + "::test0", "test1b", "--content-only", exit_code=1) + do_asserts(output, False, content_only=True) - do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), True) - # We expect exit_code=1 due to the chunker params warning - do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), False) - do_json_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a', '--json-lines'), True) + output = self.cmd("diff", self.repository_location + "::test0", "test1a", "--json-lines") + do_json_asserts(output, True) + + output = self.cmd("diff", self.repository_location + "::test0", "test1a", "--json-lines", "--content-only") + do_json_asserts(output, True, content_only=True) def test_sort_option(self): self.cmd('init', '--encryption=repokey', self.repository_location) @@ -4341,7 +4354,7 @@ def test_sort_option(self): self.create_regular_file('d_file_added', size=256) self.cmd('create', self.repository_location + '::test1', 'input') - output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1') + output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1', '--content-only') expected = [ 'a_file_removed', 'b_file_added', @@ -4353,6 +4366,30 @@ def test_sort_option(self): assert all(x in line for x, line in zip(expected, output.splitlines())) + def test_time_diffs(self): + self.cmd('init', '--encryption=repokey', self.repository_location) + self.create_regular_file("test_file", size=10) + self.cmd('create', self.repository_location + '::archive1', 'input') + time.sleep(0.1) + os.unlink("input/test_file") + if is_win32: + # Sleeping for 15s because Windows doesn't refresh ctime if 
file is deleted and recreated within 15 seconds. + time.sleep(15) + self.create_regular_file("test_file", size=15) + self.cmd('create', self.repository_location + '::archive2', 'input') + output = self.cmd("diff", self.repository_location + "::archive1", "archive2") + self.assert_in("mtime", output) + self.assert_in("ctime", output) # Should show up on windows as well since it is a new file. + os.chmod("input/test_file", 0o777) + self.cmd('create', self.repository_location + '::archive3', 'input') + output = self.cmd("diff", self.repository_location + "::archive2", "archive3") + self.assert_not_in("mtime", output) + # Checking platform because ctime should not be shown on windows since it wasn't recreated. + if not is_win32: + self.assert_in("ctime", output) + else: + self.assert_not_in("ctime", output) + def test_get_args(): archiver = Archiver()