Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1096,7 +1096,7 @@ def chunk_decref(id, stats):
logger.warning("borg check --repair is required to free all space.")

@staticmethod
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False):
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
"""
Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.

Expand All @@ -1111,6 +1111,7 @@ def compare_items(item1, item2):
archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
can_compare_chunk_ids=can_compare_chunk_ids,
content_only=content_only,
)

orphans_archive1 = OrderedDict()
Expand Down
12 changes: 10 additions & 2 deletions src/borg/archiver/diff_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from ..constants import * # NOQA
from ..helpers import archivename_validator
from ..manifest import Manifest
from ..helpers.parseformat import BorgJsonEncoder

from ..logger import create_logger

Expand All @@ -19,7 +20,7 @@ def do_diff(self, args, repository, manifest, archive):
"""Diff contents of two archives"""

def print_json_output(diff, path):
print(json.dumps({"path": path, "changes": [j for j, str in diff]}))
print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))

def print_text_output(diff, path):
    """Print one human-readable diff line for *path*.

    ``diff`` is an iterable of ``(json_change, text_change)`` pairs; only the
    text part of each pair is used. The text parts are joined with single
    spaces, left-aligned in a 19-character column, and followed by the path.
    """
    # Bind the text part to ``text`` (not ``str``) so the builtin is not shadowed.
    descriptions = " ".join(text for _, text in diff)
    print("{:<19} {}".format(descriptions, path))
Expand All @@ -42,7 +43,9 @@ def print_text_output(diff, path):

matcher = build_matcher(args.patterns, args.paths)

diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids)
diffs = Archive.compare_archives_iter(
archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
)
# Conversion to string and filtering for diff.equal to save memory if sorting
diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)

Expand Down Expand Up @@ -105,6 +108,11 @@ def build_parser_diff(self, subparsers, common_parser, mid_common_parser):
)
subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
subparser.add_argument(
"--content-only",
action="store_true",
help="Only compare differences in content (exclude metadata differences)",
)
subparser.add_argument("name", metavar="ARCHIVE1", type=archivename_validator, help="ARCHIVE1 name")
subparser.add_argument("other_name", metavar="ARCHIVE2", type=archivename_validator, help="ARCHIVE2 name")
subparser.add_argument(
Expand Down
2 changes: 1 addition & 1 deletion src/borg/helpers/parseformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ def __init__(self, archive, format, *, json_lines=False):
self.used_call_keys = set(self.call_keys) & self.format_keys

def format_item_json(self, item):
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"
return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"

def get_item_data(self, item):
item_data = {}
Expand Down
30 changes: 24 additions & 6 deletions src/borg/item.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ from .constants import ITEM_KEYS, ARCHIVE_KEYS
from .helpers import StableDict
from .helpers import format_file_size
from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
from .helpers.time import OutputTimestamp, safe_timestamp


cdef extern from "_item.c":
Expand Down Expand Up @@ -626,9 +627,10 @@ class ItemDiff:
It does not include extended attributes in the comparison. Mode, owner and ctime/mtime are compared unless content_only is set.
"""

def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False):
def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
self._item1 = item1
self._item2 = item2
self._content_only = content_only
self._numeric_ids = numeric_ids
self._can_compare_chunk_ids = can_compare_chunk_ids
self.equal = self._equal(chunk_iterator1, chunk_iterator2)
Expand All @@ -652,9 +654,11 @@ class ItemDiff:
if self._item1.is_fifo() or self._item2.is_fifo():
changes.append(self._presence_diff('fifo'))

if not (self._item1.get('deleted') or self._item2.get('deleted')):
changes.append(self._owner_diff())
changes.append(self._mode_diff())
if not self._content_only:
if not (self._item1.get('deleted') or self._item2.get('deleted')):
changes.append(self._owner_diff())
changes.append(self._mode_diff())
changes.extend(self._time_diffs())

# filter out empty changes
self._changes = [ch for ch in changes if ch]
Expand All @@ -672,8 +676,12 @@ class ItemDiff:
if self._item1.get('deleted') and self._item2.get('deleted'):
return True

attr_list = ['deleted', 'mode', 'target']
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
attr_list = ['deleted', 'target']

if not self._content_only:
attr_list += ['mode', 'ctime', 'mtime']
attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']

for attr in attr_list:
if self._item1.get(attr) != self._item2.get(attr):
return False
Expand Down Expand Up @@ -736,6 +744,16 @@ class ItemDiff:
mode2 = stat.filemode(self._item2.mode)
return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))

def _time_diffs(self):
    """Collect change records for the ``ctime`` and ``mtime`` attributes.

    An attribute is reported only when it is present on both items and the
    two stored values differ. Each record is a ``(json_dict, text)`` tuple:
    the dict carries ``type``, ``old_<attr>`` and ``new_<attr>``, and the
    text is ``[<attr>: <old> -> <new>]``.

    Returns a (possibly empty) list of such tuples, ctime first.
    """
    time_changes = []
    for name in ("ctime", "mtime"):
        # Skip attributes that either side does not carry at all.
        if name not in self._item1 or name not in self._item2:
            continue
        old_raw = self._item1.get(name)
        new_raw = self._item2.get(name)
        if old_raw != new_raw:
            old_ts = OutputTimestamp(safe_timestamp(old_raw))
            new_ts = OutputTimestamp(safe_timestamp(new_raw))
            record = {"type": name, f"old_{name}": old_ts, f"new_{name}": new_ts}
            text = '[{}: {} -> {}]'.format(name, old_ts, new_ts)
            time_changes.append((record, text))
    return time_changes

def _content_equal(self, chunk_iterator1, chunk_iterator2):
if self._can_compare_chunk_ids:
return self._item1.chunks == self._item2.chunks
Expand Down
4 changes: 4 additions & 0 deletions src/borg/testsuite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
except ImportError:
posix = None

import re
import stat
import sys
import sysconfig
Expand Down Expand Up @@ -187,6 +188,9 @@ def assert_dirs_equal(self, dir1, dir2, **kwargs):
diff = filecmp.dircmp(dir1, dir2)
self._assert_dirs_equal_cmp(diff, **kwargs)

def assert_line_exists(self, lines, expected_regexpr):
    """Assert that at least one entry of *lines* matches *expected_regexpr*.

    Matching uses ``re.search``, so the pattern may match anywhere in a line.
    Fails with a message naming the pattern and the lines that were searched.
    """
    found = False
    for candidate in lines:
        if re.search(expected_regexpr, candidate):
            found = True
            break
    assert found, f"no match for {expected_regexpr} in {lines}"

def _assert_dirs_equal_cmp(self, diff, ignore_flags=False, ignore_xattrs=False, ignore_ns=False):
self.assert_equal(diff.left_only, [])
self.assert_equal(diff.right_only, [])
Expand Down
118 changes: 78 additions & 40 deletions src/borg/testsuite/archiver/diff_cmd.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
import stat
import time
import unittest

from ...constants import * # NOQA
Expand Down Expand Up @@ -70,32 +71,33 @@ def test_basic_functionality(self):
self.cmd(f"--repo={self.repository_location}", "create", "test1a", "input")
self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")

def do_asserts(output, can_compare_ids):
def do_asserts(output, can_compare_ids, content_only=False):
# File contents changed (deleted and replaced with a new file)
change = "B" if can_compare_ids else "{:<19}".format("modified")
lines = output.splitlines()
assert "file_replaced" in output # added to debug #3494
assert f"{change} input/file_replaced" in output
self.assert_line_exists(lines, f"{change}.*input/file_replaced")

# File unchanged
assert "input/file_unchanged" not in output

# Directory replaced with a regular file
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32:
assert "[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file" in output
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")

# Basic directory cases
assert "added directory input/dir_added" in output
assert "removed directory input/dir_removed" in output

if are_symlinks_supported():
# Basic symlink cases
assert "changed link input/link_changed" in output
assert "added link input/link_added" in output
assert "removed link input/link_removed" in output
self.assert_line_exists(lines, "changed link.*input/link_changed")
self.assert_line_exists(lines, "added link.*input/link_added")
self.assert_line_exists(lines, "removed link.*input/link_removed")

# Symlink replacing or being replaced
assert "] input/dir_replaced_with_link" in output
assert "] input/link_replaced_by_file" in output
assert "input/dir_replaced_with_link" in output
assert "input/link_replaced_by_file" in output

# Symlink target removed. Should not affect the symlink at all.
assert "input/link_target_removed" not in output
Expand All @@ -104,9 +106,9 @@ def do_asserts(output, can_compare_ids):
# should notice the changes in both links. However, the symlink
# pointing to the file is not changed.
change = "0 B" if can_compare_ids else "{:<19}".format("modified")
assert f"{change} input/empty" in output
self.assert_line_exists(lines, f"{change}.*input/empty")
if are_hardlinks_supported():
assert f"{change} input/hardlink_contents_changed" in output
self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
if are_symlinks_supported():
assert "input/link_target_contents_changed" not in output

Expand All @@ -125,18 +127,18 @@ def do_asserts(output, can_compare_ids):
if are_hardlinks_supported():
assert "removed 256 B input/hardlink_removed" in output

# Another link (marked previously as the source in borg) to the
# same inode was removed. This should not change this link at all.
if are_hardlinks_supported():
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the
# same inode was removed. Removing that link decrements the inode's hard-link
# count, so only the ctime of this link changes; with content-only set, that
# must not show up in the output.
assert "input/hardlink_target_removed" not in output

# Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should not
# change this link at all.
if are_hardlinks_supported():
# Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should only change
# its ctime. This should not be reflected in the output if content-only is set
assert "input/hardlink_target_replaced" not in output

def do_json_asserts(output, can_compare_ids):
def do_json_asserts(output, can_compare_ids, content_only=False):
def get_changes(filename, data):
chgsets = [j["changes"] for j in data if j["path"] == filename]
assert len(chgsets) < 2
Expand All @@ -154,7 +156,7 @@ def get_changes(filename, data):
assert not any(get_changes("input/file_unchanged", joutput))

# Directory replaced with a regular file
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32:
if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
"input/dir_replaced_with_file", joutput
)
Expand All @@ -170,14 +172,16 @@ def get_changes(filename, data):
assert {"type": "removed link"} in get_changes("input/link_removed", joutput)

# Symlink replacing or being replaced
assert any(
chg["type"] == "mode" and chg["new_mode"].startswith("l")
for chg in get_changes("input/dir_replaced_with_link", joutput)
)
assert any(
chg["type"] == "mode" and chg["old_mode"].startswith("l")
for chg in get_changes("input/link_replaced_by_file", joutput)
)

if not content_only:
assert any(
chg["type"] == "mode" and chg["new_mode"].startswith("l")
for chg in get_changes("input/dir_replaced_with_link", joutput)
), get_changes("input/dir_replaced_with_link", joutput)
assert any(
chg["type"] == "mode" and chg["old_mode"].startswith("l")
for chg in get_changes("input/link_replaced_by_file", joutput)
), get_changes("input/link_replaced_by_file", joutput)

# Symlink target removed. Should not affect the symlink at all.
assert not any(get_changes("input/link_target_removed", joutput))
Expand Down Expand Up @@ -207,21 +211,56 @@ def get_changes(filename, data):
if are_hardlinks_supported():
assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)

# Another link (marked previously as the source in borg) to the
# same inode was removed. This should not change this link at all.
if are_hardlinks_supported():
if are_hardlinks_supported() and content_only:
# Another link (marked previously as the source in borg) to the
# same inode was removed. Removing that link decrements the inode's hard-link
# count, so only the ctime of this link changes; with content-only set, that
# must not show up in the output.
assert not any(get_changes("input/hardlink_target_removed", joutput))

# Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should not
# change this link at all.
if are_hardlinks_supported():
# Another link (marked previously as the source in borg) to the
# same inode was replaced with a new regular file. This should only change
# its ctime. This should not be reflected in the output if content-only is set
assert not any(get_changes("input/hardlink_target_replaced", joutput))

do_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a"), True)
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a")
do_asserts(output, True)
# We expect exit_code=1 due to the chunker params warning
do_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1b", exit_code=1), False)
do_json_asserts(self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines"), True)
output = self.cmd(
f"--repo={self.repository_location}", "diff", "test0", "test1b", "--content-only", exit_code=1
)
do_asserts(output, False, content_only=True)

output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines")
do_json_asserts(output, True)

output = self.cmd(
f"--repo={self.repository_location}", "diff", "test0", "test1a", "--json-lines", "--content-only"
)
do_json_asserts(output, True, content_only=True)

def test_time_diffs(self):
    """Check that ``borg diff`` reports ctime/mtime changes between archives.

    Step 1 replaces a file with a new one of different size and expects both
    "mtime" and "ctime" in the diff output. Step 2 only changes the file's
    permissions and expects no "mtime" in the output; "ctime" is expected on
    non-Windows platforms only.
    """
    self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
    self.create_regular_file("test_file", size=10)
    self.cmd(f"--repo={self.repository_location}", "create", "archive1", "input")
    time.sleep(0.1)
    os.unlink("input/test_file")
    if is_win32:
        # Sleeping for 15s because Windows doesn't refresh ctime if file is deleted and recreated within 15 seconds.
        time.sleep(15)
    self.create_regular_file("test_file", size=15)
    self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
    output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
    self.assert_in("mtime", output)
    self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
    # The mode must be an octal literal: decimal 777 is 0o1411 (sticky bit plus
    # r----x--x), not the intended rwxrwxrwx.
    os.chmod("input/test_file", 0o777)
    self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
    output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
    self.assert_not_in("mtime", output)
    # Checking platform because ctime should not be shown on windows since it wasn't recreated.
    if not is_win32:
        self.assert_in("ctime", output)
    else:
        self.assert_not_in("ctime", output)

def test_sort_option(self):
self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
Expand All @@ -242,7 +281,7 @@ def test_sort_option(self):
self.create_regular_file("d_file_added", size=256)
self.cmd(f"--repo={self.repository_location}", "create", "test1", "input")

output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1", "--sort")
output = self.cmd(f"--repo={self.repository_location}", "diff", "test0", "test1", "--sort", "--content-only")
expected = [
"a_file_removed",
"b_file_added",
Expand All @@ -251,7 +290,6 @@ def test_sort_option(self):
"e_file_changed",
"f_file_removed",
]

assert all(x in line for x, line in zip(expected, output.splitlines()))


Expand Down