Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
from .helpers import bin_to_hex
from .helpers import safe_ns
from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
from .helpers import PathPrefixPattern, FnmatchPattern
from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
from .item import Item, ArchiveItem
from .key import key_factory
from .platform import acl_get, acl_set, set_flags, get_flags, swidth
Expand Down Expand Up @@ -1721,10 +1721,10 @@ def matcher_add_tagged_dirs(self, archive):
"""Add excludes to the matcher created by exclude_cache and exclude_if_present."""
def exclude(dir, tag_item):
if self.keep_exclude_tags:
tag_files.append(PathPrefixPattern(tag_item.path))
tagged_dirs.append(FnmatchPattern(dir + '/'))
tag_files.append(PathPrefixPattern(tag_item.path, recurse_dir=False))
tagged_dirs.append(FnmatchPattern(dir + '/', recurse_dir=False))
else:
tagged_dirs.append(PathPrefixPattern(dir))
tagged_dirs.append(PathPrefixPattern(dir, recurse_dir=False))

matcher = self.matcher
tag_files = []
Expand All @@ -1747,8 +1747,8 @@ def exclude(dir, tag_item):
file = open_item(archive, cachedir_masters[item.source])
if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
exclude(dir, item)
matcher.add(tag_files, True)
matcher.add(tagged_dirs, False)
matcher.add(tag_files, IECommand.Include)
matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)

def create_target(self, archive, target_name=None):
"""Create target archive."""
Expand Down
56 changes: 30 additions & 26 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
from .helpers import log_multi
from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
from .helpers import PatternMatcher
from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
from .helpers import ErrorIgnoringTextIOWrapper
from .helpers import ProgressIndicatorPercent
Expand Down Expand Up @@ -190,16 +190,11 @@ def compare_chunk_contents(chunks1, chunks2):
bi += slicelen

@staticmethod
def build_matcher(inclexcl_patterns, paths):
def build_matcher(inclexcl_patterns, include_paths):
matcher = PatternMatcher()
if inclexcl_patterns:
matcher.add_inclexcl(inclexcl_patterns)
include_patterns = []
if paths:
include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths)
matcher.add(include_patterns, True)
matcher.fallback = not include_patterns
return matcher, include_patterns
matcher.add_inclexcl(inclexcl_patterns)
matcher.add_includepaths(include_paths)
return matcher

def do_serve(self, args):
"""Start in server mode. This command is usually not used manually."""
Expand Down Expand Up @@ -493,13 +488,20 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,

This should only raise on critical errors. Per-item errors must be handled within this method.
"""
if st is None:
with backup_io('stat'):
st = os.lstat(path)

recurse_excluded_dir = False
if not matcher.match(path):
self.print_file_status('x', path)
return

if stat.S_ISDIR(st.st_mode) and matcher.recurse_dir:
recurse_excluded_dir = True
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

naming: isn't this rather a state flag than a command flag?

would recursing_excluded_dir better describe what it tells?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it doesn't matter much to me what it is named, but I chose recurse (as a command verb) because in my mind the recursing hasn't happened yet at that point, and will only happen later on, based on the value of the flag which is set here.

else:
return

try:
if st is None:
with backup_io('stat'):
st = os.lstat(path)
if (st.st_ino, st.st_dev) in skip_inodes:
return
# if restrict_dev is given, we do not want to recurse into a new filesystem,
Expand Down Expand Up @@ -527,7 +529,8 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
read_special=read_special, dry_run=dry_run)
return
if not dry_run:
status = archive.process_dir(path, st)
if not recurse_excluded_dir:
status = archive.process_dir(path, st)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there might be a problem here:

if the code recurses into an excluded dir (searching for stuff that is included again in directories deeper below),
it does not archive the intermediate directories, so we won't have their metadata (like uid/gid/mode/...) in the archive.

at extract time, borg will automatically create missing intermediate dirs, but it can not set the correct metadata on them (not even when doing a full extract) as it does not have them in the archive.

Copy link
Copy Markdown
Contributor Author

@edgimar edgimar Apr 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, does it matter if the intermediate dirs are archived before or after the nested included path? It could be possible to add the intermediate dirs (at the time a nested included path is archived) to a list of paths to be archived later.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

permissions might be important to set before filling the dir.
timestamps must be set after filling the dir.

(check the code dealing with dirs elsewhere)

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

guess the upper-layer dir entries should precede the lower-layer stuff in the archive.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't claim to know lots about the details of borg's storage method, but I'm assuming that each backup operation stores file chunks in some manner that doesn't depend on order, but tracks the backup itself in the form of some manifest of the backed-up items. If this is the case, then it could be solved as simply as just ensuring that the manifest is sorted before storing it such that parent folders come before their children. This would work in conjunction with the idea of maintaining a list of parents to process after a matching nested folder is found. Does this sound right or doable?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps just use a heap-queue for the manifest list, and always add item-names to it via heapq.heappush()?

if recurse:
with backup_io('scandir'):
entries = helpers.scandir_inorder(path)
Expand Down Expand Up @@ -590,7 +593,9 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present,
status = '?' # need to add a status code somewhere
else:
status = '-' # dry run, item was not backed up
self.print_file_status(status, path)

if not recurse_excluded_dir:
self.print_file_status(status, path)

@staticmethod
def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
Expand All @@ -616,7 +621,7 @@ def do_extract(self, args, repository, manifest, key, archive):
if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')

matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)

progress = args.progress
output_list = args.output_list
Expand Down Expand Up @@ -681,9 +686,8 @@ def peek_and_store_hardlink_masters(item, matched):
archive.extract_item(dir_item)
except BackupOSError as e:
self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)
if pi:
# clear progress output
pi.finish()
Expand Down Expand Up @@ -893,13 +897,13 @@ def compare_or_defer(item1, item2):
'If you know for certain that they are the same, pass --same-chunker-params '
'to override this check.')

matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)

compare_archives(archive1, archive2, matcher)

for pattern in include_patterns:
if pattern.match_count == 0:
self.print_warning("Include pattern '%s' never matched.", pattern)
for pattern in matcher.get_unmatched_include_patterns():
self.print_warning("Include pattern '%s' never matched.", pattern)

return self.exit_code

@with_repository(exclusive=True, cache=True)
Expand Down Expand Up @@ -1048,7 +1052,7 @@ def write(bytestring):
return self._list_repository(args, manifest, write)

def _list_archive(self, args, repository, manifest, key, write):
matcher, _ = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
if args.format is not None:
format = args.format
elif args.short:
Expand Down Expand Up @@ -1330,7 +1334,7 @@ def do_recreate(self, args, repository, manifest, key, cache):
env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'):
return EXIT_ERROR

matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
matcher = self.build_matcher(args.patterns, args.paths)
self.output_list = args.output_list
self.output_filter = args.output_filter
recompress = args.recompress != 'never'
Expand Down
Loading