-
-
Notifications
You must be signed in to change notification settings - Fork 836
allow excluding parent and including child, fixes #2314 #2322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,7 +54,7 @@ | |
| from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern | ||
| from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo | ||
| from .helpers import log_multi | ||
| from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern | ||
| from .helpers import PatternMatcher | ||
| from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm | ||
| from .helpers import ErrorIgnoringTextIOWrapper | ||
| from .helpers import ProgressIndicatorPercent | ||
|
|
@@ -190,16 +190,11 @@ def compare_chunk_contents(chunks1, chunks2): | |
| bi += slicelen | ||
|
|
||
| @staticmethod | ||
| def build_matcher(inclexcl_patterns, paths): | ||
| def build_matcher(inclexcl_patterns, include_paths): | ||
| matcher = PatternMatcher() | ||
| if inclexcl_patterns: | ||
| matcher.add_inclexcl(inclexcl_patterns) | ||
| include_patterns = [] | ||
| if paths: | ||
| include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths) | ||
| matcher.add(include_patterns, True) | ||
| matcher.fallback = not include_patterns | ||
| return matcher, include_patterns | ||
| matcher.add_inclexcl(inclexcl_patterns) | ||
| matcher.add_includepaths(include_paths) | ||
| return matcher | ||
|
|
||
| def do_serve(self, args): | ||
| """Start in server mode. This command is usually not used manually.""" | ||
|
|
@@ -493,13 +488,20 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, | |
|
|
||
| This should only raise on critical errors. Per-item errors must be handled within this method. | ||
| """ | ||
| if st is None: | ||
| with backup_io('stat'): | ||
| st = os.lstat(path) | ||
|
|
||
| recurse_excluded_dir = False | ||
| if not matcher.match(path): | ||
| self.print_file_status('x', path) | ||
| return | ||
|
|
||
| if stat.S_ISDIR(st.st_mode) and matcher.recurse_dir: | ||
| recurse_excluded_dir = True | ||
| else: | ||
| return | ||
|
|
||
| try: | ||
| if st is None: | ||
| with backup_io('stat'): | ||
| st = os.lstat(path) | ||
| if (st.st_ino, st.st_dev) in skip_inodes: | ||
| return | ||
| # if restrict_dev is given, we do not want to recurse into a new filesystem, | ||
|
|
@@ -527,7 +529,8 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, | |
| read_special=read_special, dry_run=dry_run) | ||
| return | ||
| if not dry_run: | ||
| status = archive.process_dir(path, st) | ||
| if not recurse_excluded_dir: | ||
| status = archive.process_dir(path, st) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there might be a problem here: if the code recurses into an excluded dir (searching for stuff that is included again in directories deeper below), at extract time, borg will automatically create missing intermediate dirs, but it can not set the correct metadata on them (not even when doing a full extract) as it does not have them in the archive.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hm, does it matter if the intermediate dirs are archived before or after the nested included path? It could be possible to add the intermediate dirs (at the time a nested included path is archived) to a list of paths to be archived later.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. permissions might be important to set before filling the dir. (check the code dealing with dirs elsewhere)
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. guess the upper-layer dir entries should precede the lower-layer stuff in the archive.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't claim to know lots about the details of borg's storage method, but I'm assuming that each backup operation stores file chunks in some manner that doesn't depend on order, but tracks the backup itself in the form of some manifest of the backed-up items. If this is the case, then it could be solved as simply as just ensuring that the manifest is sorted before storing it such that parent folders come before their children. This would work in conjunction with the idea of maintaining a list of parents to process after a matching nested folder is found. Does this sound right or doable?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps just use a heap-queue for the manifest list, and always add item-names to it via heapq.heappush()?
||
| if recurse: | ||
| with backup_io('scandir'): | ||
| entries = helpers.scandir_inorder(path) | ||
|
|
@@ -590,7 +593,9 @@ def _process(self, archive, cache, matcher, exclude_caches, exclude_if_present, | |
| status = '?' # need to add a status code somewhere | ||
| else: | ||
| status = '-' # dry run, item was not backed up | ||
| self.print_file_status(status, path) | ||
|
|
||
| if not recurse_excluded_dir: | ||
| self.print_file_status(status, path) | ||
|
|
||
| @staticmethod | ||
| def build_filter(matcher, peek_and_store_hardlink_masters, strip_components): | ||
|
|
@@ -616,7 +621,7 @@ def do_extract(self, args, repository, manifest, key, archive): | |
| if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )): | ||
| logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8') | ||
|
|
||
| matcher, include_patterns = self.build_matcher(args.patterns, args.paths) | ||
| matcher = self.build_matcher(args.patterns, args.paths) | ||
|
|
||
| progress = args.progress | ||
| output_list = args.output_list | ||
|
|
@@ -681,9 +686,8 @@ def peek_and_store_hardlink_masters(item, matched): | |
| archive.extract_item(dir_item) | ||
| except BackupOSError as e: | ||
| self.print_warning('%s: %s', remove_surrogates(dir_item.path), e) | ||
| for pattern in include_patterns: | ||
| if pattern.match_count == 0: | ||
| self.print_warning("Include pattern '%s' never matched.", pattern) | ||
| for pattern in matcher.get_unmatched_include_patterns(): | ||
| self.print_warning("Include pattern '%s' never matched.", pattern) | ||
| if pi: | ||
| # clear progress output | ||
| pi.finish() | ||
|
|
@@ -893,13 +897,13 @@ def compare_or_defer(item1, item2): | |
| 'If you know for certain that they are the same, pass --same-chunker-params ' | ||
| 'to override this check.') | ||
|
|
||
| matcher, include_patterns = self.build_matcher(args.patterns, args.paths) | ||
| matcher = self.build_matcher(args.patterns, args.paths) | ||
|
|
||
| compare_archives(archive1, archive2, matcher) | ||
|
|
||
| for pattern in include_patterns: | ||
| if pattern.match_count == 0: | ||
| self.print_warning("Include pattern '%s' never matched.", pattern) | ||
| for pattern in matcher.get_unmatched_include_patterns(): | ||
| self.print_warning("Include pattern '%s' never matched.", pattern) | ||
|
|
||
| return self.exit_code | ||
|
|
||
| @with_repository(exclusive=True, cache=True) | ||
|
|
@@ -1048,7 +1052,7 @@ def write(bytestring): | |
| return self._list_repository(args, manifest, write) | ||
|
|
||
| def _list_archive(self, args, repository, manifest, key, write): | ||
| matcher, _ = self.build_matcher(args.patterns, args.paths) | ||
| matcher = self.build_matcher(args.patterns, args.paths) | ||
| if args.format is not None: | ||
| format = args.format | ||
| elif args.short: | ||
|
|
@@ -1330,7 +1334,7 @@ def do_recreate(self, args, repository, manifest, key, cache): | |
| env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'): | ||
| return EXIT_ERROR | ||
|
|
||
| matcher, include_patterns = self.build_matcher(args.patterns, args.paths) | ||
| matcher = self.build_matcher(args.patterns, args.paths) | ||
| self.output_list = args.output_list | ||
| self.output_filter = args.output_filter | ||
| recompress = args.recompress != 'never' | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
naming: isn't this rather a state flag than a command flag?
would
`recursing_excluded_dir` better describe what it tells?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it doesn't matter much to me what it is named, but I chose recurse (as a command verb) because in my mind the recursing hasn't happened yet at that point, and will only happen later on, based on the value of the flag which is set here.