From 8436865427c8d4e6e780fac2d78df39d1cff7673 Mon Sep 17 00:00:00 2001 From: Guinness Date: Mon, 23 Nov 2020 16:00:04 +0100 Subject: [PATCH 1/5] Complements the documentation for pattern files and exclude files --- src/borg/archiver.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 14fd3057f4..2946970860 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2644,11 +2644,13 @@ def define_exclude_and_patterns(add_option, *, tag_files=False, strip_components type=parse_exclude_pattern, action='append', help='exclude paths matching PATTERN') add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction, - help='read exclude patterns from EXCLUDEFILE, one per line') + help='read exclude patterns from EXCLUDEFILE, one per' + 'line. These entries are processed by borg and not the shell') add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction, help='experimental: include/exclude paths matching PATTERN') add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction, - help='experimental: read include/exclude patterns from PATTERNFILE, one per line') + help='experimental: read include/exclude patterns from PATTERNFILE, one per line' + 'These entries are processed by borg and not the shell') if tag_files: add_option('--exclude-caches', dest='exclude_caches', action='store_true', From 3880f71a7d309d142ff8d321334951ff33a6f467 Mon Sep 17 00:00:00 2001 From: Guinness Date: Tue, 24 Nov 2020 16:09:10 +0100 Subject: [PATCH 2/5] Fix docstring on all instances that can use --exclude-from --- src/borg/archiver.py | 75 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 2946970860..e0e7543a3e 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2644,13 +2644,11 @@ def define_exclude_and_patterns(add_option, *, 
tag_files=False, strip_components type=parse_exclude_pattern, action='append', help='exclude paths matching PATTERN') add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction, - help='read exclude patterns from EXCLUDEFILE, one per' - 'line. These entries are processed by borg and not the shell') + help='read exclude patterns from EXCLUDEFILE, one per line.') add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction, help='experimental: include/exclude paths matching PATTERN') add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction, - help='experimental: read include/exclude patterns from PATTERNFILE, one per line' - 'These entries are processed by borg and not the shell') + help='experimental: read include/exclude patterns from PATTERNFILE, one per line') if tag_files: add_option('--exclude-caches', dest='exclude_caches', action='store_true', @@ -3108,6 +3106,18 @@ def define_borg_mount(parser): exclusive because the data is not actually compressed and deduplicated during a dry run. See the output of the "borg help patterns" command for more help on exclude patterns. + To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` + See the output of the "borg help placeholders" command for more help on placeholders. .. man NOTES @@ -3505,6 +3515,18 @@ def define_borg_mount(parser): (for more info on these patterns, see ``borg help patterns``). Note that these two options are mutually exclusive. 
+ To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` + To avoid accidentally deleting archives, especially when using glob patterns, it might be helpful to use the ``--dry-run`` to test out the command without actually making any changes to the repository. @@ -3553,6 +3575,18 @@ def define_borg_mount(parser): Note that the chunker params changed from Borg 0.xx to 1.0. See the output of the "borg help patterns" command for more help on exclude patterns. + + To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` """) subparser = subparsers.add_parser('diff', parents=[common_parser], add_help=False, description=self.do_diff.__doc__, @@ -3608,6 +3642,17 @@ def define_borg_mount(parser): The file selection can further be restricted by using the ``--exclude`` option. See the output of the "borg help patterns" command for more help on exclude patterns. + To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` ``--progress`` can be slower than no progress display, since it makes one additional pass over the archive metadata. @@ -3639,6 +3684,17 @@ def define_borg_mount(parser): be restricted by using the ``--exclude`` option. See the output of the "borg help patterns" command for more help on exclude patterns. 
+ To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` By using ``--dry-run``, you can do all extraction steps except actually writing the output data: reading metadata and data chunks from the repo, checking the hash/hmac, @@ -3983,6 +4039,17 @@ def define_borg_mount(parser): This command lists the contents of a repository or an archive. See the "borg help patterns" command for more help on exclude patterns. + To give an example for --exclude-from: + ```shell + $ cat exclude-file.txt + # Comment line + /home/*/junk + *.tmp + One file with spaces + ``` + Then the command borg ``create --exclude-from exclude-file.txt`` will + delete all files excluding /home/*/junk, *.tmp and the file named + ``One file with spaces`` .. man NOTES From 19397c81edae465a5070aaf3eae498d274936e0f Mon Sep 17 00:00:00 2001 From: Guinness Date: Sat, 28 Nov 2020 21:30:03 +0100 Subject: [PATCH 3/5] Factorize all the doc in the The path/filenames used as input for the pattern matching start from the currently active recursion root. You usually give the recursion root(s) when invoking borg and these can be either relative or absolute paths. So, when you give relative/ as root, the paths going into the matcher will look like relative/.../file.ext. When you give /absolute/ as root, they will look like /absolute/.../file.ext. File paths in Borg archives are always stored normalized and relative. This means that e.g. borg create /path/to/repo ../some/path will store all files as some/path/.../file.ext and borg create /path/to/repo /home/user will store all files as home/user/.../file.ext. File patterns support these styles: fnmatch, shell, regular expressions, path prefixes and path full-matches. 
By default, fnmatch is used for --exclude patterns and shell-style is used for the experimental --pattern option. Starting with Borg 1.2, for all but regular expression pattern matching styles, all paths are treated as relative, meaning that a leading path separator is removed after normalizing and before matching. This allows you to use absolute or relative patterns arbitrarily. If followed by a colon (':') the first two characters of a pattern are used as a style selector. Explicit style selection is necessary when a non-default style is desired or when the desired pattern starts with two alphanumeric characters followed by a colon (e.g. aa:something/*). `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector fm: This is the default style for --exclude and --exclude-from. These patterns use a variant of shell pattern syntax, with '*' matching any number of characters, '?' matching any single character, '[...]' matching any single character specified, including ranges, and '[!...]' matching any character not specified. For the purpose of these patterns, the path separator (backslash for Windows and '/' on other systems) is not treated specially. Wrap meta-characters in brackets for a literal match (e.g. [?] to match the literal character ?). For a path to match a pattern, the full path must match, or it must match from the start of the full path to just before a path separator. Except for the root path, paths will never end in the path separator when matching is attempted. Thus, if a given pattern ends in a path separator, a '*' is appended before matching is attempted. A leading path separator is always removed. Shell-style patterns, selector sh: This is the default style for --pattern and --patterns-from. Like fnmatch patterns these are similar to shell patterns. The difference is that the pattern may include **/ for matching zero or more directory levels, * for matching zero or more arbitrary characters with the exception of any path separator. A leading path separator is always removed. 
Regular expressions, selector re: Regular expressions similar to those found in Perl are supported. Unlike shell patterns regular expressions are not required to match the full path and any substring match is sufficient. It is strongly recommended to anchor patterns to the start ('^'), to the end ('$') or both. Path separators (backslash for Windows and '/' on other systems) in paths are always normalized to a forward slash ('/') before applying a pattern. The regular expression syntax is described in the Python documentation for the `re module <https://docs.python.org/3/library/re.html>`_. Path prefix, selector pp: This pattern style is useful to match whole sub-directories. The pattern pp:root/somedir matches root/somedir and everything therein. A leading path separator is always removed. Path full-match, selector pf: This pattern style is (only) useful to match full paths. This is kind of a pseudo pattern as it cannot have any variable or unspecified parts - the full path must be given. pf:root/file.ext matches root/file.ext only. A leading path separator is always removed. Implementation note: this is implemented via very time-efficient O(1) hashtable lookups (this means you can have huge amounts of such patterns without impacting performance much). Due to that, this kind of pattern does not respect any context or order. If you use such a pattern to include a file, it will always be included (if the directory recursion encounters it). Other include/exclude patterns that would normally match will be ignored. The same logic applies to exclude patterns. Note: re:, sh: and fm: patterns are all implemented on top of the Python SRE engine. It is very easy to formulate patterns for each of these types that require an inordinate amount of time to match paths. If untrusted users are able to supply patterns, ensure they cannot supply re: patterns. Further, ensure that sh: and fm: patterns only contain a handful of wildcards at most. Exclusions can be passed via the command line option --exclude. 
When used from within a shell the patterns should be quoted to protect them from expansion. The --exclude-from option permits loading exclusion patterns from a text file with one pattern per line. Lines empty or starting with the number sign ('#') after removing whitespace on both ends are ignored. The optional style selector prefix is also supported for patterns loaded from a file. Due to whitespace removal paths with whitespace at the beginning or end can only be excluded using regular expressions. To test your exclusion patterns without performing an actual backup you can run borg create --list --dry-run .... Examples: # Exclude '/home/user/file.o' but not '/home/user/file.odt': $ borg create -e '*.o' backup / # Exclude '/home/user/junk' and '/home/user/subdir/junk' but # not '/home/user/importantjunk' or '/etc/junk': $ borg create -e '/home/*/junk' backup / # Exclude the contents of '/home/user/cache' but not the directory itself: $ borg create -e home/user/cache/ backup / # The file '/home/user/cache/important' is *not* backed up: $ borg create -e /home/user/cache/ backup / /home/user/cache/important # The contents of directories in '/home' are not backed up when their name # ends in '.tmp' $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup / # Load exclusions from file $ cat >exclude.txt < Date: Sat, 28 Nov 2020 22:35:14 +0100 Subject: [PATCH 4/5] Fix accidental removals of lines --- src/borg/archiver.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 21dd8af5bc..3f73b98bbc 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2227,9 +2227,9 @@ def do_break_lock(self, args, repository): fm:aa:something/* re:^home/[^/]\\.tmp/ sh:home/*/.thumbnails - EOF # Example with spaces, no need to escape as it is processed by borg some file with spaces.txt + EOF $ borg create --exclude-from exclude.txt backup / .. 
container:: experimental @@ -3619,6 +3619,8 @@ def define_borg_mount(parser): can be selected by passing a list of ``PATHs`` as arguments. The file selection can further be restricted by using the ``--exclude`` option. + See the output of the "borg help patterns" command for more help on exclude patterns. + ``--progress`` can be slower than no progress display, since it makes one additional pass over the archive metadata. """) @@ -3649,6 +3651,7 @@ def define_borg_mount(parser): be restricted by using the ``--exclude`` option. See the output of the "borg help patterns" command for more help on exclude patterns. + By using ``--dry-run``, you can do all extraction steps except actually writing the output data: reading metadata and data chunks from the repo, checking the hash/hmac, decrypting, decompressing. @@ -3991,6 +3994,8 @@ def define_borg_mount(parser): list_epilog = process_epilog(""" This command lists the contents of a repository or an archive. + See the "borg help patterns" command for more help on exclude patterns. + .. 
man NOTES The following keys are available for ``--format``: From ea3470b0d549c2cf153addef8d2862fd4992343b Mon Sep 17 00:00:00 2001 From: Guinness Date: Sun, 29 Nov 2020 17:36:56 +0100 Subject: [PATCH 5/5] Fix last corrections --- src/borg/archiver.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 3f73b98bbc..8f5d3abe24 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -2646,7 +2646,7 @@ def define_exclude_and_patterns(add_option, *, tag_files=False, strip_components type=parse_exclude_pattern, action='append', help='exclude paths matching PATTERN') add_option('--exclude-from', metavar='EXCLUDEFILE', action=ArgparseExcludeFileAction, - help='read exclude patterns from EXCLUDEFILE, one per line.') + help='read exclude patterns from EXCLUDEFILE, one per line') add_option('--pattern', metavar='PATTERN', action=ArgparsePatternAction, help='experimental: include/exclude paths matching PATTERN') add_option('--patterns-from', metavar='PATTERNFILE', action=ArgparsePatternFileAction, @@ -3108,17 +3108,6 @@ def define_borg_mount(parser): exclusive because the data is not actually compressed and deduplicated during a dry run. See the output of the "borg help patterns" command for more help on exclude patterns. - To give an example for --exclude-from: - ```shell - $ cat exclude-file.txt - # Comment line - /home/*/junk - *.tmp - One file with spaces - ``` - Then the command borg ``create --exclude-from exclude-file.txt`` will - delete all files excluding /home/*/junk, *.tmp and the file named - ``One file with spaces`` See the output of the "borg help placeholders" command for more help on placeholders.