From dca04ba53857df5d2581b3870501fc879c8ea1cf Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Aug 2018 00:22:01 +0200 Subject: [PATCH 01/12] _process: remove an infrequently used micro-opt st param was only given at the root paths of the recursion. we can just drop that and make the code simpler. --- src/borg/archiver.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 207585e24f..db6fcc854c 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -490,7 +490,7 @@ def create_inner(archive, cache, fso): restrict_dev = None self._process(fso, cache, matcher, args.exclude_caches, args.exclude_if_present, args.keep_exclude_tags, skip_inodes, path, restrict_dev, - read_special=args.read_special, dry_run=dry_run, st=st) + read_special=args.read_special, dry_run=dry_run) if not dry_run: archive.save(comment=args.comment, timestamp=args.timestamp) if args.progress: @@ -544,20 +544,17 @@ def create_inner(archive, cache, fso): def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, path, restrict_dev, - read_special=False, dry_run=False, st=None): + read_special=False, dry_run=False): """ Process *path* recursively according to the various parameters. - *st* (if given) is a *os.stat_result* object for *path*. - This should only raise on critical errors. Per-item errors must be handled within this method. """ try: recurse_excluded_dir = False if matcher.match(path): - if st is None: - with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + with backup_io('stat'): + st = os.stat(path, follow_symlinks=False) else: self.print_file_status('x', path) # get out here as quickly as possible: @@ -566,9 +563,8 @@ def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, # could trigger an error, e.g. if access is forbidden, see #3209. if not matcher.recurse_dir: return - if st is None: - with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + with backup_io('stat'): + st = os.stat(path, follow_symlinks=False) recurse_excluded_dir = stat.S_ISDIR(st.st_mode) if not recurse_excluded_dir: return From 677102f29263c2c1613c05462e0459048c9f17e6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Aug 2018 04:50:27 +0200 Subject: [PATCH 02/12] process_file: avoid race condition: stat data vs. content always open the file and then do all operations with the fd: - fstat - read - get xattrs, acls, bsdflags --- src/borg/archive.py | 98 ++++++++++++++++++++++----------------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index ef9a83b301..efb2f8d2b1 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1126,60 +1126,60 @@ def process_stdin(self, path, cache): def process_file(self, path, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet - md = None - is_special_file = is_special(st.st_mode) - if not hardlinked or hardlink_master: - if not is_special_file: - path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) - known, ids = cache.file_known_and_unchanged(path_hash, st) - else: - # in --read-special mode, we may be called for special files. - # there should be no information in the cache about special files processed in - # read-special mode, but we better play safe as this was wrong in the past: - path_hash = None - known, ids = False, None - chunks = None - if ids is not None: - # Make sure all ids are available - for id_ in ids: - if not cache.seen_chunk(id_): - status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified - break + with backup_io('open'): + fd = Archive._open_rb(path) + try: + with backup_io('fstat'): + curr_st = os.fstat(fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISREG(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + is_special_file = is_special(st.st_mode) + if not hardlinked or hardlink_master: + if not is_special_file: + path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path))) + known, ids = cache.file_known_and_unchanged(path_hash, st) else: - chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] - status = 'U' # regular file, unchanged - else: - status = 'M' if known else 'A' # regular file, modified or added - item.hardlink_master = hardlinked - item.update(self.metadata_collector.stat_simple_attrs(st)) - # Only chunkify the file if needed - if chunks is not None: - item.chunks = chunks - else: - with backup_io('open'): - fh = Archive._open_rb(path) - try: - self.process_file_chunks(item, cache, self.stats, self.show_progress, backup_io_iter(self.chunker.chunkify(None, fh))) - md = self.metadata_collector.stat_attrs(st, path, fd=fh) - finally: - os.close(fh) + # in --read-special mode, we may be called for special files. + # there should be no information in the cache about special files processed in + # read-special mode, but we better play safe as this was wrong in the past: + path_hash = None + known, ids = False, None + chunks = None + if ids is not None: + # Make sure all ids are available + for id_ in ids: + if not cache.seen_chunk(id_): + status = 'M' # cache said it is unmodified, but we lost a chunk: process file like modified + break + else: + chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids] + status = 'U' # regular file, unchanged + else: + status = 'M' if known else 'A' # regular file, modified or added + item.hardlink_master = hardlinked + item.update(self.metadata_collector.stat_simple_attrs(st)) + # Only chunkify the file if needed + if chunks is not None: + item.chunks = chunks + else: + with backup_io('read'): + self.process_file_chunks(item, cache, self.stats, self.show_progress, backup_io_iter(self.chunker.chunkify(None, fd))) if not is_special_file: # we must not memorize special files, because the contents of e.g. a # block or char device will change without its mtime/size/inode changing. cache.memorize_file(path_hash, st, [c.id for c in item.chunks]) - self.stats.nfiles += 1 - if md is None: - fh = Archive._open_rb(path) - try: - md = self.metadata_collector.stat_attrs(st, path, fd=fh) - finally: - os.close(fh) - item.update(md) - item.get_size(memorize=True) - if is_special_file: - # we processed a special file like a regular file. reflect that in mode, - # so it can be extracted / accessed in FUSE mount like a regular file: - item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) + self.stats.nfiles += 1 + md = self.metadata_collector.stat_attrs(st, path, fd=fd) + item.update(md) + item.get_size(memorize=True) + if is_special_file: + # we processed a special file like a regular file. reflect that in mode, + # so it can be extracted / accessed in FUSE mount like a regular file: + item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) + finally: + os.close(fd) return status From 8220c6eac876d4230c84f8a7bfea4429f27fc230 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Aug 2018 17:39:30 +0200 Subject: [PATCH 03/12] move/refactor Archive._open_rb function to helpers.os_open also: - add and use OsOpen context manager - add O_NONBLOCK, O_NOFOLLOW, O_NOCTTY (inspired by gnu tar) --- src/borg/archive.py | 32 ++++++++++++-------------------- src/borg/helpers/fs.py | 30 ++++++++++++++++++++++++++++++ src/borg/testsuite/archiver.py | 3 ++- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index efb2f8d2b1..8f69873e8a 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -38,6 +38,7 @@ from .helpers import bin_to_hex from .helpers import safe_ns from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi +from .helpers import os_open, flags_normal from .helpers import msgpack from .patterns import PathPrefixPattern, FnmatchPattern, IECommand from .item import Item, ArchiveItem, ItemDiff @@ -47,9 +48,6 @@ has_lchmod = hasattr(os, 'lchmod') -flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0) -flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0) - class Statistics: @@ -197,6 +195,16 @@ def backup_io_iter(iterator): yield item +@contextmanager +def OsOpen(path, flags, noatime=False, op='open'): + with backup_io(op): + fd = os_open(path, flags, noatime) + try: + yield fd + finally: + os.close(fd) + + class DownloadPipeline: def __init__(self, repository, key): @@ -826,18 +834,6 @@ def chunk_decref(id, stats): logger.warning('forced deletion succeeded, but the deleted archive was corrupted.') logger.warning('borg check --repair is required to free all space.') - @staticmethod - def _open_rb(path): - try: - # if we have O_NOATIME, this likely will succeed if we are root or owner of file: - return os.open(path, flags_noatime) - except PermissionError: - if flags_noatime == flags_normal: - # we do not have O_NOATIME, no need to try again: - raise - # Was this EPERM due to the O_NOATIME flag? Try again without it: - return os.open(path, flags_normal) - @staticmethod def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False): """ @@ -1126,9 +1122,7 @@ def process_stdin(self, path, cache): def process_file(self, path, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet - with backup_io('open'): - fd = Archive._open_rb(path) - try: + with OsOpen(path, flags_normal, noatime=True) as fd: with backup_io('fstat'): curr_st = os.fstat(fd) # XXX do some checks here: st vs. curr_st @@ -1178,8 +1172,6 @@ def process_file(self, path, st, cache): # we processed a special file like a regular file. reflect that in mode, # so it can be extracted / accessed in FUSE mount like a regular file: item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) - finally: - os.close(fd) return status diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index ef6b707297..93852c5bcc 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -189,6 +189,36 @@ def dash_open(path, mode): return open(path, mode) +def O_(*flags): + result = 0 + for flag in flags: + result |= getattr(os, 'O_' + flag, 0) + return result + + +flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW' +flags_normal = flags_base | O_('RDONLY') +flags_noatime = flags_normal | O_('NOATIME') + + +def os_open(path, flags, noatime=False): + _flags_normal = flags + if noatime: + _flags_noatime = _flags_normal | O_('NOATIME') + try: + # if we have O_NOATIME, this likely will succeed if we are root or owner of file: + fd = os.open(path, _flags_noatime) + except PermissionError: + if _flags_noatime == _flags_normal: + # we do not have O_NOATIME, no need to try again: + raise + # Was this EPERM due to the O_NOATIME flag? Try again without it: + fd = os.open(path, _flags_normal) + else: + fd = os.open(path, _flags_normal) + return fd + + def umount(mountpoint): env = prepare_subprocess_env(system=True) try: diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 88eee3122d..7c1316db24 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -32,7 +32,7 @@ import borg from .. import xattr, helpers, platform -from ..archive import Archive, ChunkBuffer, flags_noatime, flags_normal +from ..archive import Archive, ChunkBuffer from ..archiver import Archiver, parse_storage_quota from ..cache import Cache, LocalCache from ..constants import * # NOQA @@ -46,6 +46,7 @@ from ..helpers import bin_to_hex from ..helpers import MAX_S from ..helpers import msgpack +from ..helpers import flags_noatime, flags_normal from ..nanorst import RstToTextLazy, rst_to_terminal from ..patterns import IECommand, PatternMatcher, parse_pattern from ..item import Item, ItemDiff From ad5b9a1dfd26ad2974c081ce6284266469eeabbc Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 12 Aug 2018 19:11:27 +0200 Subject: [PATCH 04/12] _process / process_*: change to kwargs only we'll add/remove some args soon, so many pos args would be just bad. --- src/borg/archive.py | 12 +++++----- src/borg/archiver.py | 57 +++++++++++++++++++++++++------------------- 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 8f69873e8a..24a7d96e04 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1076,23 +1076,23 @@ def create_helper(self, path, st, status=None, hardlinkable=True): if hardlink_master: self.hard_links[(st.st_ino, st.st_dev)] = safe_path - def process_dir(self, path, st): + def process_dir(self, *, path, st): with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_fifo(self, path, st): + def process_fifo(self, *, path, st): with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_dev(self, path, st, dev_type): + def process_dev(self, *, path, st, dev_type): with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device item.rdev = st.st_rdev item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_symlink(self, path, st): + def process_symlink(self, *, path, st): # note: using hardlinkable=False because we can not support hardlinked symlinks, # due to the dual-use of item.source, see issue #2343: # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks. @@ -1103,7 +1103,7 @@ def process_symlink(self, path, st): item.update(self.metadata_collector.stat_attrs(st, path)) return status - def process_stdin(self, path, cache): + def process_stdin(self, *, path, cache): uid, gid = 0, 0 t = int(time.time()) * 1000000000 item = Item( @@ -1120,7 +1120,7 @@ def process_stdin(self, path, cache): self.add_item(item, stats=self.stats) return 'i' # stdin - def process_file(self, path, st, cache): + def process_file(self, *, path, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet with OsOpen(path, flags_normal, noatime=True) as fd: with backup_io('fstat'): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index db6fcc854c..09b3e213c3 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -470,7 +470,7 @@ def create_inner(archive, cache, fso): path = args.stdin_name if not dry_run: try: - status = fso.process_stdin(path, cache) + status = fso.process_stdin(path=path, cache=cache) except BackupOSError as e: status = 'E' self.print_warning('%s: %s', path, e) @@ -488,9 +488,11 @@ def create_inner(archive, cache, fso): restrict_dev = st.st_dev else: restrict_dev = None - self._process(fso, cache, matcher, args.exclude_caches, args.exclude_if_present, - args.keep_exclude_tags, skip_inodes, path, restrict_dev, - read_special=args.read_special, dry_run=dry_run) + self._process(path=path, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, + keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: archive.save(comment=args.comment, timestamp=args.timestamp) if args.progress: @@ -542,9 +544,10 @@ def create_inner(archive, cache, fso): create_inner(None, None, None) return self.exit_code - def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, - keep_exclude_tags, skip_inodes, path, restrict_dev, - read_special=False, dry_run=False): + def _process(self, *, path, + fso, cache, matcher, + exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, + restrict_dev, read_special=False, dry_run=False): """ Process *path* recursively according to the various parameters. @@ -584,7 +587,7 @@ def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, return if stat.S_ISREG(st.st_mode): if not dry_run: - status = fso.process_file(path, st, cache) + status = fso.process_file(path=path, st=st, cache=cache) elif stat.S_ISDIR(st.st_mode): if recurse: tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) @@ -593,28 +596,32 @@ def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, # returning (we do not need to archive or recurse into tagged directories), see #3991: if not recurse_excluded_dir: if keep_exclude_tags and not dry_run: - fso.process_dir(path, st) + fso.process_dir(path=path, st=st) for tag_path in tag_paths: - self._process(fso, cache, matcher, exclude_caches, exclude_if_present, - keep_exclude_tags, skip_inodes, tag_path, restrict_dev, - read_special=read_special, dry_run=dry_run) + self._process(path=tag_path, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) self.print_file_status('x', path) return if not dry_run: if not recurse_excluded_dir: - status = fso.process_dir(path, st) + status = fso.process_dir(path=path, st=st) if recurse: with backup_io('scandir'): entries = helpers.scandir_inorder(path) for dirent in entries: normpath = os.path.normpath(dirent.path) - self._process(fso, cache, matcher, exclude_caches, exclude_if_present, - keep_exclude_tags, skip_inodes, normpath, restrict_dev, - read_special=read_special, dry_run=dry_run) + self._process(path=normpath, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_symlink(path, st) + status = fso.process_symlink(path=path, st=st) else: try: st_target = os.stat(path) @@ -623,27 +630,27 @@ def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present, else: special = is_special(st_target.st_mode) if special: - status = fso.process_file(path, st_target, cache) + status = fso.process_file(path=path, st=st_target, cache=cache) else: - status = fso.process_symlink(path, st) + status = fso.process_symlink(path=path, st=st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: if not read_special: - status = fso.process_fifo(path, st) + status = fso.process_fifo(path=path, st=st) else: - status = fso.process_file(path, st, cache) + status = fso.process_file(path=path, st=st, cache=cache) elif stat.S_ISCHR(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path, st, 'c') + status = fso.process_dev(path=path, st=st, dev_type='c') else: - status = fso.process_file(path, st, cache) + status = fso.process_file(path=path, st=st, cache=cache) elif stat.S_ISBLK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path, st, 'b') + status = fso.process_dev(path=path, st=st, dev_type='b') else: - status = fso.process_file(path, st, cache) + status = fso.process_file(path=path, st=st, cache=cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return From 833c49f834ac788b2587b3ad10cf739305f7100e Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Aug 2018 01:18:00 +0200 Subject: [PATCH 05/12] use *at style functions (e.g. openat, statat) to avoid races races via changing path components can be avoided by opening the parent directory and using parent_fd + file_name combination with *at style functions to access the directories' contents. --- src/borg/archive.py | 49 +++++++++------ src/borg/archiver.py | 132 +++++++++++++++++++++++------------------ src/borg/helpers/fs.py | 44 ++++++++++---- 3 files changed, 138 insertions(+), 87 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 24a7d96e04..87ebf19261 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -196,9 +196,9 @@ def backup_io_iter(iterator): @contextmanager -def OsOpen(path, flags, noatime=False, op='open'): +def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'): with backup_io(op): - fd = os_open(path, flags, noatime) + fd = os_open(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=noatime) try: yield fd finally: @@ -1076,31 +1076,46 @@ def create_helper(self, path, st, status=None, hardlinkable=True): if hardlink_master: self.hard_links[(st.st_ino, st.st_dev)] = safe_path - def process_dir(self, *, path, st): + def process_dir(self, *, path, fd, st): with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master): - item.update(self.metadata_collector.stat_attrs(st, path)) + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status - def process_fifo(self, *, path, st): + def process_fifo(self, *, path, parent_fd, name, st): with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo - item.update(self.metadata_collector.stat_attrs(st, path)) - return status + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: + with backup_io('fstat'): + curr_st = os.fstat(fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISFIFO(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + return status - def process_dev(self, *, path, st, dev_type): + def process_dev(self, *, path, parent_fd, name, st, dev_type): with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device - item.rdev = st.st_rdev - item.update(self.metadata_collector.stat_attrs(st, path)) - return status + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: + with backup_io('fstat'): + curr_st = os.fstat(fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + item.rdev = st.st_rdev + item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) + return status - def process_symlink(self, *, path, st): + def process_symlink(self, *, path, parent_fd, name, st): # note: using hardlinkable=False because we can not support hardlinked symlinks, # due to the dual-use of item.source, see issue #2343: # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks. with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master): + fname = name if name is not None and parent_fd is not None else path with backup_io('readlink'): - source = os.readlink(path) + source = os.readlink(fname, dir_fd=parent_fd) item.source = source - item.update(self.metadata_collector.stat_attrs(st, path)) + item.update(self.metadata_collector.stat_attrs(st, path)) # can't use FD here? return status def process_stdin(self, *, path, cache): @@ -1120,9 +1135,9 @@ def process_stdin(self, *, path, cache): self.add_item(item, stats=self.stats) return 'i' # stdin - def process_file(self, *, path, st, cache): + def process_file(self, *, path, parent_fd, name, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet - with OsOpen(path, flags_normal, noatime=True) as fd: + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io('fstat'): curr_st = os.fstat(fd) # XXX do some checks here: st vs. curr_st @@ -1172,7 +1187,7 @@ def process_file(self, *, path, st, cache): # we processed a special file like a regular file. reflect that in mode, # so it can be extracted / accessed in FUSE mount like a regular file: item.mode = stat.S_IFREG | stat.S_IMODE(item.mode) - return status + return status def valid_msgpacked_dict(d, keys_serialized): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 09b3e213c3..5747ed6129 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -34,7 +34,7 @@ from . import helpers from .algorithms.checksums import crc32 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special -from .archive import BackupError, BackupOSError, backup_io +from .archive import BackupError, BackupOSError, backup_io, OsOpen from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from .cache import Cache, assert_secure, SecurityManager from .constants import * # NOQA @@ -66,6 +66,7 @@ from .helpers import popen_with_error_handling, prepare_subprocess_env from .helpers import dash_open from .helpers import umount +from .helpers import flags_root, flags_dir from .helpers import msgpack from .nanorst import rst_to_terminal from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern @@ -479,20 +480,23 @@ def create_inner(archive, cache, fso): self.print_file_status(status, path) continue path = os.path.normpath(path) - try: - st = os.stat(path, follow_symlinks=False) - except OSError as e: - self.print_warning('%s: %s', path, e) - continue - if args.one_file_system: - restrict_dev = st.st_dev - else: - restrict_dev = None - self._process(path=path, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, - keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) + parent_dir = os.path.dirname(path) or '.' + name = os.path.basename(path) + with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op='open_root') as parent_fd: + try: + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) + except OSError as e: + self.print_warning('%s: %s', path, e) + continue + if args.one_file_system: + restrict_dev = st.st_dev + else: + restrict_dev = None + self._process(path=path, parent_fd=parent_fd, name=name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present, + keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run) if not dry_run: archive.save(comment=args.comment, timestamp=args.timestamp) if args.progress: @@ -544,12 +548,12 @@ def create_inner(archive, cache, fso): create_inner(None, None, None) return self.exit_code - def _process(self, *, path, + def _process(self, *, path, parent_fd=None, name=None, fso, cache, matcher, exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes, restrict_dev, read_special=False, dry_run=False): """ - Process *path* recursively according to the various parameters. + Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters. This should only raise on critical errors. Per-item errors must be handled within this method. """ @@ -557,7 +561,7 @@ def _process(self, *, path, recurse_excluded_dir = False if matcher.match(path): with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) else: self.print_file_status('x', path) # get out here as quickly as possible: @@ -567,7 +571,7 @@ def _process(self, *, path, if not matcher.recurse_dir: return with backup_io('stat'): - st = os.stat(path, follow_symlinks=False) + st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) recurse_excluded_dir = stat.S_ISDIR(st.st_mode) if not recurse_excluded_dir: return @@ -582,75 +586,85 @@ def _process(self, *, path, if self.exclude_nodump: # Ignore if nodump flag is set with backup_io('flags'): - if get_flags(path, st) & stat.UF_NODUMP: + if get_flags(path=path, st=st) & stat.UF_NODUMP: self.print_file_status('x', path) return if stat.S_ISREG(st.st_mode): if not dry_run: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISDIR(st.st_mode): - if recurse: - tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present) - if tag_paths: - # if we are already recursing in an excluded dir, we do not need to do anything else than - # returning (we do not need to archive or recurse into tagged directories), see #3991: + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, + noatime=True, op='dir_open') as child_fd: + with backup_io('fstat'): + curr_st = os.fstat(child_fd) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISDIR(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + if recurse: + tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present) + if tag_names: + # if we are already recursing in an excluded dir, we do not need to do anything else than + # returning (we do not need to archive or recurse into tagged directories), see #3991: + if not recurse_excluded_dir: + if keep_exclude_tags and not dry_run: + fso.process_dir(path=path, fd=child_fd, st=st) + for tag_name in tag_names: + tag_path = os.path.join(path, tag_name) + self._process(path=tag_path, parent_fd=child_fd, name=tag_name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + self.print_file_status('x', path) + return + if not dry_run: if not recurse_excluded_dir: - if keep_exclude_tags and not dry_run: - fso.process_dir(path=path, st=st) - for tag_path in tag_paths: - self._process(path=tag_path, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) - self.print_file_status('x', path) - return - if not dry_run: - if not recurse_excluded_dir: - status = fso.process_dir(path=path, st=st) - if recurse: - with backup_io('scandir'): - entries = helpers.scandir_inorder(path) - for dirent in entries: - normpath = os.path.normpath(dirent.path) - self._process(path=normpath, - fso=fso, cache=cache, matcher=matcher, - exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, - keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, - restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) + status = fso.process_dir(path=path, fd=child_fd, st=st) + if recurse: + with backup_io('scandir'): + entries = helpers.scandir_inorder(path=path, fd=child_fd) + for dirent in entries: + normpath = os.path.normpath(dirent.path) + self._process(path=normpath, parent_fd=child_fd, name=dirent.name, + fso=fso, cache=cache, matcher=matcher, + exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, + keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, + restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run) elif stat.S_ISLNK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_symlink(path=path, st=st) + status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) else: try: - st_target = os.stat(path) + st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True) except OSError: special = False else: special = is_special(st_target.st_mode) if special: - status = fso.process_file(path=path, st=st_target, cache=cache) + # XXX must FOLLOW symlinks! + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache) else: - status = fso.process_symlink(path=path, st=st) + status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) elif stat.S_ISFIFO(st.st_mode): if not dry_run: if not read_special: - status = fso.process_fifo(path=path, st=st) + status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st) else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISCHR(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path=path, st=st, dev_type='c') + status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c') else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISBLK(st.st_mode): if not dry_run: if not read_special: - status = fso.process_dev(path=path, st=st, dev_type='b') + status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b') else: - status = fso.process_file(path=path, st=st, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache) elif stat.S_ISSOCK(st.st_mode): # Ignore unix sockets return diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 93852c5bcc..6c9d9556f4 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -15,6 +15,9 @@ logger = create_logger() +py_37_plus = sys.version_info >= (3, 7) + + def get_base_dir(): """Get home directory / base directory for borg: @@ -103,18 +106,19 @@ def dir_is_cachedir(path): def dir_is_tagged(path, exclude_caches, exclude_if_present): """Determines whether the specified path is excluded by being a cache directory or containing user-specified tag files/directories. Returns a - list of the paths of the tag files/directories (either CACHEDIR.TAG or the + list of the names of the tag files/directories (either CACHEDIR.TAG or the matching user-specified files/directories). """ - tag_paths = [] + # TODO: do operations based on the directory fd + tag_names = [] if exclude_caches and dir_is_cachedir(path): - tag_paths.append(os.path.join(path, CACHE_TAG_NAME)) + tag_names.append(CACHE_TAG_NAME) if exclude_if_present is not None: for tag in exclude_if_present: tag_path = os.path.join(path, tag) if os.path.exists(tag_path): - tag_paths.append(tag_path) - return tag_paths + tag_names.append(tag) + return tag_names _safe_re = re.compile(r'^((\.\.)?/+)+') @@ -144,8 +148,10 @@ def scandir_keyfunc(dirent): return (1, dirent.name) -def scandir_inorder(path='.'): - return sorted(os.scandir(path), key=scandir_keyfunc) +def scandir_inorder(*, path, fd=None): + # py37+ supports giving a fd instead of a path + arg = fd if fd is not None and py_37_plus else path + return sorted(os.scandir(arg), key=scandir_keyfunc) def secure_erase(path): @@ -199,23 +205,39 @@ def O_(*flags): flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW' flags_normal = flags_base | O_('RDONLY') flags_noatime = flags_normal | O_('NOATIME') +flags_root = O_('RDONLY') +flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW') + +def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False): + """ + Use os.open to open a fs item. + + If parent_fd and name are given, they are preferred and openat will be used, + path is not used in this case. -def os_open(path, flags, noatime=False): + :param path: full (but not necessarily absolute) path + :param parent_fd: open directory file descriptor + :param name: name relative to parent_fd + :param flags: open flags for os.open() (int) + :param noatime: True if access time shall be preserved + :return: file descriptor + """ + fname = name if name is not None and parent_fd is not None else path _flags_normal = flags if noatime: _flags_noatime = _flags_normal | O_('NOATIME') try: # if we have O_NOATIME, this likely will succeed if we are root or owner of file: - fd = os.open(path, _flags_noatime) + fd = os.open(fname, _flags_noatime, dir_fd=parent_fd) except PermissionError: if _flags_noatime == _flags_normal: # we do not have O_NOATIME, no need to try again: raise # Was this EPERM due to the O_NOATIME flag? Try again without it: - fd = os.open(path, _flags_normal) + fd = os.open(fname, _flags_normal, dir_fd=parent_fd) else: - fd = os.open(path, _flags_normal) + fd = os.open(fname, _flags_normal, dir_fd=parent_fd) return fd From 66dd25ebc4cd318802e7311a54e766090fd391b8 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 13 Aug 2018 03:36:53 +0200 Subject: [PATCH 06/12] when scandir gets called with an FD, dirent.path is not usable if scandir does not get a path, it can't prefix it in front of the filename in the direntries it returns, so dirent.path == dirent.name. thus, we just only use dirent.name and construct the full path. --- src/borg/archiver.py | 2 +- src/borg/helpers/fs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 5747ed6129..137701264d 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -625,7 +625,7 @@ def _process(self, *, path, parent_fd=None, name=None, with backup_io('scandir'): entries = helpers.scandir_inorder(path=path, fd=child_fd) for dirent in entries: - normpath = os.path.normpath(dirent.path) + normpath = os.path.normpath(os.path.join(path, dirent.name)) self._process(path=normpath, parent_fd=child_fd, name=dirent.name, fso=fso, cache=cache, matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present, diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 6c9d9556f4..24c525d250 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -149,7 +149,7 @@ def scandir_keyfunc(dirent): def scandir_inorder(*, path, fd=None): - # py37+ supports giving a fd instead of a path + # py37+ supports giving an fd instead of a path (no full entry.path in DirEntry in that case!) arg = fd if fd is not None and py_37_plus else path return sorted(os.scandir(arg), key=scandir_keyfunc) From b960d3cd23db47f2ed9027f2a16a27a82c543131 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Tue, 14 Aug 2018 17:30:27 +0200 Subject: [PATCH 07/12] linux: acl_(get|set) - adapt to more FD usage / default acl for dirs acl_get: remove assumption that having an FD means it is a regular file, we try to use FDs a much as possible. only get the default acl for directories - other fs objects are not expected to have a default acl. the path needs to be encoded also for the case when we have an fd, it is needed to get the default acl for directories. also: micro-opt: encode path later, not needed for ISLNK check. acl_set: remove the "if False" branch, it is the same here: the fd-based api only supports access ACLs, but not default ACLs, so we always need to use the path-based api here. --- src/borg/platform/linux.pyx | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/src/borg/platform/linux.pyx b/src/borg/platform/linux.pyx index 115817f9ad..11007acdc7 100644 --- a/src/borg/platform/linux.pyx +++ b/src/borg/platform/linux.pyx @@ -233,10 +233,11 @@ def acl_get(path, item, st, numeric_owner=False, fd=None): cdef char *default_text = NULL cdef char *access_text = NULL - if fd is None and isinstance(path, str): - path = os.fsencode(path) if stat.S_ISLNK(st.st_mode): + # symlinks can not have ACLs return + if isinstance(path, str): + path = os.fsencode(path) if (fd is not None and acl_extended_fd(fd) <= 0 or fd is None and acl_extended_file(path) <= 0): @@ -247,12 +248,11 @@ def acl_get(path, item, st, numeric_owner=False, fd=None): converter = acl_append_numeric_ids try: if fd is not None: - # we only have a fd for FILES (not other fs objects), so we can get the access_acl: - assert stat.S_ISREG(st.st_mode) access_acl = acl_get_fd(fd) else: - # if we have no fd, it can be anything access_acl = acl_get_file(path, ACL_TYPE_ACCESS) + if stat.S_ISDIR(st.st_mode): + # only directories can have a default ACL. there is no fd-based api to get it. default_acl = acl_get_file(path, ACL_TYPE_DEFAULT) if access_acl: access_text = acl_to_text(access_acl, NULL) @@ -299,11 +299,8 @@ def acl_set(path, item, numeric_owner=False, fd=None): try: default_acl = acl_from_text(converter(default_text)) if default_acl: - # default acls apply only to directories - if False and fd is not None: # Linux API seems to not support this - acl_set_fd(fd, default_acl) - else: - acl_set_file(path, ACL_TYPE_DEFAULT, default_acl) + # only directories can get a default ACL. there is no fd-based api to set it. + acl_set_file(path, ACL_TYPE_DEFAULT, default_acl) finally: acl_free(default_acl) From 85b711fc887a6eca633c9ac18cac80bdafbdb654 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 24 Dec 2018 01:30:51 +0100 Subject: [PATCH 08/12] opening device files is troublesome, don't do it for fd-based operations, we would have to open the file, but for char / block devices this has unwanted effects, even if we do not read from the device. thus, we use path (or dir_fd + name) based ops here. --- src/borg/archive.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 87ebf19261..eb887218be 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1095,16 +1095,16 @@ def process_fifo(self, *, path, parent_fd, name, st): def process_dev(self, *, path, parent_fd, name, st, dev_type): with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device - with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: - with backup_io('fstat'): - curr_st = os.fstat(fd) - # XXX do some checks here: st vs. curr_st - assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode) - # make sure stats refer to same object that we are processing below - st = curr_st - item.rdev = st.st_rdev - item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) - return status + # looks like we can not work fd-based here without causing issues when trying to open/close the device + with backup_io('stat'): + curr_st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) + # XXX do some checks here: st vs. curr_st + assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode) + # make sure stats refer to same object that we are processing below + st = curr_st + item.rdev = st.st_rdev + item.update(self.metadata_collector.stat_attrs(st, path)) + return status def process_symlink(self, *, path, parent_fd, name, st): # note: using hardlinkable=False because we can not support hardlinked symlinks, From 39922e88e57aa221cd09b807506f02ebcab513d6 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 17 Feb 2019 02:46:03 +0100 Subject: [PATCH 09/12] micro-opt: get xattrs directly before acls on linux, acls are based on xattrs, so do these closeby: 1. listxattr -> keys (without acl related keys) 2. for all keys: getxattr 3. acl-related getxattr by acl library --- src/borg/archive.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index eb887218be..e14be30a69 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -954,9 +954,9 @@ def stat_ext_attrs(self, st, path, fd=None): attrs = {} bsdflags = 0 with backup_io('extended stat'): - xattrs = xattr.get_all(fd or path, follow_symlinks=False) if not self.nobsdflags: bsdflags = get_flags(path, st, fd=fd) + xattrs = xattr.get_all(fd or path, follow_symlinks=False) acl_get(path, attrs, st, self.numeric_owner, fd=fd) if xattrs: attrs['xattrs'] = StableDict(xattrs) From b4ca919d02037dbd21716c3ce5e89f908538761a Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 17 Feb 2019 05:17:52 +0100 Subject: [PATCH 10/12] add O_NOFOLLOW to base flags, see #908 scenario: - x is a regular file - borg does stat on x: is a regular file - so borg dispatches to process_file - attack: x gets replaced by a symlink (mv symlink x) - in process_file, borg opens x and must not follow the symlink nor continue processing as a normal file, but rather error in open() due to NOFOLLOW. --- src/borg/helpers/fs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 24c525d250..1f39d58df3 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -202,7 +202,7 @@ def O_(*flags): return result -flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') # later: add 'NOFOLLOW' +flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY', 'NOFOLLOW') flags_normal = flags_base | O_('RDONLY') flags_noatime = flags_normal | O_('NOATIME') flags_root = O_('RDONLY') From ec17f0a6072209c930f631b38128b64352ac7257 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Sun, 17 Feb 2019 06:45:24 +0100 Subject: [PATCH 11/12] check for stat race conditions, see #908 we must avoid a handler processing a fs item of wrong file type, so check if it has changed. --- src/borg/archive.py | 44 +++++++++++++++++++++++++++++--------------- src/borg/archiver.py | 10 +++------- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index e14be30a69..6083c8d82a 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -195,6 +195,32 @@ def backup_io_iter(iterator): yield item +def stat_update_check(st_old, st_curr): + """ + this checks for some race conditions between the first filename-based stat() + we did before dispatching to the (hopefully correct) file type backup handler + and the (hopefully) fd-based fstat() we did in the handler. + + if there is a problematic difference (e.g. file type changed), we rather + skip the file than being tricked into a security problem. + + such races should only happen if: + - we are backing up a live filesystem (no snapshot, not inactive) + - if files change due to normal fs activity at an unfortunate time + - if somebody is doing an attack against us + """ + # assuming that a file type change implicates a different inode change AND that inode numbers + # are not duplicate in a short timeframe, this check is redundant and solved by the ino check: + if stat.S_IFMT(st_old.st_mode) != stat.S_IFMT(st_curr.st_mode): + # in this case, we dispatched to wrong handler - abort + raise BackupError('file type changed (race condition), skipping file') + if st_old.st_ino != st_curr.st_ino: + # in this case, the hardlinks-related code in create_helper has the wrong inode - abort! + raise BackupError('file inode changed (race condition), skipping file') + # looks ok, we are still dealing with the same thing - return current stat: + return st_curr + + @contextmanager def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'): with backup_io(op): @@ -1085,11 +1111,7 @@ def process_fifo(self, *, path, parent_fd, name, st): with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master): # fifo with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io('fstat'): - curr_st = os.fstat(fd) - # XXX do some checks here: st vs. curr_st - assert stat.S_ISFIFO(curr_st.st_mode) - # make sure stats refer to same object that we are processing below - st = curr_st + st = stat_update_check(st, os.fstat(fd)) item.update(self.metadata_collector.stat_attrs(st, path, fd=fd)) return status @@ -1097,11 +1119,7 @@ def process_dev(self, *, path, parent_fd, name, st, dev_type): with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master): # char/block device # looks like we can not work fd-based here without causing issues when trying to open/close the device with backup_io('stat'): - curr_st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False) - # XXX do some checks here: st vs. curr_st - assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode) - # make sure stats refer to same object that we are processing below - st = curr_st + st = stat_update_check(st, os.stat(name, dir_fd=parent_fd, follow_symlinks=False)) item.rdev = st.st_rdev item.update(self.metadata_collector.stat_attrs(st, path)) return status @@ -1139,11 +1157,7 @@ def process_file(self, *, path, parent_fd, name, st, cache): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: with backup_io('fstat'): - curr_st = os.fstat(fd) - # XXX do some checks here: st vs. curr_st - assert stat.S_ISREG(curr_st.st_mode) - # make sure stats refer to same object that we are processing below - st = curr_st + st = stat_update_check(st, os.fstat(fd)) is_special_file = is_special(st.st_mode) if not hardlinked or hardlink_master: if not is_special_file: diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 137701264d..116907c4d4 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -34,7 +34,7 @@ from . import helpers from .algorithms.checksums import crc32 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special -from .archive import BackupError, BackupOSError, backup_io, OsOpen +from .archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor from .cache import Cache, assert_secure, SecurityManager from .constants import * # NOQA @@ -596,11 +596,7 @@ def _process(self, *, path, parent_fd=None, name=None, with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op='dir_open') as child_fd: with backup_io('fstat'): - curr_st = os.fstat(child_fd) - # XXX do some checks here: st vs. curr_st - assert stat.S_ISDIR(curr_st.st_mode) - # make sure stats refer to same object that we are processing below - st = curr_st + st = stat_update_check(st, os.fstat(child_fd)) if recurse: tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present) if tag_names: @@ -677,7 +673,7 @@ def _process(self, *, path, parent_fd=None, name=None, else: self.print_warning('Unknown file type: %s', path) return - except BackupOSError as e: + except (BackupOSError, BackupError) as e: self.print_warning('%s: %s', path, e) status = 'E' # Status output From 23eeded7c5bc56c46d7bf7966c4062d98a476c28 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 20 Feb 2019 10:13:09 +0100 Subject: [PATCH 12/12] fix --read-special behaviour: follow symlinks pointing to special files also: added a test for this. --- src/borg/archive.py | 4 ++-- src/borg/archiver.py | 6 +++--- src/borg/helpers/fs.py | 5 +++-- src/borg/testsuite/archiver.py | 14 ++++++++++++++ 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/borg/archive.py b/src/borg/archive.py index 6083c8d82a..21ed7b0cda 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -1153,9 +1153,9 @@ def process_stdin(self, *, path, cache): self.add_item(item, stats=self.stats) return 'i' # stdin - def process_file(self, *, path, parent_fd, name, st, cache): + def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal): with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master): # no status yet - with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd: + with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd: with backup_io('fstat'): st = stat_update_check(st, os.fstat(fd)) is_special_file = is_special(st.st_mode) diff --git a/src/borg/archiver.py b/src/borg/archiver.py index 116907c4d4..e7111c2a4a 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -66,7 +66,7 @@ from .helpers import popen_with_error_handling, prepare_subprocess_env from .helpers import dash_open from .helpers import umount -from .helpers import flags_root, flags_dir +from .helpers import flags_root, flags_dir, flags_follow from .helpers import msgpack from .nanorst import rst_to_terminal from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern @@ -639,8 +639,8 @@ def _process(self, *, path, parent_fd=None, name=None, else: special = is_special(st_target.st_mode) if special: - # XXX must FOLLOW symlinks! - status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache) + status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, + cache=cache, flags=flags_follow) else: status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st) elif stat.S_ISFIFO(st.st_mode): diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index 1f39d58df3..7e747e7560 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -202,8 +202,9 @@ def O_(*flags): return result -flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY', 'NOFOLLOW') -flags_normal = flags_base | O_('RDONLY') +flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY') +flags_follow = flags_base | O_('RDONLY') +flags_normal = flags_base | O_('RDONLY', 'NOFOLLOW') flags_noatime = flags_normal | O_('NOATIME') flags_root = O_('RDONLY') flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW') diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 7c1316db24..cc828adb4c 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1780,6 +1780,20 @@ def test_create_topical(self): output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input') self.assert_in('file1', output) + def test_create_read_special(self): + self.create_regular_file('regular', size=1024) + os.symlink(os.path.join(self.input_path, 'file'), os.path.join(self.input_path, 'link_regular')) + if are_fifos_supported(): + os.mkfifo(os.path.join(self.input_path, 'fifo')) + os.symlink(os.path.join(self.input_path, 'fifo'), os.path.join(self.input_path, 'link_fifo')) + self.cmd('init', '--encryption=repokey', self.repository_location) + archive = self.repository_location + '::test' + self.cmd('create', '--read-special', archive, 'input') + output = self.cmd('list', archive) + assert 'input/link_regular -> ' in output # not pointing to special file: archived as symlink + if are_fifos_supported(): + assert 'input/link_fifo\n' in output # pointing to a special file: archived following symlink + def test_create_read_special_broken_symlink(self): os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link')) self.cmd('init', '--encryption=repokey', self.repository_location)