Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 35 additions & 6 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -783,14 +783,14 @@ def extract_helper(self, item, path, hlm, *, dry_run=False):
def extract_item(
self,
item,
*,
restore_attrs=True,
dry_run=False,
stdout=False,
sparse=False,
hlm=None,
stripped_components=0,
original_path=None,
pi=None,
continue_extraction=False,
):
"""
Extract archive item.
Expand All @@ -801,10 +801,28 @@ def extract_item(
:param stdout: write extracted data to stdout
:param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
:param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
:param stripped_components: stripped leading path components to correct hard link extraction
:param original_path: 'path' key as stored in archive
:param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
:param continue_extraction: continue a previously interrupted extraction of same archive
"""

def same_item(item, st):
    """is the archived item the same as the fs item at same path with stat st?"""
    # we only "optimize" for regular files; other file types are less frequent
    # and have no content extraction we could "optimize away".
    is_regular_file = stat.S_ISREG(st.st_mode)
    # the size comparison catches an incomplete previous file extraction.
    # note: mtime is "extracted" late, after xattrs and ACLs, but before flags.
    unchanged = (
        is_regular_file
        and item.mode == st.st_mode
        and item.size == st.st_size
        and item.get("mtime") == st.st_mtime_ns
    )
    # this is good enough for the intended use case:
    # continuing an extraction of same archive that initially started in an empty directory.
    # there is a very small risk that "bsdflags" of one file are wrong:
    # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
    return unchanged

has_damaged_chunks = "chunks_healthy" in item
if dry_run or stdout:
with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
Expand Down Expand Up @@ -834,15 +852,16 @@ def extract_item(
raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
return

original_path = original_path or item.path
dest = self.cwd
if item.path.startswith(("/", "../")):
raise Exception("Path should be relative and local")
path = os.path.join(dest, item.path)
# Attempt to remove existing files, ignore errors on failure
try:
st = os.stat(path, follow_symlinks=False)
if stat.S_ISDIR(st.st_mode):
if continue_extraction and same_item(item, st):
return # done! we already have fully extracted this file in a previous run.
elif stat.S_ISDIR(st.st_mode):
os.rmdir(path)
else:
os.unlink(path)
Expand Down Expand Up @@ -998,6 +1017,16 @@ def restore_attrs(self, path, item, symlink=False, fd=None):
set_flags(path, item.bsdflags, fd=fd)
except OSError:
pass
else: # win32
# set timestamps rather late
mtime = item.mtime
atime = item.atime if "atime" in item else mtime
try:
# note: no fd support on win32
os.utime(path, None, ns=(atime, mtime))
except OSError:
# some systems don't support calling utime on a symlink
pass

def set_meta(self, key, value):
metadata = self._load_meta(self.id)
Expand Down
15 changes: 8 additions & 7 deletions src/borg/archiver/extract_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def do_extract(self, args, repository, manifest, archive):
stdout = args.stdout
sparse = args.sparse
strip_components = args.strip_components
continue_extraction = args.continue_extraction
dirs = []
hlm = HardLinkManager(id_type=bytes, info_type=str) # hlid -> path

Expand Down Expand Up @@ -76,13 +77,7 @@ def do_extract(self, args, repository, manifest, archive):
archive.extract_item(item, stdout=stdout, restore_attrs=False)
else:
archive.extract_item(
item,
stdout=stdout,
sparse=sparse,
hlm=hlm,
stripped_components=strip_components,
original_path=orig_path,
pi=pi,
item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
)
except (BackupOSError, BackupError) as e:
self.print_warning("%s: %s", remove_surrogates(orig_path), e)
Expand Down Expand Up @@ -174,6 +169,12 @@ def build_parser_extract(self, subparsers, common_parser, mid_common_parser):
action="store_true",
help="create holes in output sparse file from all-zero chunks",
)
subparser.add_argument(
"--continue",
dest="continue_extraction",
action="store_true",
help="continue a previously interrupted extraction of same archive",
)
subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
subparser.add_argument(
"paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
Expand Down
45 changes: 44 additions & 1 deletion src/borg/testsuite/archiver/extract_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ...helpers import flags_noatime, flags_normal
from .. import changedir, same_ts_ns
from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
from ..platform import is_darwin
from ..platform import is_darwin, is_win32
from . import (
ArchiverTestCaseBase,
ArchiverTestCaseBinaryBase,
Expand Down Expand Up @@ -621,6 +621,49 @@ def patched_setxattr_EACCES(*args, **kwargs):
with patch.object(xattr, "setxattr", patched_setxattr_EACCES):
self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=EXIT_WARNING)

def test_extract_continue(self):
    """Test ``borg extract --continue``: re-extract only incomplete or missing files.

    Simulates an interrupted extraction by modifying the extracted output
    (truncating one file, removing another), then re-runs extract with
    --continue and verifies which files were re-extracted (via inode and
    timestamp changes) and that all final contents are correct.
    """
    # distinct sizes/contents per file so size checks can distinguish them
    CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
    self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
    self.create_regular_file("file1", contents=CONTENTS1)
    self.create_regular_file("file2", contents=CONTENTS2)
    self.create_regular_file("file3", contents=CONTENTS3)
    self.cmd(f"--repo={self.repository_location}", "create", "arch", "input")
    with changedir("output"):
        # we simulate an interrupted/partial extraction:
        self.cmd(f"--repo={self.repository_location}", "extract", "arch")
        # do not modify file1, it stands for a successfully extracted file
        file1_st = os.stat("input/file1")
        # simulate a partially extracted file2 (smaller size, archived mtime not yet set)
        file2_st = os.stat("input/file2")  # stat taken BEFORE truncation: has the correct mtime/size
        os.truncate("input/file2", 123)  # -> incorrect size, incorrect mtime
        # simulate file3 has not yet been extracted
        file3_st = os.stat("input/file3")
        os.remove("input/file3")
    with changedir("output"):
        # now try to continue extracting, using the same archive, same output dir:
        self.cmd(f"--repo={self.repository_location}", "extract", "arch", "--continue")
        now_file1_st = os.stat("input/file1")
        assert file1_st.st_ino == now_file1_st.st_ino  # file1 was NOT extracted again
        assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns  # has correct mtime
        new_file2_st = os.stat("input/file2")
        assert file2_st.st_ino != new_file2_st.st_ino  # file2 was extracted again
        assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns  # has correct mtime
        new_file3_st = os.stat("input/file3")
        assert file3_st.st_ino != new_file3_st.st_ino  # file3 was extracted again
        assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns  # has correct mtime
        # windows has a strange ctime behaviour when deleting and recreating a file
        if not is_win32:
            assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns  # file not extracted again
            assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns  # file extracted again
            assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns  # file extracted again
        # check if all contents (and thus also file sizes) are correct:
        with open("input/file1", "rb") as f:
            assert f.read() == CONTENTS1
        with open("input/file2", "rb") as f:
            assert f.read() == CONTENTS2
        with open("input/file3", "rb") as f:
            assert f.read() == CONTENTS3


class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
"""run the same tests, but with a remote repository"""
Expand Down