diff --git a/borg/archiver.py b/borg/archiver.py
index 28f1d8a3f1..62da098eb6 100644
--- a/borg/archiver.py
+++ b/borg/archiver.py
@@ -17,6 +17,7 @@
 from . import __version__
 from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
 from .compress import Compressor, COMPR_BUFFER
+from .upgrader import AtticRepositoryUpgrader
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator
@@ -462,6 +463,24 @@ def do_prune(self, args):
             stats.print_('Deleted data:', cache)
         return self.exit_code
 
+    def do_upgrade(self, args):
+        """upgrade a repository from a previous version"""
+        # XXX: currently only upgrades from Attic repositories, but may
+        # eventually be extended to deal with major upgrades for borg
+        # itself.
+        #
+        # in this case, it should auto-detect the current repository
+        # format and fire up necessary upgrade mechanism. this remains
+        # to be implemented.
+
+        # XXX: should auto-detect if it is an attic repository here
+        repo = AtticRepositoryUpgrader(args.repository.path, create=False)
+        try:
+            repo.upgrade(args.dry_run)
+        except NotImplementedError as e:
+            print("warning: %s" % e)
+        return self.exit_code
+
     helptext = {}
     helptext['patterns'] = '''
         Exclude patterns use a variant of shell pattern syntax, with '*' matching any
@@ -896,6 +915,53 @@ def run(self, args=None):
                                type=location_validator(archive=False),
                                help='repository to prune')
 
+        upgrade_epilog = textwrap.dedent("""
+        upgrade an existing Borg repository in place. this currently
+        only support converting an Attic repository, but may
+        eventually be extended to cover major Borg upgrades as well.
+
+        it will change the magic strings in the repository's segments
+        to match the new Borg magic strings. the keyfiles found in
+        $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and
+        copied to $BORG_KEYS_DIR or ~/.borg/keys.
+
+        the cache files are converted, from $ATTIC_CACHE_DIR or
+        ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the
+        cache layout between Borg and Attic changed, so it is possible
+        the first backup after the conversion takes longer than expected
+        due to the cache resync.
+
+        it is recommended you run this on a copy of the Attic
+        repository, in case something goes wrong, for example:
+
+            cp -a attic borg
+            borg upgrade -n borg
+            borg upgrade borg
+
+        upgrade should be able to resume if interrupted, although it
+        will still iterate over all segments. if you want to start
+        from scratch, use `borg delete` over the copied repository to
+        make sure the cache files are also removed:
+
+            borg delete borg
+
+        the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic
+        will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as
+        the magic strings will have changed.
+
+        you have been warned.""")
+        subparser = subparsers.add_parser('upgrade', parents=[common_parser],
+                                          description=self.do_upgrade.__doc__,
+                                          epilog=upgrade_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter)
+        subparser.set_defaults(func=self.do_upgrade)
+        subparser.add_argument('-n', '--dry-run', dest='dry_run',
+                               default=False, action='store_true',
+                               help='do not change repository')
+        subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='',
+                               type=location_validator(archive=False),
+                               help='path to the repository to be upgraded')
+
         subparser = subparsers.add_parser('help', parents=[common_parser],
                                           description='Extra help')
         subparser.add_argument('--epilog-only', dest='epilog_only',
diff --git a/borg/testsuite/upgrader.py b/borg/testsuite/upgrader.py
new file mode 100644
index 0000000000..22278f9ac4
--- /dev/null
+++ b/borg/testsuite/upgrader.py
@@ -0,0 +1,165 @@
+import os
+import shutil
+import tempfile
+
+import pytest
+
+try:
+    import attic.repository
+    import attic.key
+    import attic.helpers
+except ImportError:
+    attic = None
+
+from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey
+from ..helpers import get_keys_dir
+from ..key import KeyfileKey
+from ..repository import Repository, MAGIC
+
+pytestmark = pytest.mark.skipif(attic is None,
+                                reason='cannot find an attic install')
+
+
+def repo_valid(path):
+    """
+    utility function to check if borg can open a repository
+
+    :param path: the path to the repository
+    :returns: if borg can check the repository
+    """
+    repository = Repository(str(path), create=False)
+    # can't check raises() because check() handles the error
+    state = repository.check()
+    repository.close()
+    return state
+
+
+def key_valid(path):
+    """
+    check that the new keyfile is alright
+
+    :param path: the path to the key file
+    :returns: if the file starts with the borg magic string
+    """
+    keyfile = os.path.join(get_keys_dir(),
+                           os.path.basename(path))
+    with open(keyfile, 'r') as f:
+        return f.read().startswith(KeyfileKey.FILE_ID)
+
+
+@pytest.fixture()
+def attic_repo(tmpdir):
+    """
+    create an attic repo with some stuff in it
+
+    :param tmpdir: path to the repository to be created
+    :returns: a attic.repository.Repository object
+    """
+    attic_repo = attic.repository.Repository(str(tmpdir), create=True)
+    # throw some stuff in that repo, copied from `RepositoryTestCase.test1`
+    for x in range(100):
+        attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA')
+    attic_repo.commit()
+    attic_repo.close()
+    return attic_repo
+
+
+def test_convert_segments(tmpdir, attic_repo):
+    """test segment conversion
+
+    this will load the given attic repository, list all the segments
+    then convert them one at a time. we need to close the repo before
+    conversion otherwise we have errors from borg
+
+    :param tmpdir: a temporary directory to run the test in (builtin
+    fixture)
+    :param attic_repo: a populated attic repository (fixture)
+    """
+    # check should fail because of magic number
+    assert not repo_valid(tmpdir)
+    print("opening attic repository with borg and converting")
+    repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    segments = [filename for i, filename in repo.io.segment_iterator()]
+    repo.close()
+    repo.convert_segments(segments, dryrun=False)
+    repo.convert_cache(dryrun=False)
+    assert repo_valid(tmpdir)
+
+
+class MockArgs:
+    """
+    mock attic location
+
+    this is used to simulate a key location with a properly loaded
+    repository object to create a key file
+    """
+    def __init__(self, path):
+        self.repository = attic.helpers.Location(path)
+
+
+@pytest.fixture()
+def attic_key_file(attic_repo, tmpdir, monkeypatch):
+    """
+    create an attic key file from the given repo, in the keys
+    subdirectory of the given tmpdir
+
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :param monkeypatch: builtin fixture, restores the environment
+    after each test so the settings below do not leak to other tests
+    :returns: the KeyfileKey object as returned by
+    attic.key.KeyfileKey.create()
+    """
+    keys_dir = str(tmpdir.mkdir('keys'))
+
+    # we use the repo dir for the created keyfile, because we do
+    # not want to clutter existing keyfiles
+    monkeypatch.setenv('ATTIC_KEYS_DIR', keys_dir)
+
+    # we use the same directory for the converted files, which
+    # will clutter the previously created one, which we don't care
+    # about anyways. in real runs, the original key will be retained.
+    monkeypatch.setenv('BORG_KEYS_DIR', keys_dir)
+    monkeypatch.setenv('ATTIC_PASSPHRASE', 'test')
+    return attic.key.KeyfileKey.create(attic_repo,
+                                       MockArgs(keys_dir))
+
+
+def test_keys(tmpdir, attic_repo, attic_key_file):
+    """test key conversion
+
+    test that we can convert the given key to a properly formatted
+    borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have
+    been properly populated by the attic_key_file fixture.
+
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param attic_key_file: an attic.key.KeyfileKey (fixture created above)
+    """
+    repository = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    keyfile = AtticKeyfileKey.find_key_file(repository)
+    AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False)
+    assert key_valid(attic_key_file.path)
+
+
+def test_convert_all(tmpdir, attic_repo, attic_key_file):
+    """test all conversion steps
+
+    this runs everything. mostly redundant test, since everything is
+    done above, but it checks that the steps also work when chained
+    together by `upgrade()`.
+
+    :param tmpdir: a temporary directory (a builtin fixture)
+    :param attic_repo: an attic.repository.Repository object (fixture
+    defined above)
+    :param attic_key_file: an attic.key.KeyfileKey (fixture created above)
+    """
+    # check should fail because of magic number
+    assert not repo_valid(tmpdir)
+    print("opening attic repository with borg and converting")
+    repo = AtticRepositoryUpgrader(str(tmpdir), create=False)
+    repo.upgrade(dryrun=False)
+    assert key_valid(attic_key_file.path)
+    assert repo_valid(tmpdir)
diff --git a/borg/upgrader.py b/borg/upgrader.py
new file mode 100644
index 0000000000..33ef2d3888
--- /dev/null
+++ b/borg/upgrader.py
@@ -0,0 +1,248 @@
+from binascii import hexlify
+import os
+import shutil
+import time
+
+from .helpers import get_keys_dir, get_cache_dir
+from .locking import UpgradableLock
+from .repository import Repository, MAGIC
+from .key import KeyfileKey, KeyfileNotFoundError
+
+ATTIC_MAGIC = b'ATTICSEG'
+
+
+class AtticRepositoryUpgrader(Repository):
+    def upgrade(self, dryrun=True):
+        """convert an attic repository to a borg repository
+
+        those are the files that need to be upgraded here, from most
+        important to least important: segments, key files, and various
+        caches, the latter being optional, as they will be rebuilt if
+        missing.
+
+        we nevertheless do the order in reverse, as we prefer to do
+        the fast stuff first, to improve interactivity.
+        """
+        print("reading segments from attic repository using borg")
+        # we need to open it to load the configuration and other fields
+        self.open(self.path, exclusive=False)
+        try:
+            segments = [filename for i, filename in self.io.segment_iterator()]
+            try:
+                keyfile = self.find_attic_keyfile()
+            except KeyfileNotFoundError:
+                print("no key file found for repository")
+            else:
+                self.convert_keyfiles(keyfile, dryrun)
+        finally:
+            # always release the repository, even if key conversion fails
+            self.close()
+        # partial open: just hold on to the lock
+        self.lock = UpgradableLock(os.path.join(self.path, 'lock'),
+                                   exclusive=True).acquire()
+        try:
+            self.convert_cache(dryrun)
+            self.convert_segments(segments, dryrun)
+        finally:
+            self.lock.release()
+            self.lock = None
+
+    @staticmethod
+    def convert_segments(segments, dryrun):
+        """convert repository segments from attic to borg
+
+        replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in
+        `$ATTIC_REPO/data/**`.
+
+        luckily the magic string length didn't change so we can just
+        replace the 8 first bytes of all regular files in there."""
+        print("converting %d segments..." % len(segments))
+        for i, filename in enumerate(segments, 1):
+            print("\rconverting segment %d/%d in place, %.2f%% done (%s)"
+                  % (i, len(segments), 100*float(i)/len(segments), filename), end='')
+            if dryrun:
+                time.sleep(0.001)
+            else:
+                AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC)
+        print()
+
+    @staticmethod
+    def header_replace(filename, old_magic, new_magic):
+        """replace the magic string at the start of a file, in place
+
+        the file is only written to if it currently starts with
+        `old_magic`, so running this again on an already converted
+        file changes nothing."""
+        with open(filename, 'r+b') as segment:
+            segment.seek(0)
+            # only write if necessary
+            if segment.read(len(old_magic)) == old_magic:
+                segment.seek(0)
+                segment.write(new_magic)
+
+    def find_attic_keyfile(self):
+        """find the attic keyfiles
+
+        the keyfiles are loaded by `KeyfileKey.find_key_file()`. that
+        finds the keys with the right identifier for the repo.
+
+        this is expected to look into $HOME/.attic/keys or
+        $ATTIC_KEYS_DIR for key files matching the given Borg
+        repository.
+
+        it is expected to raise an exception (KeyfileNotFoundError) if
+        no key is found. whether that exception is from Borg or Attic
+        is unclear.
+
+        this is split in a separate function in case we want to use
+        the attic code here directly, instead of our local
+        implementation."""
+        return AtticKeyfileKey.find_key_file(self)
+
+    @staticmethod
+    def convert_keyfiles(keyfile, dryrun):
+
+        """convert key files from attic to borg
+
+        replacement pattern is `s/ATTIC KEY/BORG_KEY/` in
+        `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or
+        `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or
+        `$HOME/.borg/keys`.
+
+        no need to decrypt to convert. we need to rewrite the whole
+        key file because magic string length changed, but that's not a
+        problem because the keyfiles are small (compared to, say,
+        all the segments)."""
+        print("converting keyfile %s" % keyfile)
+        with open(keyfile, 'r') as f:
+            data = f.read()
+        data = data.replace(AtticKeyfileKey.FILE_ID, KeyfileKey.FILE_ID, 1)
+        keyfile = os.path.join(get_keys_dir(), os.path.basename(keyfile))
+        print("writing borg keyfile to %s" % keyfile)
+        if not dryrun:
+            with open(keyfile, 'w') as f:
+                f.write(data)
+
+    def convert_cache(self, dryrun):
+        """convert caches from attic to borg
+
+        those are all hash indexes, so we need to
+        `s/ATTICIDX/BORG_IDX/` in a few locations:
+
+        * the repository index (in `$ATTIC_REPO/index.%d`, where `%d`
+          is the `Repository.get_index_transaction_id()`), which we
+          should probably update, with a lock, see
+          `Repository.open()`, which i'm not sure we should use
+          because it may write data on `Repository.close()`...
+
+        * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or
+          `$HOME/.cache/attic/{repo id}/`), which we could just drop,
+          but if we'd want to convert, we could open it with the
+          `Cache.open()`, edit in place and then `Cache.close()` to
+          make sure we have locking right
+        """
+        transaction_id = self.get_index_transaction_id()
+        if transaction_id is None:
+            print('no index file found for repository %s' % self.path)
+        else:
+            # the repository index is a hash index too: fix its header
+            index = os.path.join(self.path, 'index.%d' % transaction_id)
+            print("converting index %s" % index)
+            if not dryrun:
+                AtticRepositoryUpgrader.header_replace(index, b'ATTICIDX', b'BORG_IDX')
+
+        # copy of attic's get_cache_dir()
+        attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR',
+                                         os.path.join(os.path.expanduser('~'),
+                                                      '.cache', 'attic'))
+        attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'))
+        borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'))
+
+        def copy_cache_file(path):
+            """copy the given attic cache path into the borg directory
+
+            does nothing if dryrun is True. also expects
+            attic_cache_dir and borg_cache_dir to be set in the parent
+            scope, to the directories path including the repository
+            identifier.
+
+            :params path: the basename of the cache file to copy
+            (example: "files" or "chunks") as a string
+
+            :returns: the borg file that was created or None if none
+            was created.
+
+            """
+            attic_file = os.path.join(attic_cache_dir, path)
+            if os.path.exists(attic_file):
+                borg_file = os.path.join(borg_cache_dir, path)
+                if os.path.exists(borg_file):
+                    print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file))
+                else:
+                    print("copying attic cache file from %s to %s" % (attic_file, borg_file))
+                    if not dryrun:
+                        shutil.copyfile(attic_file, borg_file)
+                return borg_file
+            else:
+                print("no %s cache file found in %s" % (path, attic_file))
+                return None
+
+        # XXX: untested, because generating cache files is a PITA, see
+        # Archiver.do_create() for proof
+        if os.path.exists(attic_cache_dir):
+            if not dryrun and not os.path.exists(borg_cache_dir):
+                os.makedirs(borg_cache_dir)
+
+            # file that we don't have a header to convert, just copy
+            for cache in ['config', 'files']:
+                copy_cache_file(cache)
+
+            # we need to convert the headers of those files, copy first
+            for cache in ['chunks']:
+                copied = copy_cache_file(cache)
+                if copied:
+                    print("converting cache %s" % cache)
+                    if not dryrun:
+                        AtticRepositoryUpgrader.header_replace(copied, b'ATTICIDX', b'BORG_IDX')
+
+
+class AtticKeyfileKey(KeyfileKey):
+    """backwards compatible Attic key file parser"""
+    FILE_ID = 'ATTIC KEY'
+
+    # verbatim copy from attic
+    @staticmethod
+    def get_keys_dir():
+        """Determine where to repository keys and cache"""
+        return os.environ.get('ATTIC_KEYS_DIR',
+                              os.path.join(os.path.expanduser('~'), '.attic', 'keys'))
+
+    @classmethod
+    def find_key_file(cls, repository):
+        """copy of attic's `find_key_file`_
+
+        this has two small modifications:
+
+        1. it uses the above `get_keys_dir`_ instead of the global one,
+           assumed to be borg's
+
+        2. it uses `repository.path`_ instead of
+           `repository._location.canonical_path`_ because we can't
+           assume the repository has been opened by the archiver yet
+
+        :raises KeyfileNotFoundError: if no key file matches the
+        repository, or if the keys directory does not exist
+        """
+        repo_id = hexlify(repository.id).decode('ascii')
+        keys_dir = cls.get_keys_dir()
+        # first line of a key file: FILE_ID, one separator, hex repository id
+        id_offset = len(cls.FILE_ID) + 1
+        # a missing keys directory simply means there is no key to find
+        names = os.listdir(keys_dir) if os.path.isdir(keys_dir) else []
+        for name in names:
+            filename = os.path.join(keys_dir, name)
+            with open(filename, 'r') as fd:
+                line = fd.readline().strip()
+            if line.startswith(cls.FILE_ID) and line[id_offset:] == repo_id:
+                return filename
+        raise KeyfileNotFoundError(repository.path, keys_dir)
diff --git a/tox.ini b/tox.ini
index d177c121ad..a9ccb5e040 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,13 +2,15 @@
 # fakeroot -u tox --recreate
 
 [tox]
-envlist = py32, py33, py34, py35
+envlist = py{32,33,34,35}
 
 [testenv]
 # Change dir to avoid import problem for cython code. The directory does
 # not really matter, should be just different from the toplevel dir.
 changedir = {toxworkdir}
-deps = -rrequirements.d/development.txt
+deps =
+     -rrequirements.d/development.txt
+     attic
 commands = py.test --cov=borg --pyargs {posargs:borg.testsuite}
 # fakeroot -u needs some env vars:
 passenv = *