From de54228809046d3942b5c20e2d8fbea54653aba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 21:08:47 -0400 Subject: [PATCH 01/63] first stab at an attic-borg converter for now, just in the test suite, but will be migrated to a separate command --- borg/testsuite/convert.py | 108 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 borg/testsuite/convert.py diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py new file mode 100644 index 0000000000..e201581c35 --- /dev/null +++ b/borg/testsuite/convert.py @@ -0,0 +1,108 @@ +import binascii +import os +import pytest +import shutil +import tempfile + +import attic.repository + +from ..helpers import IntegrityError +from ..repository import Repository, MAGIC +from . import BaseTestCase + +class NotImplementedException(Exception): + pass + +class ConversionTestCase(BaseTestCase): + + def open(self, path, repo_type = Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create = create) + + def setUp(self): + self.tmppath = tempfile.mkdtemp() + self.attic_repo = self.open(self.tmppath, + repo_type = attic.repository.Repository, + create = True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + for x in range(100): + self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() + + def test_convert(self): + self.repository = self.open(self.tmppath) + # check should fail because of magic number + assert not self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + self.convert() + self.repository = self.open(self.tmppath) + assert self.repository.check() # can't check raises() because check() handles the error + self.repository.close() + + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least 
important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + self.convert_segments() + with pytest.raises(NotImplementedException): + self.convert_keyfiles() + with pytest.raises(NotImplementedException): + self.convert_cache() + + def convert_segments(self): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there. + + `Repository.segment_iterator()` could be used here.''' + self.repository = self.open(self.tmppath) + segs = [ filename for i, filename in self.repository.io.segment_iterator() ] + self.repository.close() + for filename in segs: + print("converting segment %s..." % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def convert_keyfiles(self): + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo, no need + to decrypt to convert. will need to rewrite the whole key file + because magic number length changed.''' + raise NotImplementedException('not implemented') + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + + def tearDown(self): + shutil.rmtree(self.tmppath) From 9ab1e1961e8acf29b17b1acedc62b4f717b1fd65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:23:11 -0400 Subject: [PATCH 02/63] keyfile conversion code --- borg/testsuite/convert.py | 84 ++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 15 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e201581c35..ba3af8ee68 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -5,16 +5,27 @@ import tempfile import attic.repository +import attic.key +import attic.helpers -from ..helpers import IntegrityError +from ..helpers import IntegrityError, get_keys_dir from ..repository import Repository, MAGIC +from ..key import KeyfileKey, KeyfileNotFoundError from . 
import BaseTestCase class NotImplementedException(Exception): pass +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + class ConversionTestCase(BaseTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -26,6 +37,10 @@ def setUp(self): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.keysdir = self.MockArgs(self.tmppath) + os.environ['ATTIC_KEYS_DIR'] = self.tmppath + os.environ['ATTIC_PASSPHRASE'] = 'test' + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) self.attic_repo.close() def test_convert(self): @@ -33,9 +48,15 @@ def test_convert(self): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() + os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) self.repository = self.open(self.tmppath) - assert self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() self.repository.close() def convert(self): @@ -45,32 +66,52 @@ def convert(self): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - self.convert_segments() - with pytest.raises(NotImplementedException): - self.convert_keyfiles() + self.repository = self.open(self.tmppath) + segments = [ filename for i, filename in self.repository.io.segment_iterator() ] + try: + keyfile = 
self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository, not converting") + else: + self.convert_keyfiles(keyfile) + self.repository.close() + self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self): + def convert_segments(self, segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there. - - `Repository.segment_iterator()` could be used here.''' - self.repository = self.open(self.tmppath) - segs = [ filename for i, filename in self.repository.io.segment_iterator() ] - self.repository.close() - for filename in segs: + replace the 8 first bytes of all regular files in there.''' + for filename in segments: print("converting segment %s..." % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) - def convert_keyfiles(self): + def find_attic_keyfile(self): + '''find the attic keyfiles + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to + reimplement the attic code here. + ''' + self.repository._location = attic.helpers.Location(self.tmppath) + return attic.key.KeyfileKey().find_key_file(self.repository) + + def convert_keyfiles(self, keyfile): + '''convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in @@ -82,7 +123,20 @@ def convert_keyfiles(self): finds the keys with the right identifier for the repo, no need to decrypt to convert. 
will need to rewrite the whole key file because magic number length changed.''' - raise NotImplementedException('not implemented') + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self): '''convert caches from attic to borg From e88a994c8a2bd269d23a1fb4307dd2d9923c5668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:40:46 -0400 Subject: [PATCH 03/63] reshuffle and document --- borg/testsuite/convert.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ba3af8ee68..52564134d6 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,10 +37,17 @@ def setUp(self): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.keysdir = self.MockArgs(self.tmppath) + + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath + + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. 
+ os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.keysdir) + self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() def test_convert(self): @@ -48,7 +55,6 @@ def test_convert(self): # check should fail because of magic number assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - os.environ['BORG_KEYS_DIR'] = self.tmppath self.convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), @@ -96,6 +102,9 @@ def convert_segments(self, segments): def find_attic_keyfile(self): '''find the attic keyfiles + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo + this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg repository. @@ -119,10 +128,10 @@ def convert_keyfiles(self, keyfile): `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or `$HOME/.borg/keys`. - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo, no need - to decrypt to convert. will need to rewrite the whole key file - because magic number length changed.''' + no need to decrypt to convert. 
we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() From 2d1988179e5149bb7bc29d589a5fa6887b00e76b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:41:38 -0400 Subject: [PATCH 04/63] some debugging code --- borg/testsuite/convert.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 52564134d6..c95ffa793f 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -53,6 +53,7 @@ def setUp(self): def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number + print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() self.convert() @@ -72,7 +73,9 @@ def convert(self): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' + print("opening attic repository with borg") self.repository = self.open(self.tmppath) + print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.repository.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -94,7 +97,7 @@ def convert_segments(self, segments): luckily the segment length didn't change so we can just replace the 8 first bytes of all regular files in there.''' for filename in segments: - print("converting segment %s..." 
% filename) + print("converting segment %s in place" % filename) with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) From c7af4c7f1d1f8d5380bce60c27d743f11e0dc81d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:43:08 -0400 Subject: [PATCH 05/63] more debug --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index c95ffa793f..5ef88893b6 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -80,7 +80,7 @@ def convert(self): try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: - print("no key file found for repository, not converting") + print("no key file found for repository") else: self.convert_keyfiles(keyfile) self.repository.close() From 312c3cf738318ec0dba0383a23159f17fa0aa1e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 22:53:58 -0400 Subject: [PATCH 06/63] rewrite converter to avoid using attic code the unit tests themselves still use attic to generate an attic repository for testing, but the converter code should now be standalone --- borg/testsuite/convert.py | 42 +++++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5ef88893b6..44b0a3f394 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,4 @@ -import binascii +from binascii import hexlify import os import pytest import shutil @@ -20,6 +20,37 @@ class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' FILE_ID = 'ATTIC KEY' + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + 
'''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) + class ConversionTestCase(BaseTestCase): class MockArgs: @@ -116,11 +147,10 @@ def find_attic_keyfile(self): no key is found. whether that exception is from Borg or Attic is unclear. - this is split in a separate function in case we want to - reimplement the attic code here. 
- ''' - self.repository._location = attic.helpers.Location(self.tmppath) - return attic.key.KeyfileKey().find_key_file(self.repository) + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self.repository) def convert_keyfiles(self, keyfile): From aa25a217a46b678b14ddbd08d3ec66e2cc11b349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:01:03 -0400 Subject: [PATCH 07/63] move conversion code to a separate class for clarity --- borg/testsuite/convert.py | 86 +++++++++++++++++++-------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 44b0a3f394..f4b8bd3db2 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -16,41 +16,6 @@ class NotImplementedException(Exception): pass -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. 
it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -81,13 +46,17 @@ def setUp(self): self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + def test_convert(self): self.repository = self.open(self.tmppath) # check should fail because of magic number print("this will show an error, it is expected") assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() - self.convert() + print("opening attic repository with borg and converting") + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) @@ -97,6 +66,7 @@ def test_convert(self): assert self.repository.check() self.repository.close() +class AtticRepositoryConverter(Repository): def convert(self): '''convert an attic repository to a borg repository @@ -104,17 +74,15 @@ def convert(self): important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing.''' - print("opening attic repository with borg") - self.repository = self.open(self.tmppath) print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.repository.io.segment_iterator() ] + segments = [ filename 
for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: print("no key file found for repository") else: self.convert_keyfiles(keyfile) - self.repository.close() + self.close() self.convert_segments(segments) with pytest.raises(NotImplementedException): self.convert_cache() @@ -150,7 +118,7 @@ def find_attic_keyfile(self): this is split in a separate function in case we want to use the attic code here directly, instead of our local implementation.''' - return AtticKeyfileKey.find_key_file(self.repository) + return AtticKeyfileKey.find_key_file(self) def convert_keyfiles(self, keyfile): @@ -200,5 +168,37 @@ def convert_cache(self): ''' raise NotImplementedException('not implemented') - def tearDown(self): - shutil.rmtree(self.tmppath) +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. 
it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 5a1680397c571ad2f42e731d4835b7f2f356aa55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:02:21 -0400 Subject: [PATCH 08/63] remove needless use of self --- borg/testsuite/convert.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index f4b8bd3db2..9d5f95142d 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -87,7 +87,8 @@ def convert(self): with pytest.raises(NotImplementedException): self.convert_cache() - def convert_segments(self, segments): + @staticmethod + def convert_segments(segments): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -120,7 +121,8 @@ def find_attic_keyfile(self): implementation.''' return AtticKeyfileKey.find_key_file(self) - def convert_keyfiles(self, keyfile): + @staticmethod + def convert_keyfiles(keyfile): '''convert key files from attic to borg From c30df4e033834c4d96be67fe4bcedb75014dc115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:18:03 -0400 Subject: [PATCH 09/63] move converter code out of test suite --- borg/converter.py | 147 +++++++++++++++++++++++++++++++++++++ borg/testsuite/convert.py | 151 ++------------------------------------ 2 files changed, 152 insertions(+), 146 deletions(-) create mode 100644 borg/converter.py diff 
--git a/borg/converter.py b/borg/converter.py new file mode 100644 index 0000000000..b558af8839 --- /dev/null +++ b/borg/converter.py @@ -0,0 +1,147 @@ +from binascii import hexlify +import os + +from .helpers import IntegrityError, get_keys_dir +from .repository import Repository, MAGIC +from .key import KeyfileKey, KeyfileNotFoundError + +class NotImplementedException(Exception): + pass + +class AtticRepositoryConverter(Repository): + def convert(self): + '''convert an attic repository to a borg repository + + those are the files that need to be converted here, from most + important to least important: segments, key files, and various + caches, the latter being optional, as they will be rebuilt if + missing.''' + print("reading segments from attic repository using borg") + segments = [ filename for i, filename in self.io.segment_iterator() ] + try: + keyfile = self.find_attic_keyfile() + except KeyfileNotFoundError: + print("no key file found for repository") + else: + self.convert_keyfiles(keyfile) + self.close() + self.convert_segments(segments) + self.convert_cache() + + @staticmethod + def convert_segments(segments): + '''convert repository segments from attic to borg + + replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in + `$ATTIC_REPO/data/**`. + + luckily the segment length didn't change so we can just + replace the 8 first bytes of all regular files in there.''' + for filename in segments: + print("converting segment %s in place" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) + + def find_attic_keyfile(self): + '''find the attic keyfiles + + the keyfiles are loaded by `KeyfileKey.find_key_file()`. that + finds the keys with the right identifier for the repo + + this is expected to look into $HOME/.attic/keys or + $ATTIC_KEYS_DIR for key files matching the given Borg + repository. + + it is expected to raise an exception (KeyfileNotFoundError) if + no key is found. 
whether that exception is from Borg or Attic + is unclear. + + this is split in a separate function in case we want to use + the attic code here directly, instead of our local + implementation.''' + return AtticKeyfileKey.find_key_file(self) + + @staticmethod + def convert_keyfiles(keyfile): + + '''convert key files from attic to borg + + replacement pattern is `s/ATTIC KEY/BORG_KEY/` in + `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or + `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or + `$HOME/.borg/keys`. + + no need to decrypt to convert. we need to rewrite the whole + key file because magic number length changed, but that's not a + problem because the keyfiles are small (compared to, say, + all the segments).''' + print("converting keyfile %s" % keyfile) + with open(keyfile, 'r') as f: + data = f.read() + data = data.replace(AtticKeyfileKey.FILE_ID, + KeyfileKey.FILE_ID, + 1) + keyfile = os.path.join(get_keys_dir(), + os.path.basename(keyfile)) + print("writing borg keyfile to %s" % keyfile) + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) + + def convert_cache(self): + '''convert caches from attic to borg + + those are all hash indexes, so we need to + `s/ATTICIDX/BORG_IDX/` in a few locations: + + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` + is the `Repository.get_index_transaction_id()`), which we + should probably update, with a lock, see + `Repository.open()`, which i'm not sure we should use + because it may write data on `Repository.close()`... 
+ + * the `files` and `chunks` cache (in + `$HOME/.cache/attic//`), which we could just drop, + but if we'd want to convert, we could open it with the + `Cache.open()`, edit in place and then `Cache.close()` to + make sure we have locking right + ''' + raise NotImplementedException('not implemented') + +class AtticKeyfileKey(KeyfileKey): + '''backwards compatible Attick key file parser''' + FILE_ID = 'ATTIC KEY' + + # verbatim copy from attic + @staticmethod + def get_keys_dir(): + """Determine where to repository keys and cache""" + return os.environ.get('ATTIC_KEYS_DIR', + os.path.join(os.path.expanduser('~'), '.attic', 'keys')) + + @classmethod + def find_key_file(cls, repository): + '''copy of attic's `find_key_file`_ + + this has two small modifications: + + 1. it uses the above `get_keys_dir`_ instead of the global one, + assumed to be borg's + + 2. it uses `repository.path`_ instead of + `repository._location.canonical_path`_ because we can't + assume the repository has been opened by the archiver yet + ''' + get_keys_dir = cls.get_keys_dir + id = hexlify(repository.id).decode('ascii') + keys_dir = get_keys_dir() + for name in os.listdir(keys_dir): + filename = os.path.join(keys_dir, name) + with open(filename, 'r') as fd: + line = fd.readline().strip() + if line and line.startswith(cls.FILE_ID) and line[10:] == id: + return filename + raise KeyfileNotFoundError(repository.path, get_keys_dir()) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 9d5f95142d..74196063eb 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,4 +1,3 @@ -from binascii import hexlify import os import pytest import shutil @@ -8,14 +7,12 @@ import attic.key import attic.helpers -from ..helpers import IntegrityError, get_keys_dir +from ..converter import AtticRepositoryConverter, NotImplementedException +from ..helpers import get_keys_dir +from ..key import KeyfileKey from ..repository import Repository, MAGIC -from ..key import KeyfileKey, 
KeyfileNotFoundError from . import BaseTestCase -class NotImplementedException(Exception): - pass - class ConversionTestCase(BaseTestCase): class MockArgs: @@ -56,7 +53,8 @@ def test_convert(self): assert not self.repository.check() # can't check raises() because check() handles the error self.repository.close() print("opening attic repository with borg and converting") - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + with pytest.raises(NotImplementedException): + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) @@ -65,142 +63,3 @@ def test_convert(self): self.repository = self.open(self.tmppath) assert self.repository.check() self.repository.close() - -class AtticRepositoryConverter(Repository): - def convert(self): - '''convert an attic repository to a borg repository - - those are the files that need to be converted here, from most - important to least important: segments, key files, and various - caches, the latter being optional, as they will be rebuilt if - missing.''' - print("reading segments from attic repository using borg") - segments = [ filename for i, filename in self.io.segment_iterator() ] - try: - keyfile = self.find_attic_keyfile() - except KeyfileNotFoundError: - print("no key file found for repository") - else: - self.convert_keyfiles(keyfile) - self.close() - self.convert_segments(segments) - with pytest.raises(NotImplementedException): - self.convert_cache() - - @staticmethod - def convert_segments(segments): - '''convert repository segments from attic to borg - - replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in - `$ATTIC_REPO/data/**`. 
- - luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' - for filename in segments: - print("converting segment %s in place" % filename) - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) - - def find_attic_keyfile(self): - '''find the attic keyfiles - - the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo - - this is expected to look into $HOME/.attic/keys or - $ATTIC_KEYS_DIR for key files matching the given Borg - repository. - - it is expected to raise an exception (KeyfileNotFoundError) if - no key is found. whether that exception is from Borg or Attic - is unclear. - - this is split in a separate function in case we want to use - the attic code here directly, instead of our local - implementation.''' - return AtticKeyfileKey.find_key_file(self) - - @staticmethod - def convert_keyfiles(keyfile): - - '''convert key files from attic to borg - - replacement pattern is `s/ATTIC KEY/BORG_KEY/` in - `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or - `$HOME/.attic/keys`, and moved to `$BORG_KEYS_DIR` or - `$HOME/.borg/keys`. - - no need to decrypt to convert. 
we need to rewrite the whole - key file because magic number length changed, but that's not a - problem because the keyfiles are small (compared to, say, - all the segments).''' - print("converting keyfile %s" % keyfile) - with open(keyfile, 'r') as f: - data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) - print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) - - def convert_cache(self): - '''convert caches from attic to borg - - those are all hash indexes, so we need to - `s/ATTICIDX/BORG_IDX/` in a few locations: - - * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` - is the `Repository.get_index_transaction_id()`), which we - should probably update, with a lock, see - `Repository.open()`, which i'm not sure we should use - because it may write data on `Repository.close()`... - - * the `files` and `chunks` cache (in - `$HOME/.cache/attic//`), which we could just drop, - but if we'd want to convert, we could open it with the - `Cache.open()`, edit in place and then `Cache.close()` to - make sure we have locking right - ''' - raise NotImplementedException('not implemented') - -class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' - FILE_ID = 'ATTIC KEY' - - # verbatim copy from attic - @staticmethod - def get_keys_dir(): - """Determine where to repository keys and cache""" - return os.environ.get('ATTIC_KEYS_DIR', - os.path.join(os.path.expanduser('~'), '.attic', 'keys')) - - @classmethod - def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ - - this has two small modifications: - - 1. it uses the above `get_keys_dir`_ instead of the global one, - assumed to be borg's - - 2. 
it uses `repository.path`_ instead of - `repository._location.canonical_path`_ because we can't - assume the repository has been opened by the archiver yet - ''' - get_keys_dir = cls.get_keys_dir - id = hexlify(repository.id).decode('ascii') - keys_dir = get_keys_dir() - for name in os.listdir(keys_dir): - filename = os.path.join(keys_dir, name) - with open(filename, 'r') as fd: - line = fd.readline().strip() - if line and line.startswith(cls.FILE_ID) and line[10:] == id: - return filename - raise KeyfileNotFoundError(repository.path, get_keys_dir()) From 77ed6dec2ba3dcbbec4b4027ffd3313fa03f6905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:27:55 -0400 Subject: [PATCH 10/63] skip converter tests if attic isn't installed --- borg/testsuite/convert.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 74196063eb..29b7c49f0a 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -3,9 +3,14 @@ import shutil import tempfile -import attic.repository -import attic.key -import attic.helpers +try: + import attic.repository + import attic.key + import attic.helpers +except ImportError: + attic = None +pytestmark = pytest.mark.skipif(attic is None, + reason = 'cannot find an attic install') from ..converter import AtticRepositoryConverter, NotImplementedException from ..helpers import get_keys_dir From e5543657658b46ef48248697531ca447ec86bcda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:28:07 -0400 Subject: [PATCH 11/63] remove unused import --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b558af8839..a416f3e79e 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,7 +1,7 @@ from binascii import hexlify import os -from .helpers import IntegrityError, get_keys_dir +from .helpers import 
get_keys_dir from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError From f35e8e17f2b4b4379bb250d3b495c6c59f734cb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:35 -0400 Subject: [PATCH 12/63] add dry run support to converter --- borg/converter.py | 31 +++++++++++++++++-------------- borg/testsuite/convert.py | 2 +- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index a416f3e79e..d949fd31a4 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -9,7 +9,7 @@ class NotImplementedException(Exception): pass class AtticRepositoryConverter(Repository): - def convert(self): + def convert(self, dryrun=True): '''convert an attic repository to a borg repository those are the files that need to be converted here, from most @@ -23,13 +23,13 @@ def convert(self): except KeyfileNotFoundError: print("no key file found for repository") else: - self.convert_keyfiles(keyfile) + self.convert_keyfiles(keyfile, dryrun) self.close() - self.convert_segments(segments) - self.convert_cache() + self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) @staticmethod - def convert_segments(segments): + def convert_segments(segments, dryrun): '''convert repository segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in @@ -39,6 +39,8 @@ def convert_segments(segments): replace the 8 first bytes of all regular files in there.''' for filename in segments: print("converting segment %s in place" % filename) + if dryrun: + continue with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) @@ -63,7 +65,7 @@ def find_attic_keyfile(self): return AtticKeyfileKey.find_key_file(self) @staticmethod - def convert_keyfiles(keyfile): + def convert_keyfiles(keyfile, dryrun): '''convert key files from attic to borg @@ -85,13 +87,14 @@ def convert_keyfiles(keyfile): keyfile = os.path.join(get_keys_dir(), 
os.path.basename(keyfile)) print("writing borg keyfile to %s" % keyfile) - with open(keyfile, 'w') as f: - f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) - - def convert_cache(self): + if not dryrun: + with open(keyfile, 'w') as f: + f.write(data) + with open(keyfile, 'r') as f: + data = f.read() + assert data.startswith(KeyfileKey.FILE_ID) + + def convert_cache(self, dryrun): '''convert caches from attic to borg those are all hash indexes, so we need to @@ -109,7 +112,7 @@ def convert_cache(self): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right ''' - raise NotImplementedException('not implemented') + raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): '''backwards compatible Attick key file parser''' diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 29b7c49f0a..e708ea60dc 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,7 +59,7 @@ def test_convert(self): self.repository.close() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert() + self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From a5f32b0a27c076326500e5b335c6592e083dc130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Wed, 30 Sep 2015 23:50:46 -0400 Subject: [PATCH 13/63] add convert command --- borg/archiver.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/borg/archiver.py b/borg/archiver.py index 28f1d8a3f1..2c4302b3a3 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,6 +17,7 @@ from . 
import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER +from .converter import AtticRepositoryConverter, NotImplementedException from .repository import Repository from .cache import Cache from .key import key_creator @@ -462,6 +463,15 @@ def do_prune(self, args): stats.print_('Deleted data:', cache) return self.exit_code + def do_convert(self, parser, commands, args): + '''convert a repository from attic to borg''' + repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + try: + repo.convert(args.dry_run) + except NotImplementedException as e: + print("warning: %s" % e) + return self.exit_code + helptext = {} helptext['patterns'] = ''' Exclude patterns use a variant of shell pattern syntax, with '*' matching any @@ -896,6 +906,43 @@ def run(self, args=None): type=location_validator(archive=False), help='repository to prune') + convert_epilog = textwrap.dedent(""" + convert will convert an existing Attic repository to Borg in place. + + it will change the magic numbers in the repository's segments + to match the new Borg magic numbers. the keyfiles found in + $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and + copied to $BORG_KEYS_DIR or ~/.borg/keys. + + the cache files are *not* currently converted, which will + result in a much longer backup the first time. you can run + `borg check --repair` to rebuild those files after the + conversion. + + the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic numbers will have changed. 
+ + it is recommended you run this on a copy of the Attic + repository, in case something goes wrong, for example: + + cp -a attic borg + borg convert -n borg + borg convert borg + + you have been warned.""") + subparser = subparsers.add_parser('convert', parents=[common_parser], + description=self.do_convert.__doc__, + epilog=convert_epilog, + formatter_class=argparse.RawDescriptionHelpFormatter) + subparser.set_defaults(func=self.do_convert) + subparser.add_argument('-n', '--dry-run', dest='dry_run', + default=False, action='store_true', + help='do not change repository') + subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', + type=location_validator(archive=False), + help='path to the attic repository to be converted') + subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') subparser.add_argument('--epilog-only', dest='epilog_only', From 1b29699403facffc1396c0741936df3dd8a1c8f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:12 -0400 Subject: [PATCH 14/63] cosmetic: reorder --- borg/testsuite/convert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e708ea60dc..d48a0e05b0 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -35,6 +35,7 @@ def setUp(self): # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + self.attic_repo.close() # we use the repo dir for the created keyfile, because we do # not want to clutter existing keyfiles @@ -46,7 +47,6 @@ def setUp(self): os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - self.attic_repo.close() def tearDown(self): shutil.rmtree(self.tmppath) From 1ba856d2b3ff3fdbbd6bc3afb25701a7bcb57c45 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:15:25 -0400 Subject: [PATCH 15/63] refactor: group test repo subroutine --- borg/testsuite/convert.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index d48a0e05b0..cb9f5ec4c9 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -51,12 +51,16 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.tmppath) - def test_convert(self): + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") self.repository = self.open(self.tmppath) - # check should fail because of magic number - print("this will show an error, it is expected") - assert not self.repository.check() # can't check raises() because check() handles the error + assert self.repository.check() is state # can't check raises() because check() handles the error self.repository.close() + + def test_convert(self): + # check should fail because of magic number + self.check_repo(False) print("opening attic repository with borg and converting") with pytest.raises(NotImplementedException): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -65,6 +69,4 @@ def test_convert(self): os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.repository = self.open(self.tmppath) - assert self.repository.check() - self.repository.close() + self.check_repo() From bcd94b96e0e2a7932e75557403e08d70d2e7fc94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:32:34 -0400 Subject: [PATCH 16/63] split up keyfile, segments and overall testing in converter --- borg/testsuite/convert.py | 53 ++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 14 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 
cb9f5ec4c9..172b308ec9 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException +from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -20,10 +20,6 @@ class ConversionTestCase(BaseTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) - def open(self, path, repo_type = Repository, create=False): return repo_type(os.path.join(path, 'repository'), create = create) @@ -37,6 +33,34 @@ def setUp(self): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() + def tearDown(self): + shutil.rmtree(self.tmppath) + + def check_repo(self, state = True): + if not state: + print("this will show an error, this is expected") + repository = self.open(self.tmppath) + assert repository.check() is state # can't check raises() because check() handles the error + repository.close() + + def test_convert_segments(self): + # check should fail because of magic number + self.check_repo(False) + print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo.close() + repo.convert_segments(segments, dryrun=False) + self.check_repo() + +class EncryptedConversionTestCase(ConversionTestCase): + class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + + def setUp(self): + super().setUp() + # we use the repo dir for the created keyfile, because we do # not want to clutter existing keyfiles os.environ['ATTIC_KEYS_DIR'] = self.tmppath @@ -48,17 +72,18 @@ def setUp(self): os.environ['ATTIC_PASSPHRASE'] 
= 'test' self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) - def tearDown(self): - shutil.rmtree(self.tmppath) + def test_keys(self): + repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") - self.repository = self.open(self.tmppath) - assert self.repository.check() is state # can't check raises() because check() handles the error - self.repository.close() + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(self.key.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) - def test_convert(self): + def test_convert_all(self): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") From c99082922553114c981109178e4b3d5c8c13f195 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 00:51:19 -0400 Subject: [PATCH 17/63] add attic dependency for build as a separate factor this way we don't depend on attic for regular build, but we can still see proper test coverage --- tox.ini | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index d177c121ad..8fd697657e 100644 --- a/tox.ini +++ b/tox.ini @@ -2,13 +2,15 @@ # fakeroot -u tox --recreate [tox] -envlist = py32, py33, py34, py35 +envlist = py{32,33,34,35}{,-attic} [testenv] # Change dir to avoid import problem for cython code. The directory does # not really matter, should be just different from the toplevel dir. 
changedir = {toxworkdir} -deps = -rrequirements.d/development.txt +deps = + -rrequirements.d/development.txt + attic: attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From a81755f1a98f071f668287e49a32964c92466d5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:34:18 -0400 Subject: [PATCH 18/63] use triple-double-quoted instead of single-double-quoted at the request of TW, see #231 --- borg/archiver.py | 2 +- borg/converter.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 2c4302b3a3..696291f1c0 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -464,7 +464,7 @@ def do_prune(self, args): return self.exit_code def do_convert(self, parser, commands, args): - '''convert a repository from attic to borg''' + """convert a repository from attic to borg""" repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) diff --git a/borg/converter.py b/borg/converter.py index d949fd31a4..66606095f0 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,12 +10,12 @@ class NotImplementedException(Exception): class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): - '''convert an attic repository to a borg repository + """convert an attic repository to a borg repository those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.''' + missing.""" print("reading segments from attic repository using borg") segments = [ filename for i, filename in self.io.segment_iterator() ] try: @@ -30,13 +30,13 @@ def convert(self, dryrun=True): @staticmethod def convert_segments(segments, dryrun): - '''convert repository segments from attic to borg + """convert repository 
segments from attic to borg replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. luckily the segment length didn't change so we can just - replace the 8 first bytes of all regular files in there.''' + replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) if dryrun: @@ -46,7 +46,7 @@ def convert_segments(segments, dryrun): segment.write(MAGIC) def find_attic_keyfile(self): - '''find the attic keyfiles + """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that finds the keys with the right identifier for the repo @@ -61,13 +61,13 @@ def find_attic_keyfile(self): this is split in a separate function in case we want to use the attic code here directly, instead of our local - implementation.''' + implementation.""" return AtticKeyfileKey.find_key_file(self) @staticmethod def convert_keyfiles(keyfile, dryrun): - '''convert key files from attic to borg + """convert key files from attic to borg replacement pattern is `s/ATTIC KEY/BORG_KEY/` in `get_keys_dir()`, that is `$ATTIC_KEYS_DIR` or @@ -77,7 +77,7 @@ def convert_keyfiles(keyfile, dryrun): no need to decrypt to convert. 
we need to rewrite the whole key file because magic number length changed, but that's not a problem because the keyfiles are small (compared to, say, - all the segments).''' + all the segments).""" print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() @@ -95,7 +95,7 @@ def convert_keyfiles(keyfile, dryrun): assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): - '''convert caches from attic to borg + """convert caches from attic to borg those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: @@ -111,11 +111,11 @@ def convert_cache(self, dryrun): but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right - ''' + """ raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): - '''backwards compatible Attick key file parser''' + """backwards compatible Attick key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic @@ -127,7 +127,7 @@ def get_keys_dir(): @classmethod def find_key_file(cls, repository): - '''copy of attic's `find_key_file`_ + """copy of attic's `find_key_file`_ this has two small modifications: @@ -137,7 +137,7 @@ def find_key_file(cls, repository): 2. 
it uses `repository.path`_ instead of `repository._location.canonical_path`_ because we can't assume the repository has been opened by the archiver yet - ''' + """ get_keys_dir = cls.get_keys_dir id = hexlify(repository.id).decode('ascii') keys_dir = get_keys_dir() From efbad396f4d90a03f84cad859bedfa9ec169735b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:36:20 -0400 Subject: [PATCH 19/63] help text review: magic s/number/string/, s/can/must/ --- borg/archiver.py | 8 ++++---- borg/converter.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 696291f1c0..832983520c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -909,19 +909,19 @@ def run(self, args=None): convert_epilog = textwrap.dedent(""" convert will convert an existing Attic repository to Borg in place. - it will change the magic numbers in the repository's segments - to match the new Borg magic numbers. the keyfiles found in + it will change the magic strings in the repository's segments + to match the new Borg magic strings. the keyfiles found in $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. the cache files are *not* currently converted, which will - result in a much longer backup the first time. you can run + result in a much longer backup the first time. you must run `borg check --repair` to rebuild those files after the conversion. the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic numbers will have changed. + the magic strings will have changed. 
it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: diff --git a/borg/converter.py b/borg/converter.py index 66606095f0..6b35b6cb7f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -35,7 +35,7 @@ def convert_segments(segments, dryrun): replacement pattern is `s/ATTICSEG/BORG_SEG/` in files in `$ATTIC_REPO/data/**`. - luckily the segment length didn't change so we can just + luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) @@ -75,7 +75,7 @@ def convert_keyfiles(keyfile, dryrun): `$HOME/.borg/keys`. no need to decrypt to convert. we need to rewrite the whole - key file because magic number length changed, but that's not a + key file because magic string length changed, but that's not a problem because the keyfiles are small (compared to, say, all the segments).""" print("converting keyfile %s" % keyfile) From c2913f5f1052e47872ca1cc3bcc55db3c92123d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:40:56 -0400 Subject: [PATCH 20/63] style: don't use continue for nothing --- borg/converter.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6b35b6cb7f..61c26b3ae7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -39,11 +39,10 @@ def convert_segments(segments, dryrun): replace the 8 first bytes of all regular files in there.""" for filename in segments: print("converting segment %s in place" % filename) - if dryrun: - continue - with open(filename, 'r+b') as segment: - segment.seek(0) - segment.write(MAGIC) + if not dryrun: + with open(filename, 'r+b') as segment: + segment.seek(0) + segment.write(MAGIC) def find_attic_keyfile(self): """find the attic keyfiles From dbd4ac7f8d09265ef468d2531446d42843897d51 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:41:44 -0400 Subject: [PATCH 21/63] add missing period --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 61c26b3ae7..6be3f823f7 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,7 +48,7 @@ def find_attic_keyfile(self): """find the attic keyfiles the keyfiles are loaded by `KeyfileKey.find_key_file()`. that - finds the keys with the right identifier for the repo + finds the keys with the right identifier for the repo. this is expected to look into $HOME/.attic/keys or $ATTIC_KEYS_DIR for key files matching the given Borg From 5b8cb63479b1f189a59979417682aa1d56467df4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:43:05 -0400 Subject: [PATCH 22/63] remove duplicate code with the unit test --- borg/converter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 6be3f823f7..751791006a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -89,9 +89,6 @@ def convert_keyfiles(keyfile, dryrun): if not dryrun: with open(keyfile, 'w') as f: f.write(data) - with open(keyfile, 'r') as f: - data = f.read() - assert data.startswith(KeyfileKey.FILE_ID) def convert_cache(self, dryrun): """convert caches from attic to borg From ef0ed409b683c4dbaede8c75a8c68585aecc449f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:44:17 -0400 Subject: [PATCH 23/63] fix typo --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 751791006a..a9a706ad96 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -111,7 +111,7 @@ def convert_cache(self, dryrun): raise NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): - """backwards compatible
Attick key file parser""" + """backwards compatible Attic key file parser""" FILE_ID = 'ATTIC KEY' # verbatim copy from attic From d66516351f0885524a1b8c24375ca19c9d330909 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:46:30 -0400 Subject: [PATCH 24/63] use builtin NotImplementedError instead of writing our own NotImplemented didn't work with pytest.raise(), i didn't know about NotImplementedError, thanks tw --- borg/archiver.py | 4 ++-- borg/converter.py | 5 +---- borg/testsuite/convert.py | 4 ++-- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 832983520c..5c08880d6c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter, NotImplementedException +from .converter import AtticRepositoryConverter from .repository import Repository from .cache import Cache from .key import key_creator @@ -468,7 +468,7 @@ def do_convert(self, parser, commands, args): repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) try: repo.convert(args.dry_run) - except NotImplementedException as e: + except NotImplementedError as e: print("warning: %s" % e) return self.exit_code diff --git a/borg/converter.py b/borg/converter.py index a9a706ad96..8261d92810 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -5,9 +5,6 @@ from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError -class NotImplementedException(Exception): - pass - class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -108,7 +105,7 @@ def convert_cache(self, dryrun): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise 
NotImplementedException('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 172b308ec9..208f6604e1 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -12,7 +12,7 @@ pytestmark = pytest.mark.skipif(attic is None, reason = 'cannot find an attic install') -from ..converter import AtticRepositoryConverter, NotImplementedException, AtticKeyfileKey +from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -87,7 +87,7 @@ def test_convert_all(self): # check should fail because of magic number self.check_repo(False) print("opening attic repository with borg and converting") - with pytest.raises(NotImplementedException): + with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), From d5198c551b1d650f60e1b520eb672a8f5b5fb7f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:47:23 -0400 Subject: [PATCH 25/63] split out depends in imports --- borg/testsuite/convert.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 208f6604e1..3a413072ed 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -1,8 +1,9 @@ import os -import pytest import shutil import tempfile +import pytest + try: import attic.repository import attic.key From 5f6eb87385e0945022582b7a2f160794eedb52b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 
1 Oct 2015 08:50:06 -0400 Subject: [PATCH 26/63] much nicer validation checking --- borg/testsuite/convert.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 3a413072ed..155e568c35 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -37,22 +37,21 @@ def setUp(self): def tearDown(self): shutil.rmtree(self.tmppath) - def check_repo(self, state = True): - if not state: - print("this will show an error, this is expected") + def repo_valid(self,): repository = self.open(self.tmppath) - assert repository.check() is state # can't check raises() because check() handles the error + state = repository.check() # can't check raises() because check() handles the error repository.close() + return state def test_convert_segments(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) segments = [ filename for i, filename in repo.io.segment_iterator() ] repo.close() repo.convert_segments(segments, dryrun=False) - self.check_repo() + assert self.repo_valid() class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: @@ -86,7 +85,7 @@ def test_keys(self): def test_convert_all(self): # check should fail because of magic number - self.check_repo(False) + assert not self.repo_valid() print("opening attic repository with borg and converting") with pytest.raises(NotImplementedError): self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) @@ -95,4 +94,4 @@ def test_convert_all(self): os.path.basename(self.key.path)) with open(keyfile, 'r') as f: assert f.read().startswith(KeyfileKey.FILE_ID) - self.check_repo() + assert self.repo_valid() From 4a85f2d0f54fa236d792cce2a1a4f96fca13dfd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 
1 Oct 2015 08:58:00 -0400 Subject: [PATCH 27/63] fix most pep8 warnings * limit all lines to 80 chars * remove spaces around parameters * missing blank lines --- borg/testsuite/convert.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 155e568c35..1943b5df25 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -11,7 +11,7 @@ except ImportError: attic = None pytestmark = pytest.mark.skipif(attic is None, - reason = 'cannot find an attic install') + reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,17 +19,18 @@ from ..repository import Repository, MAGIC from . import BaseTestCase + class ConversionTestCase(BaseTestCase): - def open(self, path, repo_type = Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create = create) + def open(self, path, repo_type=Repository, create=False): + return repo_type(os.path.join(path, 'repository'), create=create) def setUp(self): self.tmppath = tempfile.mkdtemp() self.attic_repo = self.open(self.tmppath, - repo_type = attic.repository.Repository, - create = True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1`_ + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') self.attic_repo.close() @@ -39,7 +40,8 @@ def tearDown(self): def repo_valid(self,): repository = self.open(self.tmppath) - state = repository.check() # can't check raises() because check() handles the error + # can't check raises() because check() handles the error + state = repository.check() repository.close() return state @@ -47,12 +49,13 @@ def test_convert_segments(self): # check should fail because of magic number assert not 
self.repo_valid() print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type = AtticRepositoryConverter) - segments = [ filename for i, filename in repo.io.segment_iterator() ] + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) assert self.repo_valid() + class EncryptedConversionTestCase(ConversionTestCase): class MockArgs: def __init__(self, path): @@ -70,10 +73,12 @@ def setUp(self): # about anyways. in real runs, the original key will be retained. os.environ['BORG_KEYS_DIR'] = self.tmppath os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, self.MockArgs(self.tmppath)) + self.key = attic.key.KeyfileKey.create(self.attic_repo, + self.MockArgs(self.tmppath)) def test_keys(self): - repository = self.open(self.tmppath, repo_type = AtticRepositoryConverter) + repository = self.open(self.tmppath, + repo_type=AtticRepositoryConverter) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) @@ -87,8 +92,9 @@ def test_convert_all(self): # check should fail because of magic number assert not self.repo_valid() print("opening attic repository with borg and converting") + repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) with pytest.raises(NotImplementedError): - self.open(self.tmppath, repo_type = AtticRepositoryConverter).convert(dryrun=False) + repo.convert(dryrun=False) # check that the new keyfile is alright keyfile = os.path.join(get_keys_dir(), os.path.basename(self.key.path)) From b9c474d1877190ef73e295c46ac8b7ae58a803cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 08:59:01 -0400 Subject: [PATCH 28/63] pep8: put pytest skip marker after imports --- borg/testsuite/convert.py | 5 +++-- 1 file changed, 3 insertions(+), 
2 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 1943b5df25..08472ef93b 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -10,8 +10,6 @@ import attic.helpers except ImportError: attic = None -pytestmark = pytest.mark.skipif(attic is None, - reason='cannot find an attic install') from ..converter import AtticRepositoryConverter, AtticKeyfileKey from ..helpers import get_keys_dir @@ -19,6 +17,9 @@ from ..repository import Repository, MAGIC from . import BaseTestCase +pytestmark = pytest.mark.skipif(attic is None, + reason='cannot find an attic install') + class ConversionTestCase(BaseTestCase): From 79d9aebaf2e0f1b533f81815b4eefde20ba9938a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:00:49 -0400 Subject: [PATCH 29/63] use permanently instead of irrevocably, which is less common --- borg/archiver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c08880d6c..5c33b5f7b2 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -919,7 +919,7 @@ def run(self, args=None): `borg check --repair` to rebuild those files after the conversion. - the conversion can IRREMEDIABLY DAMAGE YOUR REPOSITORY! Attic + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as the magic strings will have changed. From 57801a288d43c96e9a93894334a61e6ffc6c89f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:03:41 -0400 Subject: [PATCH 30/63] keep tests simple by always adding attic depends note that we do not depend on attic to build borg, just to do those tests. if attic goes away, we could eventually do this another way or just stop testing this altogether. 
--- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 8fd697657e..a9ccb5e040 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ # fakeroot -u tox --recreate [tox] -envlist = py{32,33,34,35}{,-attic} +envlist = py{32,33,34,35} [testenv] # Change dir to avoid import problem for cython code. The directory does @@ -10,7 +10,7 @@ envlist = py{32,33,34,35}{,-attic} changedir = {toxworkdir} deps = -rrequirements.d/development.txt - attic: attic + attic commands = py.test --cov=borg --pyargs {posargs:borg.testsuite} # fakeroot -u needs some env vars: passenv = * From 58815bc28a795bf4a77a288c4edbda7b32c004f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:23:17 -0400 Subject: [PATCH 31/63] fix commandline dispatch for converter --- borg/archiver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 5c33b5f7b2..02c6ea781c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -463,9 +463,9 @@ def do_prune(self, args): stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, parser, commands, args): + def do_convert(self, args): """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(os.path.join(args.repository, 'repository'), create=False) + repo = AtticRepositoryConverter(args.repository.path, create=False) try: repo.convert(args.dry_run) except NotImplementedError as e: From 98e4e6bc253f067cc5c45f046073d179e2d668d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 09:35:17 -0400 Subject: [PATCH 32/63] lock repository when converting segments --- borg/converter.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/borg/converter.py b/borg/converter.py index 8261d92810..99de15170b 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -2,6 +2,7 @@ import os from .helpers import get_keys_dir +from .locking import 
UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -22,7 +23,12 @@ def convert(self, dryrun=True): else: self.convert_keyfiles(keyfile, dryrun) self.close() + # partial open: just hold on to the lock + self.lock = UpgradableLock(os.path.join(self.path, 'lock'), + exclusive=True).acquire() self.convert_segments(segments, dryrun) + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod @@ -34,6 +40,7 @@ def convert_segments(segments, dryrun): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" + print("converting %d segments..." % len(segments)) for filename in segments: print("converting segment %s in place" % filename) if not dryrun: From f5cb0f4e731bf63b5a7c0795eb612c01b95ac7ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 10:41:31 -0400 Subject: [PATCH 33/63] rewrite convert tests with pytest fixtures --- borg/testsuite/convert.py | 160 ++++++++++++++++++-------------------- 1 file changed, 77 insertions(+), 83 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 08472ef93b..ac7d6cbca0 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -15,90 +15,84 @@ from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC -from . 
import BaseTestCase pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') - -class ConversionTestCase(BaseTestCase): - - def open(self, path, repo_type=Repository, create=False): - return repo_type(os.path.join(path, 'repository'), create=create) - - def setUp(self): - self.tmppath = tempfile.mkdtemp() - self.attic_repo = self.open(self.tmppath, - repo_type=attic.repository.Repository, - create=True) - # throw some stuff in that repo, copied from `RepositoryTestCase.test1` - for x in range(100): - self.attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') - self.attic_repo.close() - - def tearDown(self): - shutil.rmtree(self.tmppath) - - def repo_valid(self,): - repository = self.open(self.tmppath) - # can't check raises() because check() handles the error - state = repository.check() - repository.close() - return state - - def test_convert_segments(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - segments = [filename for i, filename in repo.io.segment_iterator()] - repo.close() - repo.convert_segments(segments, dryrun=False) - assert self.repo_valid() - - -class EncryptedConversionTestCase(ConversionTestCase): - class MockArgs: - def __init__(self, path): - self.repository = attic.helpers.Location(path) - - def setUp(self): - super().setUp() - - # we use the repo dir for the created keyfile, because we do - # not want to clutter existing keyfiles - os.environ['ATTIC_KEYS_DIR'] = self.tmppath - - # we use the same directory for the converted files, which - # will clutter the previously created one, which we don't care - # about anyways. in real runs, the original key will be retained. 
- os.environ['BORG_KEYS_DIR'] = self.tmppath - os.environ['ATTIC_PASSPHRASE'] = 'test' - self.key = attic.key.KeyfileKey.create(self.attic_repo, - self.MockArgs(self.tmppath)) - - def test_keys(self): - repository = self.open(self.tmppath, - repo_type=AtticRepositoryConverter) - keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - - def test_convert_all(self): - # check should fail because of magic number - assert not self.repo_valid() - print("opening attic repository with borg and converting") - repo = self.open(self.tmppath, repo_type=AtticRepositoryConverter) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(self.key.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) - assert self.repo_valid() +def repo_open(path, repo_type=Repository, create=False): + return repo_type(os.path.join(str(path), 'repository'), create=create) + +def repo_valid(path): + repository = repo_open(str(path)) + # can't check raises() because check() handles the error + state = repository.check() + repository.close() + return state + +@pytest.fixture(autouse=True) +def attic_repo(tmpdir): + attic_repo = repo_open(str(tmpdir), + repo_type=attic.repository.Repository, + create=True) + # throw some stuff in that repo, copied from `RepositoryTestCase.test1` + for x in range(100): + attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.close() + return attic_repo + +@pytest.mark.usefixtures("tmpdir") +def test_convert_segments(tmpdir, attic_repo): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic 
repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + segments = [filename for i, filename in repo.io.segment_iterator()] + repo.close() + repo.convert_segments(segments, dryrun=False) + assert repo_valid(tmpdir) + +class MockArgs: + def __init__(self, path): + self.repository = attic.helpers.Location(path) + +@pytest.fixture() +def attic_key_file(attic_repo, tmpdir): + keys_dir = str(tmpdir.mkdir('keys')) + + # we use the repo dir for the created keyfile, because we do + # not want to clutter existing keyfiles + os.environ['ATTIC_KEYS_DIR'] = keys_dir + + # we use the same directory for the converted files, which + # will clutter the previously created one, which we don't care + # about anyways. in real runs, the original key will be retained. + os.environ['BORG_KEYS_DIR'] = keys_dir + os.environ['ATTIC_PASSPHRASE'] = 'test' + return attic.key.KeyfileKey.create(attic_repo, + MockArgs(keys_dir)) + +def test_keys(tmpdir, attic_repo, attic_key_file): + repository = repo_open(tmpdir, + repo_type=AtticRepositoryConverter) + keyfile = AtticKeyfileKey.find_key_file(repository) + AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) + + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) + +def test_convert_all(tmpdir, attic_repo, attic_key_file): + # check should fail because of magic number + assert not repo_valid(tmpdir) + print("opening attic repository with borg and converting") + repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + with pytest.raises(NotImplementedError): + repo.convert(dryrun=False) + # check that the new keyfile is alright + keyfile = os.path.join(get_keys_dir(), + os.path.basename(attic_key_file.path)) + with open(keyfile, 'r') as f: + assert f.read().startswith(KeyfileKey.FILE_ID) + assert repo_valid(tmpdir) From 
a08bcb21aee3594287551ec4b1e8e8c119c8f65d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:10:00 -0400 Subject: [PATCH 34/63] refactor common code we get rid of repo_open() which doesn't same much typing, and add a validator for keys --- borg/testsuite/convert.py | 35 +++++++++++++---------------------- 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index ac7d6cbca0..e3f9be5d14 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,21 +19,22 @@ pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') -def repo_open(path, repo_type=Repository, create=False): - return repo_type(os.path.join(str(path), 'repository'), create=create) - def repo_valid(path): - repository = repo_open(str(path)) + repository = Repository(str(path), create=False) # can't check raises() because check() handles the error state = repository.check() repository.close() return state +def key_valid(path): + keyfile = os.path.join(get_keys_dir(), + os.path.basename(path)) + with open(keyfile, 'r') as f: + return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def attic_repo(tmpdir): - attic_repo = repo_open(str(tmpdir), - repo_type=attic.repository.Repository, - create=True) + attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') @@ -45,7 +46,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() 
repo.convert_segments(segments, dryrun=False) @@ -72,27 +73,17 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): - repository = repo_open(tmpdir, - repo_type=AtticRepositoryConverter) + repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) - - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = repo_open(tmpdir, repo_type=AtticRepositoryConverter) + repo = AtticRepositoryConverter(str(tmpdir), create=False) with pytest.raises(NotImplementedError): repo.convert(dryrun=False) - # check that the new keyfile is alright - keyfile = os.path.join(get_keys_dir(), - os.path.basename(attic_key_file.path)) - with open(keyfile, 'r') as f: - assert f.read().startswith(KeyfileKey.FILE_ID) + assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) From 7f6fd1f30686ffcb68bbfa87ba75978d691b8b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:11:30 -0400 Subject: [PATCH 35/63] add docs for all converter test code --- borg/testsuite/convert.py | 66 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index e3f9be5d14..cc85dfca38 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -20,6 +20,12 @@ reason='cannot find an attic install') def repo_valid(path): + """ + utility function to check if borg can open a repository + + :param path: the path to the 
repository + :returns: if borg can check the repository + """ repository = Repository(str(path), create=False) # can't check raises() because check() handles the error state = repository.check() @@ -27,6 +33,12 @@ def repo_valid(path): return state def key_valid(path): + """ + check that the new keyfile is alright + + :param path: the path to the key file + :returns: if the file starts with the borg magic string + """ keyfile = os.path.join(get_keys_dir(), os.path.basename(path)) with open(keyfile, 'r') as f: @@ -34,6 +46,12 @@ def key_valid(path): @pytest.fixture(autouse=True) def attic_repo(tmpdir): + """ + create an attic repo with some stuff in it + + :param tmpdir: path to the repository to be created + :returns: a attic.repository.Repository object + """ attic_repo = attic.repository.Repository(str(tmpdir), create=True) # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): @@ -43,6 +61,16 @@ def attic_repo(tmpdir): @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): + """test segment conversion + + this will load the given attic repository, list all the segments + then convert them one at a time. 
we need to close the repo before + conversion otherwise we have errors from borg + + :param tmpdir: a temporary directory to run the test in (builtin + fixture) + :param attic_repo: a populated attic repository (fixture) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") @@ -53,11 +81,27 @@ def test_convert_segments(tmpdir, attic_repo): assert repo_valid(tmpdir) class MockArgs: + """ + mock attic location + + this is used to simulate a key location with a properly loaded + repository object to create a key file + """ def __init__(self, path): self.repository = attic.helpers.Location(path) @pytest.fixture() def attic_key_file(attic_repo, tmpdir): + """ + create an attic key file from the given repo, in the keys + subdirectory of the given tmpdir + + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param tmpdir: a temporary directory (a builtin fixture) + :returns: the KeyfileKey object as returned by + attic.key.KeyfileKey.create() + """ keys_dir = str(tmpdir.mkdir('keys')) # we use the repo dir for the created keyfile, because we do @@ -73,12 +117,34 @@ def attic_key_file(attic_repo, tmpdir): MockArgs(keys_dir)) def test_keys(tmpdir, attic_repo, attic_key_file): + """test key conversion + + test that we can convert the given key to a properly formatted + borg key. assumes that the ATTIC_KEYS_DIR and BORG_KEYS_DIR have + been properly populated by the attic_key_file fixture. 
+ + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ repository = AtticRepositoryConverter(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) def test_convert_all(tmpdir, attic_repo, attic_key_file): + """test all conversion steps + + this runs everything. mostly redundant test, since everything is + done above. yet we expect a NotImplementedError because we do not + convert caches yet. + + :param tmpdir: a temporary directory (a builtin fixture) + :param attic_repo: an attic.repository.Repository object (fixture + define above) + :param attic_key_file: an attic.key.KeyfileKey (fixture created above) + """ # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") From 6c318a0f273e522851100f7094a961396f4743e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:12:23 -0400 Subject: [PATCH 36/63] re-pep8 --- borg/testsuite/convert.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index cc85dfca38..5596f4e65a 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -19,6 +19,7 @@ pytestmark = pytest.mark.skipif(attic is None, reason='cannot find an attic install') + def repo_valid(path): """ utility function to check if borg can open a repository @@ -32,6 +33,7 @@ def repo_valid(path): repository.close() return state + def key_valid(path): """ check that the new keyfile is alright @@ -44,6 +46,7 @@ def key_valid(path): with open(keyfile, 'r') as f: return f.read().startswith(KeyfileKey.FILE_ID) + @pytest.fixture(autouse=True) def attic_repo(tmpdir): """ @@ -59,6 +62,7 @@ def 
attic_repo(tmpdir): attic_repo.close() return attic_repo + @pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion @@ -80,6 +84,7 @@ def test_convert_segments(tmpdir, attic_repo): repo.convert_segments(segments, dryrun=False) assert repo_valid(tmpdir) + class MockArgs: """ mock attic location @@ -90,6 +95,7 @@ class MockArgs: def __init__(self, path): self.repository = attic.helpers.Location(path) + @pytest.fixture() def attic_key_file(attic_repo, tmpdir): """ @@ -116,6 +122,7 @@ def attic_key_file(attic_repo, tmpdir): return attic.key.KeyfileKey.create(attic_repo, MockArgs(keys_dir)) + def test_keys(tmpdir, attic_repo, attic_key_file): """test key conversion @@ -133,6 +140,7 @@ def test_keys(tmpdir, attic_repo, attic_key_file): AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) + def test_convert_all(tmpdir, attic_repo, attic_key_file): """test all conversion steps From 946aca97a1ce48e94e5c00be146e58b3f2f5a28a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:02 -0400 Subject: [PATCH 37/63] avoid flooding the console instead we add progress information --- borg/converter.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 99de15170b..b662c1a32f 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,5 +1,6 @@ from binascii import hexlify import os +import time from .helpers import get_keys_dir from .locking import UpgradableLock @@ -41,12 +42,17 @@ def convert_segments(segments, dryrun): luckily the magic string length didn't change so we can just replace the 8 first bytes of all regular files in there.""" print("converting %d segments..." 
% len(segments)) + i = 0 for filename in segments: - print("converting segment %s in place" % filename) - if not dryrun: + print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + i += 1 + if dryrun: + time.sleep(0.001) + else: with open(filename, 'r+b') as segment: segment.seek(0) segment.write(MAGIC) + print() def find_attic_keyfile(self): """find the attic keyfiles From 0d457bc8466e9fbbdb7f069f8707ea93333ce4d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 11:25:12 -0400 Subject: [PATCH 38/63] clarify what to do about the cache warning --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index b662c1a32f..04dd911a71 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -118,7 +118,7 @@ def convert_cache(self, dryrun): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches') + raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" From 3bb3bd45fc1074a840b5c60dff391c92d6981074 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 12:36:53 -0400 Subject: [PATCH 39/63] add percentage progress --- borg/converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 04dd911a71..f32187fe85 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -44,7 +44,8 @@ def convert_segments(segments, dryrun): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: - print("\rconverting segment %s in place (%d/%d)" % (filename, i, len(segments)), end='') + print("\rconverting segment %d/%d in place, %.2f%% done (%s)" + % (i, len(segments), float(i)/len(segments), filename), end='') i += 1 if dryrun: time.sleep(0.001) From 6a72252b69e0ef07b9e0c54b669e0a762f4f233d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:22:29 -0400 Subject: [PATCH 40/63] release lock properly if segment conversion crashes --- borg/converter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index f32187fe85..7e8e2f75ef 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -27,9 +27,11 @@ def convert(self, dryrun=True): # partial open: just hold on to the lock self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() - self.convert_segments(segments, dryrun) - self.lock.release() - self.lock = None + try: + self.convert_segments(segments, dryrun) + finally: + self.lock.release() + self.lock = None self.convert_cache(dryrun) @staticmethod From 180dfcb18f87555d2a1c555c9af28ed7061e3afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:23:43 -0400 Subject: [PATCH 41/63] remove needless indentation --- borg/converter.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7e8e2f75ef..a65f887e00 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -93,11 +93,8 @@ def convert_keyfiles(keyfile, dryrun): print("converting keyfile %s" % keyfile) with open(keyfile, 'r') as f: data = f.read() - data = data.replace(AtticKeyfileKey.FILE_ID, - KeyfileKey.FILE_ID, - 1) - keyfile = os.path.join(get_keys_dir(), - os.path.basename(keyfile)) + data = data.replace(AtticKeyfileKey.FILE_ID, KeyfileKey.FILE_ID, 1) + keyfile = os.path.join(get_keys_dir(), os.path.basename(keyfile)) 
print("writing borg keyfile to %s" % keyfile) if not dryrun: with open(keyfile, 'w') as f: From 35b219597f1a1a9ce85a7f676d0513959699a1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:28:49 -0400 Subject: [PATCH 42/63] only write magic num if necessary this could allow speeding up conversions resumed after interruption --- borg/converter.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index a65f887e00..89f912a7de 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -7,6 +7,8 @@ from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError +ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -54,7 +56,10 @@ def convert_segments(segments, dryrun): else: with open(filename, 'r+b') as segment: segment.seek(0) - segment.write(MAGIC) + # only write if necessary + if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): + segment.seek(0) + segment.write(MAGIC) print() def find_attic_keyfile(self): From a7902e56575b3bcddea7057dd2c4a06d8c63cf2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 14:29:09 -0400 Subject: [PATCH 43/63] cosmetic: show 100% when done, not n-1/n% --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 89f912a7de..5734115844 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -48,9 +48,9 @@ def convert_segments(segments, dryrun): print("converting %d segments..." 
% len(segments)) i = 0 for filename in segments: + i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" % (i, len(segments), float(i)/len(segments), filename), end='') - i += 1 if dryrun: time.sleep(0.001) else: From 7c32f555ac45fb02e0c821d697e43976c005cdd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 15:43:16 -0400 Subject: [PATCH 44/63] repository index conversion --- borg/converter.py | 32 ++++++++++++++++++++++++-------- borg/testsuite/convert.py | 5 +++-- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 5734115844..8999799000 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -18,6 +18,8 @@ def convert(self, dryrun=True): caches, the latter being optional, as they will be rebuilt if missing.""" print("reading segments from attic repository using borg") + # we need to open it to load the configuration and other fields + self.open(self.path, exclusive=False) segments = [ filename for i, filename in self.io.segment_iterator() ] try: keyfile = self.find_attic_keyfile() @@ -31,10 +33,10 @@ def convert(self, dryrun=True): exclusive=True).acquire() try: self.convert_segments(segments, dryrun) + self.convert_cache(dryrun) finally: self.lock.release() self.lock = None - self.convert_cache(dryrun) @staticmethod def convert_segments(segments, dryrun): @@ -54,14 +56,19 @@ def convert_segments(segments, dryrun): if dryrun: time.sleep(0.001) else: - with open(filename, 'r+b') as segment: - segment.seek(0) - # only write if necessary - if (segment.read(len(ATTIC_MAGIC)) == ATTIC_MAGIC): - segment.seek(0) - segment.write(MAGIC) + AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) print() + @staticmethod + def header_replace(filename, old_magic, new_magic): + print("changing header on %s" % filename) + with open(filename, 'r+b') as segment: + segment.seek(0) + # only write if necessary + if (segment.read(len(old_magic)) == old_magic): 
+ segment.seek(0) + segment.write(new_magic) + def find_attic_keyfile(self): """find the attic keyfiles @@ -123,7 +130,16 @@ def convert_cache(self, dryrun): `Cache.open()`, edit in place and then `Cache.close()` to make sure we have locking right """ - raise NotImplementedError('cache conversion not implemented, next borg backup will take longer to rebuild those caches. use borg check --repair to rebuild now') + caches = [] + transaction_id = self.get_index_transaction_id() + if transaction_id is None: + print('no index file found for repository %s' % self.path) + else: + caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + for cache in caches: + print("converting cache %s" % cache) + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + class AtticKeyfileKey(KeyfileKey): """backwards compatible Attic key file parser""" diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index 5596f4e65a..b57e77097c 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -59,6 +59,7 @@ def attic_repo(tmpdir): # throw some stuff in that repo, copied from `RepositoryTestCase.test1` for x in range(100): attic_repo.put(('%-32d' % x).encode('ascii'), b'SOMEDATA') + attic_repo.commit() attic_repo.close() return attic_repo @@ -82,6 +83,7 @@ def test_convert_segments(tmpdir, attic_repo): segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) + repo.convert_cache(dryrun=False) assert repo_valid(tmpdir) @@ -157,7 +159,6 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") repo = AtticRepositoryConverter(str(tmpdir), create=False) - with pytest.raises(NotImplementedError): - repo.convert(dryrun=False) + repo.convert(dryrun=False) assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) From 022de5be47174b6017152b60577ab54c9b309a76 Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:01 -0400 Subject: [PATCH 45/63] untested file/chunks cache conversion i couldn't figure out how to generate a cache set directly, Archiver is a pain... --- borg/converter.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 8999799000..8436f94868 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -1,8 +1,9 @@ from binascii import hexlify import os +import shutil import time -from .helpers import get_keys_dir +from .helpers import get_keys_dir, get_cache_dir from .locking import UpgradableLock from .repository import Repository, MAGIC from .key import KeyfileKey, KeyfileNotFoundError @@ -124,7 +125,7 @@ def convert_cache(self, dryrun): `Repository.open()`, which i'm not sure we should use because it may write data on `Repository.close()`... - * the `files` and `chunks` cache (in + * the `files` and `chunks` cache (in `$ATTIC_CACHE_DIR` or `$HOME/.cache/attic//`), which we could just drop, but if we'd want to convert, we could open it with the `Cache.open()`, edit in place and then `Cache.close()` to @@ -136,6 +137,20 @@ def convert_cache(self, dryrun): print('no index file found for repository %s' % self.path) else: caches += [os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')] + + # copy of attic's get_cache_dir() + attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', + os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + + # XXX: untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof + for cache in [ 'files', 'chunks' ]: + attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) + if os.path.exists(attic_cache): + borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) + shutil.copy(attic_cache, borg_cache) + caches += [borg_cache] + for cache in caches: print("converting 
cache %s" % cache) AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') From 4f9a411ad843469133426c4eea5c4815198a8777 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:01:17 -0400 Subject: [PATCH 46/63] remove unneeded fixture decorator --- borg/testsuite/convert.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/testsuite/convert.py b/borg/testsuite/convert.py index b57e77097c..ceb3efb115 100644 --- a/borg/testsuite/convert.py +++ b/borg/testsuite/convert.py @@ -64,7 +64,6 @@ def attic_repo(tmpdir): return attic_repo -@pytest.mark.usefixtures("tmpdir") def test_convert_segments(tmpdir, attic_repo): """test segment conversion From 28a033d1d35555a2b46b4a50edb010544cf5e749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:03:52 -0400 Subject: [PATCH 47/63] remove debug output that clobbers segment spinner --- borg/converter.py | 1 - 1 file changed, 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 8436f94868..27d174b471 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -62,7 +62,6 @@ def convert_segments(segments, dryrun): @staticmethod def header_replace(filename, old_magic, new_magic): - print("changing header on %s" % filename) with open(filename, 'r+b') as segment: segment.seek(0) # only write if necessary From 55f79b4999429c10cb99d154bb667fc781986629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:24:28 -0400 Subject: [PATCH 48/63] complete cache conversion code we need to create the borg cache directory dry run was ignored, fixed. 
process cache before segment, because we want to do the faster stuff first --- borg/converter.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 27d174b471..ced2b409d0 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -17,7 +17,11 @@ def convert(self, dryrun=True): those are the files that need to be converted here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if - missing.""" + missing. + + we nevertheless do the order in reverse, as we prefer to do + the fast stuff first, to improve interactivity. + """ print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) @@ -33,8 +37,8 @@ def convert(self, dryrun=True): self.lock = UpgradableLock(os.path.join(self.path, 'lock'), exclusive=True).acquire() try: - self.convert_segments(segments, dryrun) self.convert_cache(dryrun) + self.convert_segments(segments, dryrun) finally: self.lock.release() self.lock = None @@ -146,13 +150,21 @@ def convert_cache(self, dryrun): for cache in [ 'files', 'chunks' ]: attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): - borg_cache = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii'), cache) - shutil.copy(attic_cache, borg_cache) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + borg_cache = os.path.join(borg_cache_dir, cache) + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copy(attic_cache, borg_cache) caches += [borg_cache] + else: + print("no %s cache found in %s" % (cache, attic_cache)) for cache in caches: print("converting cache %s" % cache) - 
AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From 8022e563a9316883636cbfe3243d0a24277a111f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:27:19 -0400 Subject: [PATCH 49/63] don't clobber existing borg cache --- borg/converter.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index ced2b409d0..4566477325 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -154,10 +154,13 @@ def convert_cache(self, dryrun): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) borg_cache = os.path.join(borg_cache_dir, cache) - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) - if not dryrun: - shutil.copy(attic_cache, borg_cache) - caches += [borg_cache] + if os.path.exists(borg_cache): + print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) + else: + print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + if not dryrun: + shutil.copyfile(attic_cache, borg_cache) + caches += [borg_cache] else: print("no %s cache found in %s" % (cache, attic_cache)) From 3e7fa0d63339d49b04792bac3f050c42c3e1cba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Thu, 1 Oct 2015 16:41:17 -0400 Subject: [PATCH 50/63] also copy the cache config file to workaround #234 --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 4566477325..2657a27a36 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -147,7 +147,7 @@ def convert_cache(self, dryrun): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in [ 'files', 'chunks', 'config' ]: attic_cache = 
os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) if os.path.exists(attic_cache): borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) From 081b91bea016b43f569e66a681366514af5b0f8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:10 -0400 Subject: [PATCH 51/63] remove needless paren --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index 2657a27a36..db4fca8f74 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -69,7 +69,7 @@ def header_replace(filename, old_magic, new_magic): with open(filename, 'r+b') as segment: segment.seek(0) # only write if necessary - if (segment.read(len(old_magic)) == old_magic): + if segment.read(len(old_magic)) == old_magic: segment.seek(0) segment.write(new_magic) From 41e9942efea82394585bd3ddae4bf995dc31c8a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:43:51 -0400 Subject: [PATCH 52/63] follow naming of tested module --- borg/testsuite/{convert.py => converter.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename borg/testsuite/{convert.py => converter.py} (100%) diff --git a/borg/testsuite/convert.py b/borg/testsuite/converter.py similarity index 100% rename from borg/testsuite/convert.py rename to borg/testsuite/converter.py From d4d1b414b5c7b53ba37f32d7cb5ed8a15ffd6b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 09:44:53 -0400 Subject: [PATCH 53/63] remove needless autouse --- borg/testsuite/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/testsuite/converter.py b/borg/testsuite/converter.py index ceb3efb115..b7e3748e9b 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/converter.py @@ -47,7 +47,7 @@ def key_valid(path): return f.read().startswith(KeyfileKey.FILE_ID) -@pytest.fixture(autouse=True) +@pytest.fixture() 
def attic_repo(tmpdir): """ create an attic repo with some stuff in it From 69040588cdf7a7ed7630302378209ad71bf1d47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:43 -0400 Subject: [PATCH 54/63] update docs to reflect that cache is converted --- borg/archiver.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 02c6ea781c..041f442606 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,10 +914,9 @@ def run(self, args=None): $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. - the cache files are *not* currently converted, which will - result in a much longer backup the first time. you must run - `borg check --repair` to rebuild those files after the - conversion. + the cache files are converted, but the cache layout between Borg + and Attic changed, so it is possible the first backup after the + conversion takes longer than expected due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! 
Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From ad85f64842a95f37445faad2d4bd6d5323100323 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:10:50 -0400 Subject: [PATCH 55/63] whitespace --- borg/archiver.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 041f442606..357bdad86c 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -925,9 +925,9 @@ def run(self, args=None): it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: - cp -a attic borg - borg convert -n borg - borg convert borg + cp -a attic borg + borg convert -n borg + borg convert borg you have been warned.""") subparser = subparsers.add_parser('convert', parents=[common_parser], From ea5d00436c723d09769cccc618eed4f69585d73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Fri, 2 Oct 2015 10:12:13 -0400 Subject: [PATCH 56/63] also document the cache locations --- borg/archiver.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 357bdad86c..2f53252571 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -914,9 +914,11 @@ def run(self, args=None): $ATTIC_KEYS_DIR or ~/.attic/keys/ will also be converted and copied to $BORG_KEYS_DIR or ~/.borg/keys. - the cache files are converted, but the cache layout between Borg - and Attic changed, so it is possible the first backup after the - conversion takes longer than expected due to the cache resync. + the cache files are converted, from $ATTIC_CACHE_DIR or + ~/.cache/attic to $BORG_CACHE_DIR or ~/.cache/borg, but the + cache layout between Borg and Attic changed, so it is possible + the first backup after the conversion takes longer than expected + due to the cache resync. the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! 
Attic will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as From 2c66e7c23373cd2ee04ae7199d391d39a5a51a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 10:49:29 -0400 Subject: [PATCH 57/63] make percentage a real percentage --- borg/converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/borg/converter.py b/borg/converter.py index db4fca8f74..7f4127cd87 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -57,7 +57,7 @@ def convert_segments(segments, dryrun): for filename in segments: i += 1 print("\rconverting segment %d/%d in place, %.2f%% done (%s)" - % (i, len(segments), float(i)/len(segments), filename), end='') + % (i, len(segments), 100*float(i)/len(segments), filename), end='') if dryrun: time.sleep(0.001) else: From 3773681f00c030b0deff0c503d3d94577b9f32a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:07:37 -0400 Subject: [PATCH 58/63] rewire cache copy mechanisms we separate the conversion and the copy in order to be able to copy arbitrary files from attic without converting them. 
this allows us to copy the config file cleanly without attempting to rewrite its magic number --- borg/converter.py | 54 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 7f4127cd87..39fe3788a3 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -144,25 +144,49 @@ def convert_cache(self, dryrun): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) + borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - # XXX: untested, because generating cache files is a PITA, see - # Archiver.do_create() for proof - for cache in [ 'files', 'chunks', 'config' ]: - attic_cache = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii'), cache) - if os.path.exists(attic_cache): - borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - if not os.path.exists(borg_cache_dir): - os.makedirs(borg_cache_dir) - borg_cache = os.path.join(borg_cache_dir, cache) - if os.path.exists(borg_cache): - print("borg cache already exists in %s, skipping conversion of %s" % (borg_cache, attic_cache)) + def copy_cache_file(file): + """copy the given attic cache file into the borg directory + + does nothing if dryrun is True. also expects + attic_cache_dir and borg_cache_dir to be set in the parent + scope, to the directories path including the repository + identifier. + + :params file: the basename of the cache file to copy + (example: "files" or "chunks") as a string + + :returns: the borg file that was created or None if non + was created. 
+ + """ + attic_file = os.path.join(attic_cache_dir, file) + if os.path.exists(attic_file): + borg_file = os.path.join(borg_cache_dir, file) + if os.path.exists(borg_file): + print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) else: - print("copying attic cache from %s to %s" % (attic_cache, borg_cache)) + print("copying attic cache file from %s to %s" % (attic_file, borg_file)) if not dryrun: - shutil.copyfile(attic_cache, borg_cache) - caches += [borg_cache] + shutil.copyfile(attic_file, borg_file) + return borg_file else: - print("no %s cache found in %s" % (cache, attic_cache)) + print("no %s cache file found in %s" % (file, attic_file)) + return None + + if os.path.exists(attic_cache_dir): + if not os.path.exists(borg_cache_dir): + os.makedirs(borg_cache_dir) + copy_cache_file('config') + + # XXX: untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof + for cache in [ 'files', 'chunks' ]: + copied = copy_cache_file(cache) + if copied: + caches += [copied] for cache in caches: print("converting cache %s" % cache) From 690541264e8beb6f5789c11c1a426ce65a263344 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:49:01 -0400 Subject: [PATCH 59/63] style fixes (pep8, append, file builtin) --- borg/converter.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 39fe3788a3..14aedb9bbe 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -10,6 +10,7 @@ ATTIC_MAGIC = b'ATTICSEG' + class AtticRepositoryConverter(Repository): def convert(self, dryrun=True): """convert an attic repository to a borg repository @@ -25,7 +26,7 @@ def convert(self, dryrun=True): print("reading segments from attic repository using borg") # we need to open it to load the configuration and other fields self.open(self.path, exclusive=False) - segments = [ filename for i, filename in 
self.io.segment_iterator() ] + segments = [filename for i, filename in self.io.segment_iterator()] try: keyfile = self.find_attic_keyfile() except KeyfileNotFoundError: @@ -121,7 +122,7 @@ def convert_cache(self, dryrun): those are all hash indexes, so we need to `s/ATTICIDX/BORG_IDX/` in a few locations: - + * the repository index (in `$ATTIC_REPO/index.%d`, where `%d` is the `Repository.get_index_transaction_id()`), which we should probably update, with a lock, see @@ -143,28 +144,29 @@ def convert_cache(self, dryrun): # copy of attic's get_cache_dir() attic_cache_dir = os.environ.get('ATTIC_CACHE_DIR', - os.path.join(os.path.expanduser('~'), '.cache', 'attic')) + os.path.join(os.path.expanduser('~'), + '.cache', 'attic')) attic_cache_dir = os.path.join(attic_cache_dir, hexlify(self.id).decode('ascii')) borg_cache_dir = os.path.join(get_cache_dir(), hexlify(self.id).decode('ascii')) - def copy_cache_file(file): - """copy the given attic cache file into the borg directory + def copy_cache_file(path): + """copy the given attic cache path into the borg directory does nothing if dryrun is True. also expects attic_cache_dir and borg_cache_dir to be set in the parent scope, to the directories path including the repository identifier. - :params file: the basename of the cache file to copy + :params path: the basename of the cache file to copy (example: "files" or "chunks") as a string :returns: the borg file that was created or None if non was created. 
""" - attic_file = os.path.join(attic_cache_dir, file) + attic_file = os.path.join(attic_cache_dir, path) if os.path.exists(attic_file): - borg_file = os.path.join(borg_cache_dir, file) + borg_file = os.path.join(borg_cache_dir, path) if os.path.exists(borg_file): print("borg cache file already exists in %s, skipping conversion of %s" % (borg_file, attic_file)) else: @@ -173,7 +175,7 @@ def copy_cache_file(file): shutil.copyfile(attic_file, borg_file) return borg_file else: - print("no %s cache file found in %s" % (file, attic_file)) + print("no %s cache file found in %s" % (path, attic_file)) return None if os.path.exists(attic_cache_dir): @@ -183,10 +185,10 @@ def copy_cache_file(file): # XXX: untested, because generating cache files is a PITA, see # Archiver.do_create() for proof - for cache in [ 'files', 'chunks' ]: + for cache in ['files', 'chunks']: copied = copy_cache_file(cache) if copied: - caches += [copied] + caches.append(copied) for cache in caches: print("converting cache %s" % cache) From 48b7c8cea3abe8c0dc8f8cb7d4dd549489659094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 11:52:12 -0400 Subject: [PATCH 60/63] avoid checking for non-existent files if there's no attic cache, it's no use checking for individual files this also makes the code a little clearer also added comments --- borg/converter.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/borg/converter.py b/borg/converter.py index 14aedb9bbe..402ea8b42a 100644 --- a/borg/converter.py +++ b/borg/converter.py @@ -178,22 +178,26 @@ def copy_cache_file(path): print("no %s cache file found in %s" % (path, attic_file)) return None + # XXX: untested, because generating cache files is a PITA, see + # Archiver.do_create() for proof if os.path.exists(attic_cache_dir): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) + + # non-binary file that we don't need to convert, just copy 
copy_cache_file('config') - # XXX: untested, because generating cache files is a PITA, see - # Archiver.do_create() for proof - for cache in ['files', 'chunks']: - copied = copy_cache_file(cache) - if copied: - caches.append(copied) - - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + # we need to convert the headers of those files, copy first + for cache in ['files', 'chunks']: + copied = copy_cache_file(cache) + if copied: + caches.append(copied) + + # actually convert the headers of the detected files + for cache in caches: + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From c91c5d0029cb364168533d33a6ee28c27b9f1340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:36:52 -0400 Subject: [PATCH 61/63] rename convert command to upgrade convert is too generic for the Attic conversion: we may have other converters, from other, more foreign systems that will require different options and different upgrade mechanisms that convert could never cover appropriately. we are more likely to use an approach similar to "git fast-import" instead here, and have the conversion tools be external tools that feed standard data into borg during conversion. upgrade seems like a more natural fit: Attic could be considered like a pre-historic version of Borg that requires invasive changes for borg to be able to use the repository. we may require such changes in the future of borg as well: if we make backwards-incompatible changes to the repository layout or data format, it is possible that we require such changes to be performed on the repository before it is usable again.
instead of scattering those conversions all over the code, we should simply have assertions that check the layout is correct and point the user to upgrade if it is not. upgrade should eventually automatically detect the repository format or version and perform appropriate conversions. Attic is only the first one. we still need to implement an adequate API for auto-detection and upgrade, only the seeds of that are present for now. of course, changes to the upgrade command should be thoroughly documented in the release notes and an eventual upgrade manual. --- borg/archiver.py | 39 +++++++++++++------- borg/testsuite/{converter.py => upgrader.py} | 12 +++--- borg/{converter.py => upgrader.py} | 10 ++--- 3 files changed, 36 insertions(+), 25 deletions(-) rename borg/testsuite/{converter.py => upgrader.py} (93%) rename borg/{converter.py => upgrader.py} (96%) diff --git a/borg/archiver.py b/borg/archiver.py index 2f53252571..202ae0ef61 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -17,7 +17,7 @@ from . import __version__ from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS from .compress import Compressor, COMPR_BUFFER -from .converter import AtticRepositoryConverter +from .upgrader import AtticRepositoryUpgrader from .repository import Repository from .cache import Cache from .key import key_creator @@ -463,11 +463,20 @@ def do_prune(self, args): stats.print_('Deleted data:', cache) return self.exit_code - def do_convert(self, args): - """convert a repository from attic to borg""" - repo = AtticRepositoryConverter(args.repository.path, create=False) + def do_upgrade(self, args): + """upgrade a repository from a previous version""" + # XXX: currently only upgrades from Attic repositories, but may + # eventually be extended to deal with major upgrades for borg + # itself. + # + # in this case, it should auto-detect the current repository + # format and fire up necessary upgrade mechanism. this remains + # to be implemented. 
+ + # XXX: should auto-detect if it is an attic repository here + repo = AtticRepositoryUpgrader(args.repository.path, create=False) try: - repo.convert(args.dry_run) + repo.upgrade(args.dry_run) except NotImplementedError as e: print("warning: %s" % e) return self.exit_code @@ -906,8 +915,10 @@ def run(self, args=None): type=location_validator(archive=False), help='repository to prune') - convert_epilog = textwrap.dedent(""" - convert will convert an existing Attic repository to Borg in place. + upgrade_epilog = textwrap.dedent(""" + upgrade an existing Borg repository in place. this currently + only support converting an Attic repository, but may + eventually be extended to cover major Borg upgrades as well. it will change the magic strings in the repository's segments to match the new Borg magic strings. the keyfiles found in @@ -928,21 +939,21 @@ def run(self, args=None): repository, in case something goes wrong, for example: cp -a attic borg - borg convert -n borg - borg convert borg + borg upgrade -n borg + borg upgrade borg you have been warned.""") - subparser = subparsers.add_parser('convert', parents=[common_parser], - description=self.do_convert.__doc__, - epilog=convert_epilog, + subparser = subparsers.add_parser('upgrade', parents=[common_parser], + description=self.do_upgrade.__doc__, + epilog=upgrade_epilog, formatter_class=argparse.RawDescriptionHelpFormatter) - subparser.set_defaults(func=self.do_convert) + subparser.set_defaults(func=self.do_upgrade) subparser.add_argument('-n', '--dry-run', dest='dry_run', default=False, action='store_true', help='do not change repository') subparser.add_argument('repository', metavar='REPOSITORY', nargs='?', default='', type=location_validator(archive=False), - help='path to the attic repository to be converted') + help='path to the repository to be upgraded') subparser = subparsers.add_parser('help', parents=[common_parser], description='Extra help') diff --git a/borg/testsuite/converter.py 
b/borg/testsuite/upgrader.py similarity index 93% rename from borg/testsuite/converter.py rename to borg/testsuite/upgrader.py index b7e3748e9b..22278f9ac4 100644 --- a/borg/testsuite/converter.py +++ b/borg/testsuite/upgrader.py @@ -11,7 +11,7 @@ except ImportError: attic = None -from ..converter import AtticRepositoryConverter, AtticKeyfileKey +from ..upgrader import AtticRepositoryUpgrader, AtticKeyfileKey from ..helpers import get_keys_dir from ..key import KeyfileKey from ..repository import Repository, MAGIC @@ -78,7 +78,7 @@ def test_convert_segments(tmpdir, attic_repo): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) + repo = AtticRepositoryUpgrader(str(tmpdir), create=False) segments = [filename for i, filename in repo.io.segment_iterator()] repo.close() repo.convert_segments(segments, dryrun=False) @@ -136,9 +136,9 @@ def test_keys(tmpdir, attic_repo, attic_key_file): define above) :param attic_key_file: an attic.key.KeyfileKey (fixture created above) """ - repository = AtticRepositoryConverter(str(tmpdir), create=False) + repository = AtticRepositoryUpgrader(str(tmpdir), create=False) keyfile = AtticKeyfileKey.find_key_file(repository) - AtticRepositoryConverter.convert_keyfiles(keyfile, dryrun=False) + AtticRepositoryUpgrader.convert_keyfiles(keyfile, dryrun=False) assert key_valid(attic_key_file.path) @@ -157,7 +157,7 @@ def test_convert_all(tmpdir, attic_repo, attic_key_file): # check should fail because of magic number assert not repo_valid(tmpdir) print("opening attic repository with borg and converting") - repo = AtticRepositoryConverter(str(tmpdir), create=False) - repo.convert(dryrun=False) + repo = AtticRepositoryUpgrader(str(tmpdir), create=False) + repo.upgrade(dryrun=False) assert key_valid(attic_key_file.path) assert repo_valid(tmpdir) diff --git a/borg/converter.py b/borg/upgrader.py similarity 
index 96% rename from borg/converter.py rename to borg/upgrader.py index 402ea8b42a..2efb9216ca 100644 --- a/borg/converter.py +++ b/borg/upgrader.py @@ -11,11 +11,11 @@ ATTIC_MAGIC = b'ATTICSEG' -class AtticRepositoryConverter(Repository): - def convert(self, dryrun=True): +class AtticRepositoryUpgrader(Repository): + def upgrade(self, dryrun=True): """convert an attic repository to a borg repository - those are the files that need to be converted here, from most + those are the files that need to be upgraded here, from most important to least important: segments, key files, and various caches, the latter being optional, as they will be rebuilt if missing. @@ -62,7 +62,7 @@ def convert_segments(segments, dryrun): if dryrun: time.sleep(0.001) else: - AtticRepositoryConverter.header_replace(filename, ATTIC_MAGIC, MAGIC) + AtticRepositoryUpgrader.header_replace(filename, ATTIC_MAGIC, MAGIC) print() @staticmethod @@ -197,7 +197,7 @@ def copy_cache_file(path): for cache in caches: print("converting cache %s" % cache) if not dryrun: - AtticRepositoryConverter.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey): From fded2219a8c842b56a80926324cbeee8413409f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:46:23 -0400 Subject: [PATCH 62/63] mention borg delete borg this makes it clear how to start from scratch, in case the chunk cache failed to be copied and so on. --- borg/archiver.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/borg/archiver.py b/borg/archiver.py index 202ae0ef61..62da098eb6 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -931,10 +931,6 @@ def run(self, args=None): the first backup after the conversion takes longer than expected due to the cache resync. - the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY!
Attic - will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as - the magic strings will have changed. - it is recommended you run this on a copy of the Attic repository, in case something goes wrong, for example: @@ -942,6 +938,17 @@ def run(self, args=None): borg upgrade -n borg borg upgrade borg + upgrade should be able to resume if interrupted, although it + will still iterate over all segments. if you want to start + from scratch, use `borg delete` over the copied repository to + make sure the cache files are also removed: + + borg delete borg + + the conversion can PERMANENTLY DAMAGE YOUR REPOSITORY! Attic + will also NOT BE ABLE TO READ THE BORG REPOSITORY ANYMORE, as + the magic strings will have changed. + you have been warned.""") subparser = subparsers.add_parser('upgrade', parents=[common_parser], description=self.do_upgrade.__doc__, From 5409cbaa678eda55c7846726f1146be90ea9b648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antoine=20Beaupr=C3=A9?= Date: Sat, 3 Oct 2015 12:56:03 -0400 Subject: [PATCH 63/63] also copy files cache verbatim it seems the file cache does *not* have the ATTIC magic header (nor does it have one in borg), so we don't need to edit the file - we just copy it like a regular file. while i'm here, simplify the cache conversion loop: it's no use splitting the copy and the editing since the latter is so fast, just do everything in one loop, which makes it much easier to read. 
--- borg/upgrader.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/borg/upgrader.py b/borg/upgrader.py index 2efb9216ca..33ef2d3888 100644 --- a/borg/upgrader.py +++ b/borg/upgrader.py @@ -184,20 +184,17 @@ def copy_cache_file(path): if not os.path.exists(borg_cache_dir): os.makedirs(borg_cache_dir) - # non-binary file that we don't need to convert, just copy - copy_cache_file('config') + # file that we don't have a header to convert, just copy + for cache in ['config', 'files']: + copy_cache_file(cache) # we need to convert the headers of those files, copy first - for cache in ['files', 'chunks']: + for cache in ['chunks']: copied = copy_cache_file(cache) if copied: - caches.append(copied) - - # actually convert the headers of the detected files - for cache in caches: - print("converting cache %s" % cache) - if not dryrun: - AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') + print("converting cache %s" % cache) + if not dryrun: + AtticRepositoryUpgrader.header_replace(cache, b'ATTICIDX', b'BORG_IDX') class AtticKeyfileKey(KeyfileKey):