Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions docs/internals/data-structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -329,17 +329,17 @@ or modified. It looks like this:
.. code-block:: python

{
b'version': 1,
b'timestamp': b'2017-05-05T12:42:23.042864',
b'item_keys': [b'acl_access', b'acl_default', ...],
b'config': {},
b'archives': {
b'2017-05-05-system-backup': {
b'id': b'<32 byte binary object ID>',
b'time': b'2017-05-05T12:42:22.942864',
'version': 1,
'timestamp': '2017-05-05T12:42:23.042864',
'item_keys': ['acl_access', 'acl_default', ...],
'config': {},
'archives': {
'2017-05-05-system-backup': {
'id': b'<32 byte binary object ID>',
'time': '2017-05-05T12:42:22.942864',
},
},
b'tam': ...,
'tam': ...,
}

The *version* field can be either 1 or 2. The versions differ in the
Expand Down Expand Up @@ -393,15 +393,15 @@ The *config* key stores the feature flags enabled on a repository:
.. code-block:: python

config = {
b'feature_flags': {
b'read': {
b'mandatory': [b'some_feature'],
'feature_flags': {
'read': {
'mandatory': ['some_feature'],
},
b'check': {
b'mandatory': [b'other_feature'],
'check': {
'mandatory': ['other_feature'],
}
b'write': ...,
b'delete': ...
'write': ...,
'delete': ...
},
}

Expand Down Expand Up @@ -1220,9 +1220,9 @@ transaction ID in the file names. Integrity data is stored in a third file
.. code-block:: python

{
b'version': 2,
b'hints': b'{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}',
b'index': b'{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}'
'version': 2,
'hints': '{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}',
'index': '{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}'
}

The *version* key started at 2, the same version used for the hints. Since Borg has
Expand Down
42 changes: 21 additions & 21 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from .platform import uid2user, user2uid, gid2group, group2gid
from .helpers import parse_timestamp, to_localtime
from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
from .helpers import safe_encode, make_path_safe, remove_surrogates
from .helpers import StableDict
from .helpers import bin_to_hex
from .helpers import safe_ns
Expand Down Expand Up @@ -392,14 +392,14 @@ def get_item_uid_gid(item, *, numeric, uid_forced=None, gid_forced=None, uid_def
if uid_forced is not None:
uid = uid_forced
else:
uid = None if numeric else user2uid(item.user)
uid = None if numeric else user2uid(item.get('user'))
uid = item.uid if uid is None else uid
if uid < 0:
uid = uid_default
if gid_forced is not None:
gid = gid_forced
else:
gid = None if numeric else group2gid(item.group)
gid = None if numeric else group2gid(item.get('group'))
gid = item.gid if gid is None else gid
if gid < 0:
gid = gid_default
Expand Down Expand Up @@ -479,7 +479,6 @@ def _load_meta(self, id):
def load(self, id):
self.id = id
self.metadata = self._load_meta(self.id)
self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline]
self.name = self.metadata.name
self.comment = self.metadata.get('comment', '')

Expand Down Expand Up @@ -1090,11 +1089,13 @@ def stat_simple_attrs(self, st):
if not self.nobirthtime and hasattr(st, 'st_birthtime'):
# sadly, there's no stat_result.st_birthtime_ns
attrs['birthtime'] = safe_ns(int(st.st_birthtime * 10**9))
if self.numeric_ids:
attrs['user'] = attrs['group'] = None
else:
attrs['user'] = uid2user(st.st_uid)
attrs['group'] = gid2group(st.st_gid)
if not self.numeric_ids:
user = uid2user(st.st_uid)
if user is not None:
attrs['user'] = user
group = gid2group(st.st_gid)
if group is not None:
attrs['group'] = group
return attrs

def stat_ext_attrs(self, st, path, fd=None):
Expand Down Expand Up @@ -1427,8 +1428,11 @@ def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))

item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
uid=tarinfo.uid, gid=tarinfo.gid, mtime=s_to_ns(tarinfo.mtime))
if tarinfo.uname:
item.user = tarinfo.uname
if tarinfo.gname:
item.group = tarinfo.gname
if ph:
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
Expand Down Expand Up @@ -1515,7 +1519,7 @@ class RobustUnpacker:
"""
def __init__(self, validator, item_keys):
super().__init__()
self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
self.item_keys = [msgpack.packb(name) for name in item_keys]
self.validator = validator
self._buffered_data = []
self._resync = False
Expand Down Expand Up @@ -1719,13 +1723,10 @@ def rebuild_manifest(self):

Iterates through all objects in the repository looking for archive metadata blocks.
"""
required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)

def valid_archive(obj):
if not isinstance(obj, dict):
return False
keys = set(obj)
return required_archive_keys.issubset(keys)
return REQUIRED_ARCHIVE_KEYS.issubset(obj)

logger.info('Rebuilding missing manifest, this might take some time...')
# as we have lost the manifest, we do not know any more what valid item keys we had.
Expand All @@ -1734,7 +1735,7 @@ def valid_archive(obj):
# lost manifest on a older borg version than the most recent one that was ever used
# within this repository (assuming that newer borg versions support more item keys).
manifest = Manifest(self.key, self.repository)
archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS]
pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
msgid='check.rebuild_manifest')
for chunk_id, _ in self.chunks.iteritems():
Expand Down Expand Up @@ -1881,9 +1882,9 @@ def robust_iterator(archive):

Missing item chunks will be skipped and the msgpack stream will be restarted
"""
item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item,
item_keys = self.manifest.item_keys
required_item_keys = REQUIRED_ITEM_KEYS
unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item,
self.manifest.item_keys)
_state = 0

Expand Down Expand Up @@ -1991,7 +1992,6 @@ def valid_item(obj):
archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
if archive.version != 2:
raise Exception('Unknown archive metadata version')
archive.cmdline = [safe_decode(arg) for arg in archive.cmdline]
items_buffer = ChunkBuffer(self.key)
items_buffer.write_chunk = add_callback
for item in robust_iterator(archive):
Expand Down
22 changes: 9 additions & 13 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@
from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
from .helpers import remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
from .helpers import timestamp
from .helpers import get_cache_dir, os_stat
Expand Down Expand Up @@ -366,10 +366,6 @@ def upgrade_item(item):
if chunks_healthy is not None:
item._dict['chunks_healthy'] = chunks
item._dict.pop('source') # not used for hardlinks any more, replaced by hlid
for attr in 'atime', 'ctime', 'mtime', 'birthtime':
if attr in item:
ns = getattr(item, attr) # decode (bigint or Timestamp) --> int ns
setattr(item, attr, ns) # encode int ns --> msgpack.Timestamp only, no bigint any more
# make sure we only have desired stuff in the new item. specifically, make sure to get rid of:
# - 'acl' remnants of bug in attic <= 0.13
# - 'hardlink_master' (superseded by hlid)
Expand Down Expand Up @@ -1359,8 +1355,8 @@ def item_to_tarinfo(item, original_path):
tarinfo.mode = stat.S_IMODE(item.mode)
tarinfo.uid = item.uid
tarinfo.gid = item.gid
tarinfo.uname = item.user or ''
tarinfo.gname = item.group or ''
tarinfo.uname = item.get('user', '')
tarinfo.gname = item.get('group', '')
# The linkname in tar has 2 uses:
# for symlinks it means the destination, while for hardlinks it refers to the file.
# Since hardlinks in tar have a different type code (LNKTYPE) the format might
Expand Down Expand Up @@ -1944,12 +1940,12 @@ def do_upgrade(self, args, repository, manifest=None, key=None):
print('This repository is not encrypted, cannot enable TAM.')
return EXIT_ERROR

if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
if not manifest.tam_verified or not manifest.config.get('tam_required', False):
# The standard archive listing doesn't include the archive ID like in borg 1.1.x
print('Manifest contents:')
for archive_info in manifest.archives.list(sort_by=['ts']):
print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
manifest.config[b'tam_required'] = True
manifest.config['tam_required'] = True
manifest.write()
repository.commit(compact=False)
if not key.tam_required:
Expand All @@ -1972,7 +1968,7 @@ def do_upgrade(self, args, repository, manifest=None, key=None):
print('Key updated')
if hasattr(key, 'find_key'):
print('Key location:', key.find_key())
manifest.config[b'tam_required'] = False
manifest.config['tam_required'] = False
manifest.write()
repository.commit(compact=False)
else:
Expand Down Expand Up @@ -2304,7 +2300,7 @@ def do_debug_dump_archive(self, args, repository, manifest, key):
"""dump decoded archive metadata (not: data)"""

try:
archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive]
except KeyError:
raise Archive.DoesNotExist(args.location.archive)

Expand All @@ -2321,7 +2317,7 @@ def output(fd):
fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
fd.write(',\n')

data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id']))
archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)

fd.write(' "_meta":\n')
Expand All @@ -2331,7 +2327,7 @@ def output(fd):

unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
first = True
for item_id in archive_org_dict[b'items']:
for item_id in archive_org_dict['items']:
data = key.decrypt(item_id, repository.get(item_id))
unpacker.feed(data)
for item in unpacker:
Expand Down
36 changes: 20 additions & 16 deletions src/borg/cache_sync/unpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u)

static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
{
/* raw = what Borg uses for binary stuff and strings as well */
/* raw = what Borg uses for text stuff */
/* Note: p points to an internal buffer which contains l bytes. */
(void)b;

switch(u->expect) {
case expect_key:
if(length != 32) {
SET_LAST_ERROR("Incorrect key length");
return -1;
}
memcpy(u->current.key, p, 32);
u->expect = expect_size;
break;
case expect_map_key:
if(length == 6 && !memcmp("chunks", p, 6)) {
u->expect = expect_chunks_begin;
Expand All @@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
u->expect = expect_map_item_end;
}
break;
default:
if(u->inside_chunks) {
SET_LAST_ERROR("Unexpected bytes in chunks structure");
return -1;
}
}
return 0;
}

static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
{
(void)u; (void)b; (void)p; (void)length;
UNEXPECTED("bin");
/* bin = what Borg uses for binary stuff */
/* Note: p points to an internal buffer which contains l bytes. */
(void)b;

switch(u->expect) {
case expect_key:
if(length != 32) {
SET_LAST_ERROR("Incorrect key length");
return -1;
}
memcpy(u->current.key, p, 32);
u->expect = expect_size;
break;
default:
if(u->inside_chunks) {
SET_LAST_ERROR("Unexpected bytes in chunks structure");
return -1;
}
}
return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/borg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ class KeyType:
# upper 4 bits are ciphersuite, 0 == legacy AES-CTR
KEYFILE = 0x00
# repos with PASSPHRASE mode could not be created any more since borg 1.0, see #97.
# in borg 1.3 all of its code and also the "borg key migrate-to-repokey" command was removed.
# in borg 2 all of its code and also the "borg key migrate-to-repokey" command was removed.
# if you still need to, you can use "borg key migrate-to-repokey" with borg 1.0, 1.1 and 1.2.
# Nowadays, we just dispatch this to RepoKey and assume the passphrase was migrated to a repokey.
PASSPHRASE = 0x01 # legacy, attic and borg < 1.0
Expand Down
16 changes: 9 additions & 7 deletions src/borg/crypto/key.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong
from ..helpers import msgpack
from ..helpers.manifest import Manifest
from ..item import Key, EncryptedKey
from ..item import Key, EncryptedKey, want_bytes
from ..platform import SaveFile

from .nonces import NonceManager
Expand Down Expand Up @@ -232,26 +232,28 @@ def unpack_and_verify_manifest(self, data, force_tam_not_required=False):
unpacker = get_limited_unpacker('manifest')
unpacker.feed(data)
unpacked = unpacker.unpack()
if b'tam' not in unpacked:
if 'tam' not in unpacked:
if tam_required:
raise TAMRequiredError(self.repository._location.canonical_path())
else:
logger.debug('TAM not found and not required')
return unpacked, False
tam = unpacked.pop(b'tam', None)
tam = unpacked.pop('tam', None)
if not isinstance(tam, dict):
raise TAMInvalid()
tam_type = tam.get(b'type', b'<none>').decode('ascii', 'replace')
tam_type = tam.get('type', '<none>')
if tam_type != 'HKDF_HMAC_SHA512':
if tam_required:
raise TAMUnsupportedSuiteError(repr(tam_type))
else:
logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
return unpacked, False
tam_hmac = tam.get(b'hmac')
tam_salt = tam.get(b'salt')
if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
tam_hmac = tam.get('hmac')
tam_salt = tam.get('salt')
if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)):
raise TAMInvalid()
tam_hmac = want_bytes(tam_hmac) # legacy
tam_salt = want_bytes(tam_salt) # legacy
offset = data.index(tam_hmac)
data[offset:offset + 64] = bytes(64)
tam_key = self._tam_key(tam_salt, context=b'manifest')
Expand Down
2 changes: 1 addition & 1 deletion src/borg/helpers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .time import * # NOQA
from .yes import * # NOQA

from .msgpack import is_slow_msgpack, is_supported_msgpack, int_to_bigint, bigint_to_int, get_limited_unpacker
from .msgpack import is_slow_msgpack, is_supported_msgpack, get_limited_unpacker
from . import msgpack

# generic mechanism to enable users to invoke workarounds by setting the
Expand Down
4 changes: 2 additions & 2 deletions src/borg/helpers/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,8 +205,8 @@ def borg1_hardlink_slave(self, item): # legacy

def hardlink_id_from_path(self, path):
"""compute a hardlink id from a path"""
assert isinstance(path, bytes)
return hashlib.sha256(path).digest()
assert isinstance(path, str)
return hashlib.sha256(path.encode('utf-8', errors='surrogateescape')).digest()

def hardlink_id_from_inode(self, *, ino, dev):
"""compute a hardlink id from an inode"""
Expand Down
Loading