diff --git a/docs/internals/data-structures.rst b/docs/internals/data-structures.rst index d1a5a4cd38..8de20761b7 100644 --- a/docs/internals/data-structures.rst +++ b/docs/internals/data-structures.rst @@ -329,17 +329,17 @@ or modified. It looks like this: .. code-block:: python { - b'version': 1, - b'timestamp': b'2017-05-05T12:42:23.042864', - b'item_keys': [b'acl_access', b'acl_default', ...], - b'config': {}, - b'archives': { - b'2017-05-05-system-backup': { - b'id': b'<32 byte binary object ID>', - b'time': b'2017-05-05T12:42:22.942864', + 'version': 1, + 'timestamp': '2017-05-05T12:42:23.042864', + 'item_keys': ['acl_access', 'acl_default', ...], + 'config': {}, + 'archives': { + '2017-05-05-system-backup': { + 'id': b'<32 byte binary object ID>', + 'time': '2017-05-05T12:42:22.942864', }, }, - b'tam': ..., + 'tam': ..., } The *version* field can be either 1 or 2. The versions differ in the @@ -393,15 +393,15 @@ The *config* key stores the feature flags enabled on a repository: .. code-block:: python config = { - b'feature_flags': { - b'read': { - b'mandatory': [b'some_feature'], + 'feature_flags': { + 'read': { + 'mandatory': ['some_feature'], }, - b'check': { - b'mandatory': [b'other_feature'], + 'check': { + 'mandatory': ['other_feature'], } - b'write': ..., - b'delete': ... + 'write': ..., + 'delete': ... }, } @@ -1220,9 +1220,9 @@ transaction ID in the file names. Integrity data is stored in a third file .. code-block:: python { - b'version': 2, - b'hints': b'{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}', - b'index': b'{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}' + 'version': 2, + 'hints': '{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}', + 'index': '{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}' } The *version* key started at 2, the same version used for the hints. 
Since Borg has diff --git a/src/borg/archive.py b/src/borg/archive.py index 15cfc5d55e..e068413f57 100644 --- a/src/borg/archive.py +++ b/src/borg/archive.py @@ -34,7 +34,7 @@ from .platform import uid2user, user2uid, gid2group, group2gid from .helpers import parse_timestamp, to_localtime from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize -from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates +from .helpers import safe_encode, make_path_safe, remove_surrogates from .helpers import StableDict from .helpers import bin_to_hex from .helpers import safe_ns @@ -392,14 +392,14 @@ def get_item_uid_gid(item, *, numeric, uid_forced=None, gid_forced=None, uid_def if uid_forced is not None: uid = uid_forced else: - uid = None if numeric else user2uid(item.user) + uid = None if numeric else user2uid(item.get('user')) uid = item.uid if uid is None else uid if uid < 0: uid = uid_default if gid_forced is not None: gid = gid_forced else: - gid = None if numeric else group2gid(item.group) + gid = None if numeric else group2gid(item.get('group')) gid = item.gid if gid is None else gid if gid < 0: gid = gid_default @@ -479,7 +479,6 @@ def _load_meta(self, id): def load(self, id): self.id = id self.metadata = self._load_meta(self.id) - self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline] self.name = self.metadata.name self.comment = self.metadata.get('comment', '') @@ -1090,11 +1089,13 @@ def stat_simple_attrs(self, st): if not self.nobirthtime and hasattr(st, 'st_birthtime'): # sadly, there's no stat_result.st_birthtime_ns attrs['birthtime'] = safe_ns(int(st.st_birthtime * 10**9)) - if self.numeric_ids: - attrs['user'] = attrs['group'] = None - else: - attrs['user'] = uid2user(st.st_uid) - attrs['group'] = gid2group(st.st_gid) + if not self.numeric_ids: + user = uid2user(st.st_uid) + if user is not None: + attrs['user'] = user + group = gid2group(st.st_gid) + if group is not None: + 
attrs['group'] = group return attrs def stat_ext_attrs(self, st, path, fd=None): @@ -1427,8 +1428,11 @@ def s_to_ns(s): return safe_ns(int(float(s) * 1e9)) item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type, - uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None, - mtime=s_to_ns(tarinfo.mtime)) + uid=tarinfo.uid, gid=tarinfo.gid, mtime=s_to_ns(tarinfo.mtime)) + if tarinfo.uname: + item.user = tarinfo.uname + if tarinfo.gname: + item.group = tarinfo.gname if ph: # note: for mtime this is a bit redundant as it is already done by tarfile module, # but we just do it in our way to be consistent for sure. @@ -1515,7 +1519,7 @@ class RobustUnpacker: """ def __init__(self, validator, item_keys): super().__init__() - self.item_keys = [msgpack.packb(name.encode()) for name in item_keys] + self.item_keys = [msgpack.packb(name) for name in item_keys] self.validator = validator self._buffered_data = [] self._resync = False @@ -1719,13 +1723,10 @@ def rebuild_manifest(self): Iterates through all objects in the repository looking for archive metadata blocks. """ - required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS) - def valid_archive(obj): if not isinstance(obj, dict): return False - keys = set(obj) - return required_archive_keys.issubset(keys) + return REQUIRED_ARCHIVE_KEYS.issubset(obj) logger.info('Rebuilding missing manifest, this might take some time...') # as we have lost the manifest, we do not know any more what valid item keys we had. @@ -1734,7 +1735,7 @@ def valid_archive(obj): # lost manifest on a older borg version than the most recent one that was ever used # within this repository (assuming that newer borg versions support more item keys). 
manifest = Manifest(self.key, self.repository) - archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS] + archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS] pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01, msgid='check.rebuild_manifest') for chunk_id, _ in self.chunks.iteritems(): @@ -1881,9 +1882,9 @@ def robust_iterator(archive): Missing item chunks will be skipped and the msgpack stream will be restarted """ - item_keys = frozenset(key.encode() for key in self.manifest.item_keys) - required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS) - unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item, + item_keys = self.manifest.item_keys + required_item_keys = REQUIRED_ITEM_KEYS + unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item, self.manifest.item_keys) _state = 0 @@ -1991,7 +1992,6 @@ def valid_item(obj): archive = ArchiveItem(internal_dict=msgpack.unpackb(data)) if archive.version != 2: raise Exception('Unknown archive metadata version') - archive.cmdline = [safe_decode(arg) for arg in archive.cmdline] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): diff --git a/src/borg/archiver.py b/src/borg/archiver.py index dc0db82f23..8d5513973c 100644 --- a/src/borg/archiver.py +++ b/src/borg/archiver.py @@ -55,7 +55,7 @@ from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive - from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes + from .helpers import remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS from 
.helpers import timestamp from .helpers import get_cache_dir, os_stat @@ -366,10 +366,6 @@ def upgrade_item(item): if chunks_healthy is not None: item._dict['chunks_healthy'] = chunks item._dict.pop('source') # not used for hardlinks any more, replaced by hlid - for attr in 'atime', 'ctime', 'mtime', 'birthtime': - if attr in item: - ns = getattr(item, attr) # decode (bigint or Timestamp) --> int ns - setattr(item, attr, ns) # encode int ns --> msgpack.Timestamp only, no bigint any more # make sure we only have desired stuff in the new item. specifically, make sure to get rid of: # - 'acl' remnants of bug in attic <= 0.13 # - 'hardlink_master' (superseded by hlid) @@ -1359,8 +1355,8 @@ def item_to_tarinfo(item, original_path): tarinfo.mode = stat.S_IMODE(item.mode) tarinfo.uid = item.uid tarinfo.gid = item.gid - tarinfo.uname = item.user or '' - tarinfo.gname = item.group or '' + tarinfo.uname = item.get('user', '') + tarinfo.gname = item.get('group', '') # The linkname in tar has 2 uses: # for symlinks it means the destination, while for hardlinks it refers to the file. 
# Since hardlinks in tar have a different type code (LNKTYPE) the format might @@ -1944,12 +1940,12 @@ def do_upgrade(self, args, repository, manifest=None, key=None): print('This repository is not encrypted, cannot enable TAM.') return EXIT_ERROR - if not manifest.tam_verified or not manifest.config.get(b'tam_required', False): + if not manifest.tam_verified or not manifest.config.get('tam_required', False): # The standard archive listing doesn't include the archive ID like in borg 1.1.x print('Manifest contents:') for archive_info in manifest.archives.list(sort_by=['ts']): print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id)) - manifest.config[b'tam_required'] = True + manifest.config['tam_required'] = True manifest.write() repository.commit(compact=False) if not key.tam_required: @@ -1972,7 +1968,7 @@ def do_upgrade(self, args, repository, manifest=None, key=None): print('Key updated') if hasattr(key, 'find_key'): print('Key location:', key.find_key()) - manifest.config[b'tam_required'] = False + manifest.config['tam_required'] = False manifest.write() repository.commit(compact=False) else: @@ -2304,7 +2300,7 @@ def do_debug_dump_archive(self, args, repository, manifest, key): """dump decoded archive metadata (not: data)""" try: - archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)] + archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive] except KeyError: raise Archive.DoesNotExist(args.location.archive) @@ -2321,7 +2317,7 @@ def output(fd): fd.write(do_indent(prepare_dump_dict(archive_meta_orig))) fd.write(',\n') - data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id'])) + data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id'])) archive_org_dict = msgpack.unpackb(data, object_hook=StableDict) fd.write(' "_meta":\n') @@ -2331,7 +2327,7 @@ def output(fd): unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict) first = 
True - for item_id in archive_org_dict[b'items']: + for item_id in archive_org_dict['items']: data = key.decrypt(item_id, repository.get(item_id)) unpacker.feed(data) for item in unpacker: diff --git a/src/borg/cache_sync/unpack.h b/src/borg/cache_sync/unpack.h index f6dd9ca8e8..bba566419f 100644 --- a/src/borg/cache_sync/unpack.h +++ b/src/borg/cache_sync/unpack.h @@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u) static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length) { - /* raw = what Borg uses for binary stuff and strings as well */ + /* raw = what Borg uses for text stuff */ /* Note: p points to an internal buffer which contains l bytes. */ (void)b; switch(u->expect) { - case expect_key: - if(length != 32) { - SET_LAST_ERROR("Incorrect key length"); - return -1; - } - memcpy(u->current.key, p, 32); - u->expect = expect_size; - break; case expect_map_key: if(length == 6 && !memcmp("chunks", p, 6)) { u->expect = expect_chunks_begin; @@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* u->expect = expect_map_item_end; } break; - default: - if(u->inside_chunks) { - SET_LAST_ERROR("Unexpected bytes in chunks structure"); - return -1; - } } return 0; } static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length) { - (void)u; (void)b; (void)p; (void)length; - UNEXPECTED("bin"); + /* bin = what Borg uses for binary stuff */ + /* Note: p points to an internal buffer which contains l bytes. 
*/ + (void)b; + + switch(u->expect) { + case expect_key: + if(length != 32) { + SET_LAST_ERROR("Incorrect key length"); + return -1; + } + memcpy(u->current.key, p, 32); + u->expect = expect_size; + break; + default: + if(u->inside_chunks) { + SET_LAST_ERROR("Unexpected bytes in chunks structure"); + return -1; + } + } return 0; } diff --git a/src/borg/constants.py index 13eb8bd232..d2fa119196 100644 --- a/src/borg/constants.py +++ b/src/borg/constants.py @@ -128,7 +128,7 @@ class KeyType: # upper 4 bits are ciphersuite, 0 == legacy AES-CTR KEYFILE = 0x00 # repos with PASSPHRASE mode could not be created any more since borg 1.0, see #97. - # in borg 1.3 all of its code and also the "borg key migrate-to-repokey" command was removed. + # in borg 2, all of its code and also the "borg key migrate-to-repokey" command was removed. # if you still need to, you can use "borg key migrate-to-repokey" with borg 1.0, 1.1 and 1.2. # Nowadays, we just dispatch this to RepoKey and assume the passphrase was migrated to a repokey. 
PASSPHRASE = 0x01 # legacy, attic and borg < 1.0 diff --git a/src/borg/crypto/key.py b/src/borg/crypto/key.py index 6ca6fbac02..2b7b50da86 100644 --- a/src/borg/crypto/key.py +++ b/src/borg/crypto/key.py @@ -22,7 +22,7 @@ from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong from ..helpers import msgpack from ..helpers.manifest import Manifest -from ..item import Key, EncryptedKey +from ..item import Key, EncryptedKey, want_bytes from ..platform import SaveFile from .nonces import NonceManager @@ -232,26 +232,28 @@ def unpack_and_verify_manifest(self, data, force_tam_not_required=False): unpacker = get_limited_unpacker('manifest') unpacker.feed(data) unpacked = unpacker.unpack() - if b'tam' not in unpacked: + if 'tam' not in unpacked: if tam_required: raise TAMRequiredError(self.repository._location.canonical_path()) else: logger.debug('TAM not found and not required') return unpacked, False - tam = unpacked.pop(b'tam', None) + tam = unpacked.pop('tam', None) if not isinstance(tam, dict): raise TAMInvalid() - tam_type = tam.get(b'type', b'').decode('ascii', 'replace') + tam_type = tam.get('type', '') if tam_type != 'HKDF_HMAC_SHA512': if tam_required: raise TAMUnsupportedSuiteError(repr(tam_type)) else: logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type) return unpacked, False - tam_hmac = tam.get(b'hmac') - tam_salt = tam.get(b'salt') - if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes): + tam_hmac = tam.get('hmac') + tam_salt = tam.get('salt') + if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)): raise TAMInvalid() + tam_hmac = want_bytes(tam_hmac) # legacy + tam_salt = want_bytes(tam_salt) # legacy offset = data.index(tam_hmac) data[offset:offset + 64] = bytes(64) tam_key = self._tam_key(tam_salt, context=b'manifest') diff --git a/src/borg/helpers/__init__.py b/src/borg/helpers/__init__.py index 34a59b903d..d8a4cda620 100644 --- 
a/src/borg/helpers/__init__.py +++ b/src/borg/helpers/__init__.py @@ -18,7 +18,7 @@ from .time import * # NOQA from .yes import * # NOQA -from .msgpack import is_slow_msgpack, is_supported_msgpack, int_to_bigint, bigint_to_int, get_limited_unpacker +from .msgpack import is_slow_msgpack, is_supported_msgpack, get_limited_unpacker from . import msgpack # generic mechanism to enable users to invoke workarounds by setting the diff --git a/src/borg/helpers/fs.py b/src/borg/helpers/fs.py index fecda9c69e..5509e0d0f0 100644 --- a/src/borg/helpers/fs.py +++ b/src/borg/helpers/fs.py @@ -205,8 +205,8 @@ def borg1_hardlink_slave(self, item): # legacy def hardlink_id_from_path(self, path): """compute a hardlink id from a path""" - assert isinstance(path, bytes) - return hashlib.sha256(path).digest() + assert isinstance(path, str) + return hashlib.sha256(path.encode('utf-8', errors='surrogateescape')).digest() def hardlink_id_from_inode(self, *, ino, dev): """compute a hardlink id from an inode""" diff --git a/src/borg/helpers/manifest.py b/src/borg/helpers/manifest.py index 425feb4e68..1b9d91fb49 100644 --- a/src/borg/helpers/manifest.py +++ b/src/borg/helpers/manifest.py @@ -12,7 +12,7 @@ logger = create_logger() from .datastruct import StableDict -from .parseformat import bin_to_hex, safe_encode, safe_decode +from .parseformat import bin_to_hex from .time import parse_timestamp from .. import shellpattern from ..constants import * # NOQA @@ -39,39 +39,35 @@ class Archives(abc.MutableMapping): str timestamps or datetime timestamps. 
""" def __init__(self): - # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts) + # key: str archive name, value: dict('id': bytes_id, 'time': str_iso_ts) self._archives = {} def __len__(self): return len(self._archives) def __iter__(self): - return iter(safe_decode(name) for name in self._archives) + return iter(self._archives) def __getitem__(self, name): assert isinstance(name, str) - _name = safe_encode(name) - values = self._archives.get(_name) + values = self._archives.get(name) if values is None: raise KeyError - ts = parse_timestamp(values[b'time'].decode()) - return ArchiveInfo(name=name, id=values[b'id'], ts=ts) + ts = parse_timestamp(values['time']) + return ArchiveInfo(name=name, id=values['id'], ts=ts) def __setitem__(self, name, info): assert isinstance(name, str) - name = safe_encode(name) assert isinstance(info, tuple) id, ts = info assert isinstance(id, bytes) if isinstance(ts, datetime): ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT) assert isinstance(ts, str) - ts = ts.encode() - self._archives[name] = {b'id': id, b'time': ts} + self._archives[name] = {'id': id, 'time': ts} def __delitem__(self, name): assert isinstance(name, str) - name = safe_encode(name) del self._archives[name] def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False): @@ -116,8 +112,8 @@ def list_considering(self, args): def set_raw_dict(self, d): """set the dict we get from the msgpack unpacker""" for k, v in d.items(): - assert isinstance(k, bytes) - assert isinstance(v, dict) and b'id' in v and b'time' in v + assert isinstance(k, str) + assert isinstance(v, dict) and 'id' in v and 'time' in v self._archives[k] = v def get_raw_dict(self): @@ -196,10 +192,10 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False): manifest.timestamp = m.get('timestamp') manifest.config = m.config # valid item keys are whatever is known in the repo or every key we know - 
manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', [])) + manifest.item_keys = ITEM_KEYS | frozenset(m.get('item_keys', [])) if manifest.tam_verified: - manifest_required = manifest.config.get(b'tam_required', False) + manifest_required = manifest.config.get('tam_required', False) security_required = tam_required(repository) if manifest_required and not security_required: logger.debug('Manifest is TAM verified and says TAM is required, updating security database...') @@ -214,32 +210,32 @@ def load(cls, repository, operations, key=None, force_tam_not_required=False): def check_repository_compatibility(self, operations): for operation in operations: assert isinstance(operation, self.Operation) - feature_flags = self.config.get(b'feature_flags', None) + feature_flags = self.config.get('feature_flags', None) if feature_flags is None: return - if operation.value.encode() not in feature_flags: + if operation.value not in feature_flags: continue - requirements = feature_flags[operation.value.encode()] - if b'mandatory' in requirements: - unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES + requirements = feature_flags[operation.value] + if 'mandatory' in requirements: + unsupported = set(requirements['mandatory']) - self.SUPPORTED_REPO_FEATURES if unsupported: - raise MandatoryFeatureUnsupported([f.decode() for f in unsupported]) + raise MandatoryFeatureUnsupported(list(unsupported)) def get_all_mandatory_features(self): result = {} - feature_flags = self.config.get(b'feature_flags', None) + feature_flags = self.config.get('feature_flags', None) if feature_flags is None: return result for operation, requirements in feature_flags.items(): - if b'mandatory' in requirements: - result[operation.decode()] = {feature.decode() for feature in requirements[b'mandatory']} + if 'mandatory' in requirements: + result[operation] = set(requirements['mandatory']) return result def write(self): from ..item import ManifestItem if 
self.key.tam_required: - self.config[b'tam_required'] = True + self.config['tam_required'] = True # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly if self.timestamp is None: self.timestamp = datetime.utcnow().strftime(ISO_FORMAT) diff --git a/src/borg/helpers/msgpack.py b/src/borg/helpers/msgpack.py index 411f00fec4..ffb87be032 100644 --- a/src/borg/helpers/msgpack.py +++ b/src/borg/helpers/msgpack.py @@ -1,21 +1,53 @@ +""" +wrapping msgpack +================ + +We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code. + +Packing +------- +- use_bin_type = True (used by borg since borg 2.0) + This is used to generate output according to new msgpack 2.0 spec. + This cleanly keeps bytes and str types apart. + +- use_bin_type = False (used by borg < 1.3) + This creates output according to the older msgpack spec. + BAD: str and bytes were packed into same "raw" representation. + +- unicode_errors = 'surrogateescape' + Guess backup applications are one of the rare cases when this needs to be used. + It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8. + There's a lot of crap out there, e.g. in filenames and as a backup tool, we must keep them as good as possible. + +Unpacking +--------- +- raw = False (used by borg since borg 2.0) + We already can use this with borg 2.0 due to the type conversion to the desired type in item.py update_internal + methods. This type conversion code can be removed in future, when we do not have to deal with data any more + that was packed the old way. + It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str. + +- raw = True (the old way, used by borg < 1.3) + +- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False). + +As of borg 2.0, we have fixed most of the msgpack str/bytes mess, #968. 
+Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely. +But from now on, borg only **writes** new data according to the new msgpack 2.0 spec, +thus we can remove some legacy support in a later borg release (some places are marked with "legacy"). + +current way in msgpack terms +---------------------------- + +- pack with use_bin_type=True (according to msgpack 2.0 spec) +- packs str -> raw and bytes -> bin +- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape') +- unpacks bin to bytes and raw to str (thus we need to convert to desired type if we want bytes from "raw") +""" + from .datastruct import StableDict from ..constants import * # NOQA -# wrapping msgpack --------------------------------------------------------------------------------------------------- -# -# due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it - -# to avoid having lots of clutter in the calling code. see tickets #968 and #3632. -# -# Packing -# ------- -# use_bin_type = False is needed to generate the old msgpack format (not msgpack 2.0 spec) as borg always did. -# unicode_errors = None is needed because usage of it is deprecated -# -# Unpacking -# --------- -# raw = True is needed to unpack the old msgpack format to bytes (not str, about the decoding see item.pyx). 
-# unicode_errors = None is needed because usage of it is deprecated - from msgpack import Packer as mp_Packer from msgpack import packb as mp_packb from msgpack import pack as mp_pack @@ -30,6 +62,10 @@ version = mp_version +USE_BIN_TYPE = True +RAW = False +UNICODE_ERRORS = 'surrogateescape' + class PackException(Exception): """Exception while msgpack packing""" @@ -40,10 +76,10 @@ class UnpackException(Exception): class Packer(mp_Packer): - def __init__(self, *, default=None, unicode_errors=None, - use_single_float=False, autoreset=True, use_bin_type=False, + def __init__(self, *, default=None, unicode_errors=UNICODE_ERRORS, + use_single_float=False, autoreset=True, use_bin_type=USE_BIN_TYPE, strict_types=False): - assert unicode_errors is None + assert unicode_errors == UNICODE_ERRORS super().__init__(default=default, unicode_errors=unicode_errors, use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type, strict_types=strict_types) @@ -55,16 +91,16 @@ def pack(self, obj): raise PackException(e) -def packb(o, *, use_bin_type=False, unicode_errors=None, **kwargs): - assert unicode_errors is None +def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs): + assert unicode_errors == UNICODE_ERRORS try: return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs) except Exception as e: raise PackException(e) -def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs): - assert unicode_errors is None +def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs): + assert unicode_errors == UNICODE_ERRORS try: return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs) except Exception as e: @@ -72,13 +108,13 @@ def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs): class Unpacker(mp_Unpacker): - def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=True, + def __init__(self, 
file_like=None, *, read_size=0, use_list=True, raw=RAW, object_hook=None, object_pairs_hook=None, list_hook=None, - unicode_errors=None, max_buffer_size=0, + unicode_errors=UNICODE_ERRORS, max_buffer_size=0, ext_hook=ExtType, strict_map_key=False): - assert raw is True - assert unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw, object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, unicode_errors=unicode_errors, max_buffer_size=max_buffer_size, @@ -105,10 +141,11 @@ def __next__(self): next = __next__ -def unpackb(packed, *, raw=True, unicode_errors=None, +def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs): - assert unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS try: kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key) @@ -118,10 +155,11 @@ def unpackb(packed, *, raw=True, unicode_errors=None, raise UnpackException(e) -def unpack(stream, *, raw=True, unicode_errors=None, +def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS, strict_map_key=False, **kwargs): - assert unicode_errors is None + assert raw == RAW + assert unicode_errors == UNICODE_ERRORS try: kw = dict(raw=raw, unicode_errors=unicode_errors, strict_map_key=strict_map_key) @@ -164,30 +202,11 @@ def get_limited_unpacker(kind): return Unpacker(**args) -def bigint_to_int(mtime): # legacy - """Convert bytearray to int - """ - if isinstance(mtime, bytes): - return int.from_bytes(mtime, 'little', signed=True) - return mtime - - -def int_to_bigint(value): # legacy - """Convert integers larger than 64 bits to bytearray - - Smaller integers are left alone - """ - if value.bit_length() > 63: - return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True) - return value - - def int_to_timestamp(ns): + assert isinstance(ns, int) return 
Timestamp.from_unix_nano(ns) def timestamp_to_int(ts): - if isinstance(ts, Timestamp): - return ts.to_unix_nano() - # legacy support note: we need to keep the bigint conversion for compatibility with borg < 1.3 archives. - return bigint_to_int(ts) + assert isinstance(ts, Timestamp) + return ts.to_unix_nano() diff --git a/src/borg/helpers/parseformat.py b/src/borg/helpers/parseformat.py index 414402de0f..e772f38c47 100644 --- a/src/borg/helpers/parseformat.py +++ b/src/borg/helpers/parseformat.py @@ -808,8 +808,8 @@ def get_item_data(self, item): hlid = bin_to_hex(hlid) if hlid else '' item_data['type'] = item_type item_data['mode'] = mode - item_data['user'] = item.user or item.uid - item_data['group'] = item.group or item.gid + item_data['user'] = item.get('user', str(item.uid)) + item_data['group'] = item.get('group', str(item.gid)) item_data['uid'] = item.uid item_data['gid'] = item.gid item_data['path'] = remove_surrogates(item.path) diff --git a/src/borg/item.pyx b/src/borg/item.pyx index 9ea76f2de4..cd1a3544c3 100644 --- a/src/borg/item.pyx +++ b/src/borg/item.pyx @@ -2,10 +2,9 @@ import stat from collections import namedtuple from .constants import ITEM_KEYS, ARCHIVE_KEYS -from .helpers import safe_encode, safe_decode from .helpers import StableDict from .helpers import format_file_size -from .helpers.msgpack import timestamp_to_int, int_to_timestamp +from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp cdef extern from "_item.c": @@ -16,6 +15,102 @@ cdef extern from "_item.c": API_VERSION = '1.2_01' +def fix_key(data, key, *, errors='strict'): + """if k is a bytes-typed key, migrate key/value to a str-typed key in dict data""" + if isinstance(key, bytes): + value = data.pop(key) + key = key.decode('utf-8', errors=errors) + data[key] = value + assert isinstance(key, str) + return key + + +def fix_str_value(data, key, errors='surrogateescape'): + """makes sure that data[key] is a str (decode if it is bytes)""" + assert isinstance(key, 
str) # fix_key must be called first + value = data[key] + value = want_str(value, errors=errors) + data[key] = value + return value + + +def fix_bytes_value(data, key): + """makes sure that data[key] is bytes (encode if it is str)""" + assert isinstance(key, str) # fix_key must be called first + value = data[key] + value = want_bytes(value) + data[key] = value + return value + + +def fix_list_of_str(v): + """make sure we have a list of str""" + assert isinstance(v, (tuple, list)) + return [want_str(e) for e in v] + + +def fix_list_of_bytes(v): + """make sure we have a list of bytes""" + assert isinstance(v, (tuple, list)) + return [want_bytes(e) for e in v] + + +def fix_list_of_chunkentries(v): + """make sure we have a list of correct chunkentries""" + assert isinstance(v, (tuple, list)) + chunks = [] + for ce in v: + assert isinstance(ce, (tuple, list)) + assert len(ce) == 3 # id, size, csize + assert isinstance(ce[1], int) + assert isinstance(ce[2], int) + ce_fixed = [want_bytes(ce[0]), ce[1], ce[2]] # list! 
+ chunks.append(ce_fixed) # create a list of lists + return chunks + + +def fix_tuple_of_str(v): + """make sure we have a tuple of str""" + assert isinstance(v, (tuple, list)) + return tuple(want_str(e) for e in v) + + +def fix_tuple_of_str_and_int(v): + """make sure we have a tuple of str or int""" + assert isinstance(v, (tuple, list)) + t = tuple(e.decode() if isinstance(e, bytes) else e for e in v) + assert all(isinstance(e, (str, int)) for e in t), repr(t) + return t + + +def fix_timestamp(v): + """make sure v is a Timestamp""" + if isinstance(v, Timestamp): + return v + # legacy support + if isinstance(v, bytes): # was: bigint_to_int() + v = int.from_bytes(v, 'little', signed=True) + assert isinstance(v, int) + return int_to_timestamp(v) + + +def want_bytes(v, *, errors='surrogateescape'): + """we know that we want bytes and the value should be bytes""" + # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False + if isinstance(v, str): + v = v.encode('utf-8', errors=errors) + assert isinstance(v, bytes), f'not a bytes object, but {v!r}' + return v + + +def want_str(v, *, errors='surrogateescape'): + """we know that we want str and the value should be str""" + if isinstance(v, bytes): + v = v.decode('utf-8', errors=errors) + assert isinstance(v, str), f'not a str object, but {v!r}' + return v + + class PropDict: """ Manage a dictionary via properties. @@ -112,6 +207,8 @@ class PropDict: raise AttributeError(attr_error_msg) from None if decode is not None: value = decode(value) + if not isinstance(value, value_type): + raise TypeError(type_error_msg) return value def _set(self, value): @@ -139,14 +236,9 @@ class Item(PropDict): Items are created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to Item(internal_dict=d) and use item.key_name later. 
- msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting - such a property and encode when setting it. + msgpack unpacker gives us a dict, just give it to Item(internal_dict=d) and use item.key_name later. If an Item shall be serialized, give as_dict() method output to msgpack packer. - - A bug in Attic up to and including release 0.13 added a (meaningless) 'acl' key to every item. - We must never re-use this key. See test_attic013_acl_bug for details. """ VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', } # str-typed keys @@ -155,10 +247,10 @@ class Item(PropDict): # properties statically defined, so that IDEs can know their names: - path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) - group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + path = PropDict._make_property('path', str, 'surrogate-escaped str') + source = PropDict._make_property('source', str, 'surrogate-escaped str') + user = PropDict._make_property('user', str, 'surrogate-escaped str') + group = PropDict._make_property('group', str, 'surrogate-escaped str') acl_access = PropDict._make_property('acl_access', bytes) acl_default = PropDict._make_property('acl_default', bytes) @@ -177,14 +269,13 @@ class Item(PropDict): birthtime = PropDict._make_property('birthtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int) # size is only present for items with a chunk list and then it is sum(chunk_sizes) - # compatibility note: this is a new feature, in old archives size will be missing. 
size = PropDict._make_property('size', int) hlid = PropDict._make_property('hlid', bytes) # hard link id: same value means same hard link. hardlink_master = PropDict._make_property('hardlink_master', bool) # legacy - chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None') - chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None') + chunks = PropDict._make_property('chunks', list, 'list') + chunks_healthy = PropDict._make_property('chunks_healthy', list, 'list') xattrs = PropDict._make_property('xattrs', StableDict) @@ -193,12 +284,10 @@ class Item(PropDict): part = PropDict._make_property('part', int) - def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False, consider_ids=None): + def get_size(self, memorize=False, compressed=False, from_chunks=False, consider_ids=None): """ Determine the (uncompressed or compressed) size of this item. - :param hardlink_masters: If given, the size of hardlink slaves is computed via the hardlink master's chunk list, - otherwise size will be returned as 0. :param memorize: Whether the computed size value will be stored into the item. :param compressed: Whether the compressed or uncompressed size will be returned. :param from_chunks: If true, size is computed from chunks even if a precomputed value is available. @@ -218,31 +307,14 @@ class Item(PropDict): # no precomputed (c)size value available, compute it: try: chunks = getattr(self, 'chunks') - having_chunks = True except AttributeError: - having_chunks = False - # this item has no (own) chunks list, but if this is a hardlink slave - # and we know the master, we can still compute the size. 
- if hardlink_masters is None: - chunks = None - else: - try: - master = getattr(self, 'source') - except AttributeError: - # not a hardlink slave, likely a directory or special file w/o chunks - chunks = None - else: - # hardlink slave, try to fetch hardlink master's chunks list - # todo: put precomputed size into hardlink_masters' values and use it, if present - chunks, _ = hardlink_masters.get(master, (None, None)) - if chunks is None: - return 0 + return 0 if consider_ids is not None: size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks if chunk.id in consider_ids) else: size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks) # if requested, memorize the precomputed (c)size for items that have an own chunks list: - if memorize and having_chunks: + if memorize: setattr(self, attr, size) return size @@ -290,6 +362,31 @@ class Item(PropDict): except AttributeError: return False + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str), + # also need to fix old timestamp data types. 
+ for k, v in list(d.items()): + k = fix_key(d, k) + if k in ('path', 'source', 'user', 'group'): + v = fix_str_value(d, k) + if k in ('chunks', 'chunks_healthy'): + v = fix_list_of_chunkentries(v) + if k in ('atime', 'ctime', 'mtime', 'birthtime'): + v = fix_timestamp(v) + if k in ('acl_access', 'acl_default', 'acl_extended', 'acl_nfs4'): + v = fix_bytes_value(d, k) + if k == 'xattrs': + if not isinstance(v, StableDict): + v = StableDict(v) + v_new = StableDict() + for xk, xv in list(v.items()): + xk = want_bytes(xk) + # old borg used to store None instead of a b'' value + xv = b'' if xv is None else want_bytes(xv) + v_new[xk] = xv + v = v_new # xattrs is a StableDict(bytes keys -> bytes values) + self._dict[k] = v + class EncryptedKey(PropDict): """ @@ -298,18 +395,18 @@ class EncryptedKey(PropDict): A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later. + msgpack unpacker gives us a dict, just give it to EncryptedKey(d) and use enc_key.xxx later. If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer. 
""" - VALID_KEYS = { 'version', 'algorithm', 'iterations', 'salt', 'hash', 'data', - 'argon2_time_cost', 'argon2_memory_cost', 'argon2_parallelism', 'argon2_type' } + VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data', + 'argon2_time_cost', 'argon2_memory_cost', 'argon2_parallelism', 'argon2_type'} __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode) + algorithm = PropDict._make_property('algorithm', str) iterations = PropDict._make_property('iterations', int) salt = PropDict._make_property('salt', bytes) hash = PropDict._make_property('hash', bytes) @@ -317,7 +414,19 @@ class EncryptedKey(PropDict): argon2_time_cost = PropDict._make_property('argon2_time_cost', int) argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int) argon2_parallelism = PropDict._make_property('argon2_parallelism', int) - argon2_type = PropDict._make_property('argon2_type', str, encode=str.encode, decode=bytes.decode) + argon2_type = PropDict._make_property('argon2_type', str) + + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k in ('algorithm', 'argon2_type'): + v = fix_str_value(d, k) + if k in ('salt', 'hash', 'data'): + v = fix_bytes_value(d, k) + self._dict[k] = v class Key(PropDict): @@ -327,7 +436,7 @@ class Key(PropDict): A Key is created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to Key(d) and use key.xxx later. + msgpack unpacker gives us a dict, just give it to Key(d) and use key.xxx later. 
If a Key shall be serialized, give as_dict() method output to msgpack packer. """ @@ -344,17 +453,15 @@ class Key(PropDict): chunk_seed = PropDict._make_property('chunk_seed', int) tam_required = PropDict._make_property('tam_required', bool) - -def tuple_encode(t): - """encode a tuple that might contain str items""" - # we have str, but want to give bytes to msgpack.pack - return tuple(safe_encode(e) if isinstance(e, str) else e for e in t) - - -def tuple_decode(t): - """decode a tuple that might contain bytes items""" - # we get bytes objects from msgpack.unpack, but want str - return tuple(safe_decode(e) if isinstance(e, bytes) else e for e in t) + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k in ('repository_id', 'enc_key', 'enc_hmac_key', 'id_key'): + v = fix_bytes_value(d, k) + self._dict[k] = v class ArchiveItem(PropDict): @@ -364,7 +471,7 @@ class ArchiveItem(PropDict): An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later. + msgpack unpacker gives us a dict, just give it to ArchiveItem(d) and use arch.xxx later. If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer. 
""" @@ -374,15 +481,15 @@ class ArchiveItem(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + name = PropDict._make_property('name', str, 'surrogate-escaped str') items = PropDict._make_property('items', list) cmdline = PropDict._make_property('cmdline', list) # list of s-e-str - hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - chunker_params = PropDict._make_property('chunker_params', tuple, 'chunker-params tuple', encode=tuple_encode, decode=tuple_decode) + hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str') + username = PropDict._make_property('username', str, 'surrogate-escaped str') + time = PropDict._make_property('time', str) + time_end = PropDict._make_property('time_end', str) + comment = PropDict._make_property('comment', str, 'surrogate-escaped str') + chunker_params = PropDict._make_property('chunker_params', tuple) recreate_cmdline = PropDict._make_property('recreate_cmdline', list) # list of s-e-str # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. 
b2 recreate_source_id = PropDict._make_property('recreate_source_id', bytes) @@ -395,6 +502,24 @@ class ArchiveItem(PropDict): csize_parts = PropDict._make_property('csize_parts', int) nfiles_parts = PropDict._make_property('nfiles_parts', int) + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k in ('name', 'hostname', 'username', 'comment'): + v = fix_str_value(d, k) + if k in ('time', 'time_end'): + v = fix_str_value(d, k, 'replace') + if k == 'chunker_params': + v = fix_tuple_of_str_and_int(v) + if k in ('cmdline', 'recreate_cmdline'): + v = fix_list_of_str(v) + if k == 'items': + v = fix_list_of_bytes(v) + self._dict[k] = v + class ManifestItem(PropDict): """ @@ -403,7 +528,7 @@ class ManifestItem(PropDict): A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or built step-by-step by setting attributes. - msgpack gives us a dict with bytes-typed keys, just give it to ManifestItem(d) and use manifest.xxx later. + msgpack unpacker gives us a dict, just give it to ManifestItem(d) and use manifest.xxx later. If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer. 
""" @@ -413,10 +538,54 @@ class ManifestItem(PropDict): __slots__ = ("_dict", ) # avoid setting attributes not supported by properties version = PropDict._make_property('version', int) - archives = PropDict._make_property('archives', dict) # name -> dict - timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + archives = PropDict._make_property('archives', dict, 'dict of str -> dict') # name -> dict + timestamp = PropDict._make_property('timestamp', str) config = PropDict._make_property('config', dict) - item_keys = PropDict._make_property('item_keys', tuple) + item_keys = PropDict._make_property('item_keys', tuple, 'tuple of str') + + def update_internal(self, d): + # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str) + for k, v in list(d.items()): + k = fix_key(d, k) + if k == 'version': + assert isinstance(v, int) + if k == 'archives': + ad = v + assert isinstance(ad, dict) + for ak, av in list(ad.items()): + ak = fix_key(ad, ak, errors='surrogateescape') + assert isinstance(av, dict) + for ik, iv in list(av.items()): + ik = fix_key(av, ik) + if ik == 'id': + fix_bytes_value(av, 'id') + if ik == 'time': + fix_str_value(av, 'time') + assert set(av) == {'id', 'time'} + if k == 'timestamp': + v = fix_str_value(d, k, 'replace') + if k == 'config': + cd = v + assert isinstance(cd, dict) + for ck, cv in list(cd.items()): + ck = fix_key(cd, ck) + if ck == 'tam_required': + assert isinstance(cv, bool) + if ck == 'feature_flags': + assert isinstance(cv, dict) + ops = {'read', 'check', 'write', 'delete'} + for op, specs in list(cv.items()): + op = fix_key(cv, op) + assert op in ops + for speck, specv in list(specs.items()): + speck = fix_key(specs, speck) + if speck == 'mandatory': + specs[speck] = fix_tuple_of_str(specv) + assert set(cv).issubset(ops) + if k == 'item_keys': + v = fix_tuple_of_str(v) + self._dict[k] = v + class ItemDiff: """ diff --git 
a/src/borg/remote.py b/src/borg/remote.py index 2870d71d98..6ea51d3c3d 100644 --- a/src/borg/remote.py +++ b/src/borg/remote.py @@ -38,7 +38,7 @@ RPC_PROTOCOL_VERSION = 2 BORG_VERSION = parse_version(__version__) -MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r' +MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r' MAX_INFLIGHT = 100 @@ -138,10 +138,6 @@ def __init__(self, data): } -def decode_keys(d): - return {k.decode(): d[k] for k in d} - - class RepositoryServer: # pragma: no cover rpc_methods = ( '__len__', @@ -217,13 +213,12 @@ def serve(self): if isinstance(unpacked, dict): dictFormat = True msgid = unpacked[MSGID] - method = unpacked[MSG].decode() - args = decode_keys(unpacked[ARGS]) + method = unpacked[MSG] + args = unpacked[ARGS] elif isinstance(unpacked, tuple) and len(unpacked) == 4: dictFormat = False # The first field 'type' was always 1 and has always been ignored _, msgid, method, args = unpacked - method = method.decode() args = self.positional_to_named(method, args) else: if self.repository is not None: @@ -256,21 +251,21 @@ def serve(self): try: msg = msgpack.packb({MSGID: msgid, - b'exception_class': e.__class__.__name__, - b'exception_args': e.args, - b'exception_full': ex_full, - b'exception_short': ex_short, - b'exception_trace': ex_trace, - b'sysinfo': sysinfo()}) + 'exception_class': e.__class__.__name__, + 'exception_args': e.args, + 'exception_full': ex_full, + 'exception_short': ex_short, + 'exception_trace': ex_trace, + 'sysinfo': sysinfo()}) except TypeError: msg = msgpack.packb({MSGID: msgid, - b'exception_class': e.__class__.__name__, - b'exception_args': [x if isinstance(x, (str, bytes, int)) else None - for x in e.args], - b'exception_full': ex_full, - b'exception_short': ex_short, - b'exception_trace': ex_trace, - b'sysinfo': sysinfo()}) + 'exception_class': e.__class__.__name__, + 'exception_args': [x if isinstance(x, (str, bytes, int)) else None + for x in e.args], + 'exception_full': ex_full, + 'exception_short': ex_short, + 
'exception_trace': ex_trace, + 'sysinfo': sysinfo()}) os_write(stdout_fd, msg) else: @@ -307,7 +302,7 @@ def negotiate(self, client_data): # clients since 1.1.0b3 use a dict as client_data # clients since 1.1.0b6 support json log format from server if isinstance(client_data, dict): - self.client_version = client_data[b'client_version'] + self.client_version = client_data['client_version'] level = logging.getLevelName(logging.getLogger('').level) setup_logging(is_serve=True, json=True, level=level) logger.debug('Initialized logging system for JSON-based protocol') @@ -369,7 +364,6 @@ def open(self, path, create=False, lock_wait=None, lock=True, exclusive=None, ap return self.repository.id def inject_exception(self, kind): - kind = kind.decode() s1 = 'test string' s2 = 'test string2' if kind == 'DoesNotExist': @@ -483,35 +477,35 @@ class RemoteRepository: class RPCError(Exception): def __init__(self, unpacked): - # for borg < 1.1: unpacked only has b'exception_class' as key - # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo' + # for borg < 1.1: unpacked only has 'exception_class' as key + # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo' self.unpacked = unpacked def get_message(self): - if b'exception_short' in self.unpacked: - return b'\n'.join(self.unpacked[b'exception_short']).decode() + if 'exception_short' in self.unpacked: + return '\n'.join(self.unpacked['exception_short']) else: return self.exception_class @property def traceback(self): - return self.unpacked.get(b'exception_trace', True) + return self.unpacked.get('exception_trace', True) @property def exception_class(self): - return self.unpacked[b'exception_class'].decode() + return self.unpacked['exception_class'] @property def exception_full(self): - if b'exception_full' in self.unpacked: - return b'\n'.join(self.unpacked[b'exception_full']).decode() + if 'exception_full' in self.unpacked: + return 
'\n'.join(self.unpacked['exception_full']) else: return self.get_message() + '\nRemote Exception (see remote log for the traceback)' @property def sysinfo(self): - if b'sysinfo' in self.unpacked: - return self.unpacked[b'sysinfo'].decode() + if 'sysinfo' in self.unpacked: + return self.unpacked['sysinfo'] else: return '' @@ -570,15 +564,15 @@ def __init__(self, location, create=False, exclusive=False, lock_wait=None, lock try: try: version = self.call('negotiate', {'client_data': { - b'client_version': BORG_VERSION, + 'client_version': BORG_VERSION, }}) except ConnectionClosed: raise ConnectionClosedWithHint('Is borg working on the server?') from None if version == RPC_PROTOCOL_VERSION: self.dictFormat = False - elif isinstance(version, dict) and b'server_version' in version: + elif isinstance(version, dict) and 'server_version' in version: self.dictFormat = True - self.server_version = version[b'server_version'] + self.server_version = version['server_version'] else: raise Exception('Server insisted on using unsupported protocol version %s' % version) @@ -733,9 +727,9 @@ def pop_preload_msgid(chunkid): return msgid def handle_error(unpacked): - error = unpacked[b'exception_class'].decode() - old_server = b'exception_args' not in unpacked - args = unpacked.get(b'exception_args') + error = unpacked['exception_class'] + old_server = 'exception_args' not in unpacked + args = unpacked.get('exception_args') if error == 'DoesNotExist': raise Repository.DoesNotExist(self.location.processed) @@ -747,29 +741,29 @@ def handle_error(unpacked): if old_server: raise IntegrityError('(not available)') else: - raise IntegrityError(args[0].decode()) + raise IntegrityError(args[0]) elif error == 'AtticRepository': if old_server: raise Repository.AtticRepository('(not available)') else: - raise Repository.AtticRepository(args[0].decode()) + raise Repository.AtticRepository(args[0]) elif error == 'PathNotAllowed': if old_server: raise PathNotAllowed('(unknown)') else: - raise 
PathNotAllowed(args[0].decode()) + raise PathNotAllowed(args[0]) elif error == 'ParentPathDoesNotExist': - raise Repository.ParentPathDoesNotExist(args[0].decode()) + raise Repository.ParentPathDoesNotExist(args[0]) elif error == 'ObjectNotFound': if old_server: raise Repository.ObjectNotFound('(not available)', self.location.processed) else: - raise Repository.ObjectNotFound(args[0].decode(), self.location.processed) + raise Repository.ObjectNotFound(args[0], self.location.processed) elif error == 'InvalidRPCMethod': if old_server: raise InvalidRPCMethod('(not available)') else: - raise InvalidRPCMethod(args[0].decode()) + raise InvalidRPCMethod(args[0]) else: raise self.RPCError(unpacked) @@ -788,7 +782,7 @@ def handle_error(unpacked): try: unpacked = self.responses.pop(waiting_for[0]) waiting_for.pop(0) - if b'exception_class' in unpacked: + if 'exception_class' in unpacked: handle_error(unpacked) else: yield unpacked[RESULT] @@ -808,7 +802,7 @@ def handle_error(unpacked): else: return else: - if b'exception_class' in unpacked: + if 'exception_class' in unpacked: handle_error(unpacked) else: yield unpacked[RESULT] @@ -834,7 +828,7 @@ def handle_error(unpacked): _, msgid, error, res = unpacked if error: # ignore res, because it is only a fixed string anyway. - unpacked = {MSGID: msgid, b'exception_class': error} + unpacked = {MSGID: msgid, 'exception_class': error} else: unpacked = {MSGID: msgid, RESULT: res} else: @@ -842,7 +836,7 @@ def handle_error(unpacked): if msgid in self.ignore_responses: self.ignore_responses.remove(msgid) # async methods never return values, but may raise exceptions. 
- if b'exception_class' in unpacked: + if 'exception_class' in unpacked: self.async_responses[msgid] = unpacked else: # we currently do not have async result values except "None", diff --git a/src/borg/repository.py b/src/borg/repository.py index 9267fe0e6c..3fcc72aadd 100644 --- a/src/borg/repository.py +++ b/src/borg/repository.py @@ -516,16 +516,16 @@ def _read_integrity(self, transaction_id, key): integrity = msgpack.unpack(fd) except FileNotFoundError: return - if integrity.get(b'version') != 2: - logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file) + if integrity.get('version') != 2: + logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file) return - return integrity[key].decode() + return integrity[key] def open_index(self, transaction_id, auto_recover=True): if transaction_id is None: return NSIndex() index_path = os.path.join(self.path, 'index.%d' % transaction_id) - integrity_data = self._read_integrity(transaction_id, b'index') + integrity_data = self._read_integrity(transaction_id, 'index') try: with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd: return NSIndex.read(fd) @@ -575,7 +575,7 @@ def prepare_txn(self, transaction_id, do_cleanup=True): self.io.cleanup(transaction_id) hints_path = os.path.join(self.path, 'hints.%d' % transaction_id) index_path = os.path.join(self.path, 'index.%d' % transaction_id) - integrity_data = self._read_integrity(transaction_id, b'hints') + integrity_data = self._read_integrity(transaction_id, 'hints') try: with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd: hints = msgpack.unpack(fd) @@ -588,23 +588,23 @@ def prepare_txn(self, transaction_id, do_cleanup=True): self.check_transaction() self.prepare_txn(transaction_id) return - if hints[b'version'] == 1: + if hints['version'] == 1: logger.debug('Upgrading from v1 hints.%d', transaction_id) - self.segments = 
hints[b'segments'] + self.segments = hints['segments'] self.compact = FreeSpace() self.storage_quota_use = 0 self.shadow_index = {} - for segment in sorted(hints[b'compact']): + for segment in sorted(hints['compact']): logger.debug('Rebuilding sparse info for segment %d', segment) self._rebuild_sparse(segment) logger.debug('Upgrade to v2 hints complete') - elif hints[b'version'] != 2: - raise ValueError('Unknown hints file version: %d' % hints[b'version']) + elif hints['version'] != 2: + raise ValueError('Unknown hints file version: %d' % hints['version']) else: - self.segments = hints[b'segments'] - self.compact = FreeSpace(hints[b'compact']) - self.storage_quota_use = hints.get(b'storage_quota_use', 0) - self.shadow_index = hints.get(b'shadow_index', {}) + self.segments = hints['segments'] + self.compact = FreeSpace(hints['compact']) + self.storage_quota_use = hints.get('storage_quota_use', 0) + self.shadow_index = hints.get('shadow_index', {}) self.log_storage_quota() # Drop uncommitted segments in the shadow index for key, shadowed_segments in self.shadow_index.items(): @@ -621,16 +621,16 @@ def rename_tmp(file): os.rename(file + '.tmp', file) hints = { - b'version': 2, - b'segments': self.segments, - b'compact': self.compact, - b'storage_quota_use': self.storage_quota_use, - b'shadow_index': self.shadow_index, + 'version': 2, + 'segments': self.segments, + 'compact': self.compact, + 'storage_quota_use': self.storage_quota_use, + 'shadow_index': self.shadow_index, } integrity = { # Integrity version started at 2, the current hints version. # Thus, integrity version == hints version, for now. 
- b'version': 2, + 'version': 2, } transaction_id = self.io.get_segments_transaction_id() assert transaction_id is not None @@ -647,7 +647,7 @@ def rename_tmp(file): with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd: msgpack.pack(hints, fd) flush_and_sync(fd) - integrity[b'hints'] = fd.integrity_data + integrity['hints'] = fd.integrity_data # Write repository index index_name = 'index.%d' % transaction_id @@ -656,7 +656,7 @@ def rename_tmp(file): # XXX: Consider using SyncFile for index write-outs. self.index.write(fd) flush_and_sync(fd) - integrity[b'index'] = fd.integrity_data + integrity['index'] = fd.integrity_data # Write integrity file, containing checksums of the hints and index files integrity_name = 'integrity.%d' % transaction_id diff --git a/src/borg/testsuite/archive.py b/src/borg/testsuite/archive.py index 3f8535ff5a..9cdcf50466 100644 --- a/src/borg/testsuite/archive.py +++ b/src/borg/testsuite/archive.py @@ -171,7 +171,7 @@ def make_chunks(self, items): return b''.join(msgpack.packb({'path': item}) for item in items) def _validator(self, value): - return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') + return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz') def process(self, input): unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS) @@ -186,14 +186,14 @@ def process(self, input): return result def test_extra_garbage_no_sync(self): - chunks = [(False, [self.make_chunks([b'foo', b'bar'])]), - (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])] + chunks = [(False, [self.make_chunks(['foo', 'bar'])]), + (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])] result = self.process(chunks) self.assert_equal(result, [ - {b'path': b'foo'}, {b'path': b'bar'}, + {'path': 'foo'}, {'path': 'bar'}, 103, 97, 114, 98, 97, 103, 101, - {b'path': b'boo'}, - {b'path': b'baz'}]) + {'path': 'boo'}, + {'path': 'baz'}]) def split(self, 
left, length): parts = [] @@ -203,22 +203,22 @@ def split(self, left, length): return parts def test_correct_stream(self): - chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2) + chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2) input = [(False, chunks)] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}]) def test_missing_chunk(self): - chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) + chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4) input = [(False, chunks[:3]), (True, chunks[4:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}]) def test_corrupt_chunk(self): - chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) + chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4) input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}]) @pytest.fixture @@ -242,7 +242,7 @@ def test_invalid_msgpacked_item(packed, item_keys_serialized): @pytest.mark.parametrize('packed', [msgpack.packb(o) for o in [ - {b'path': b'/a/b/c'}, # small (different msgpack mapping type!) + {'path': b'/a/b/c'}, # small (different msgpack mapping type!) 
OrderedDict((k, b'') for k in IK), # as big (key count) as it gets OrderedDict((k, b'x' * 1000) for k in IK), # as big (key count and volume) as it gets ]]) @@ -251,7 +251,7 @@ def test_valid_msgpacked_items(packed, item_keys_serialized): def test_key_length_msgpacked_items(): - key = b'x' * 32 # 31 bytes is the limit for fixstr msgpack type + key = 'x' * 32 # 31 bytes is the limit for fixstr msgpack type data = {key: b''} item_keys_serialized = [msgpack.packb(key), ] assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized) diff --git a/src/borg/testsuite/archiver.py b/src/borg/testsuite/archiver.py index 32feeb44ee..a4205ea760 100644 --- a/src/borg/testsuite/archiver.py +++ b/src/borg/testsuite/archiver.py @@ -1810,7 +1810,7 @@ def test_create_dry_run(self): def add_unknown_feature(self, operation): with Repository(self.repository_path, exclusive=True) as repository: manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK) - manifest.config[b'feature_flags'] = {operation.value.encode(): {b'mandatory': [b'unknown-feature']}} + manifest.config['feature_flags'] = {operation.value: {'mandatory': ['unknown-feature']}} manifest.write() repository.commit(compact=False) @@ -3623,14 +3623,14 @@ def test_init_defaults_to_argon2(self): self.cmd('init', '--encryption=repokey', self.repository_location) with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == b'argon2 chacha20-poly1305' + assert key['algorithm'] == 'argon2 chacha20-poly1305' def test_init_with_explicit_key_algorithm(self): """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401""" self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location) with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == b'sha256' + assert key['algorithm'] == 'sha256' def 
verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm): self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location) @@ -3640,13 +3640,13 @@ def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, ex with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == expected_algorithm + assert key['algorithm'] == expected_algorithm def test_change_passphrase_does_not_change_algorithm_argon2(self): - self.verify_change_passphrase_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305') + self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305') def test_change_passphrase_does_not_change_algorithm_pbkdf2(self): - self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', b'sha256') + self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', 'sha256') def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm): self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location) @@ -3655,13 +3655,13 @@ def verify_change_location_does_not_change_algorithm(self, given_algorithm, expe with Repository(self.repository_path) as repository: key = msgpack.unpackb(a2b_base64(repository.load_key())) - assert key[b'algorithm'] == expected_algorithm + assert key['algorithm'] == expected_algorithm def test_change_location_does_not_change_algorithm_argon2(self): - self.verify_change_location_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305') + self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305') def test_change_location_does_not_change_algorithm_pbkdf2(self): - self.verify_change_location_does_not_change_algorithm('pbkdf2', b'sha256') + self.verify_change_location_does_not_change_algorithm('pbkdf2', 'sha256') def 
test_key_change_algorithm(self): self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location) @@ -3969,7 +3969,7 @@ def test_not_required(self): key.change_passphrase(key._passphrase) manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID))) - del manifest[b'tam'] + del manifest['tam'] repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest))) repository.commit(compact=False) output = self.cmd('list', '--debug', self.repository_location) diff --git a/src/borg/testsuite/helpers.py b/src/borg/testsuite/helpers.py index b181da564e..fd04141167 100644 --- a/src/borg/testsuite/helpers.py +++ b/src/borg/testsuite/helpers.py @@ -22,7 +22,7 @@ from ..helpers import is_slow_msgpack from ..helpers import msgpack from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH -from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex +from ..helpers import StableDict, bin_to_hex from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless from ..helpers import swidth_slice @@ -38,18 +38,6 @@ from . 
import BaseTestCase, FakeInputs -class BigIntTestCase(BaseTestCase): - - def test_bigint(self): - self.assert_equal(int_to_bigint(0), 0) - self.assert_equal(int_to_bigint(2**63-1), 2**63-1) - self.assert_equal(int_to_bigint(-2**63+1), -2**63+1) - self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00') - self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff') - self.assert_equal(bigint_to_int(int_to_bigint(-2**70)), -2**70) - self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70) - - def test_bin_to_hex(): assert bin_to_hex(b'') == '' assert bin_to_hex(b'\x00\x01\xff') == '0001ff' diff --git a/src/borg/testsuite/item.py b/src/borg/testsuite/item.py index 80b38edce4..a0c1d9d134 100644 --- a/src/borg/testsuite/item.py +++ b/src/borg/testsuite/item.py @@ -89,20 +89,12 @@ def test_item_mptimestamp_property(): assert item.as_dict() == {'atime': Timestamp.from_unix_nano(big)} -def test_item_user_group_none(): - item = Item() - item.user = None - assert item.user is None - item.group = None - assert item.group is None - - def test_item_se_str_property(): # start simple item = Item() item.path = '/a/b/c' assert item.path == '/a/b/c' - assert item.as_dict() == {'path': b'/a/b/c'} + assert item.as_dict() == {'path': '/a/b/c'} del item.path assert item.as_dict() == {} with pytest.raises(TypeError): @@ -111,11 +103,11 @@ def test_item_se_str_property(): # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut item = Item(internal_dict={'path': b'/a/\xfc/c'}) assert item.path == '/a/\udcfc/c' # getting a surrogate-escaped representation - assert item.as_dict() == {'path': b'/a/\xfc/c'} + assert item.as_dict() == {'path': '/a/\udcfc/c'} del item.path assert 'path' not in item item.path = '/a/\udcfc/c' # setting using a surrogate-escaped representation - assert item.as_dict() == {'path': b'/a/\xfc/c'} + assert item.as_dict() == {'path': '/a/\udcfc/c'} def test_item_list_property(): diff --git 
a/src/borg/testsuite/key.py b/src/borg/testsuite/key.py index 5073c5b23b..02eaa86e5e 100644 --- a/src/borg/testsuite/key.py +++ b/src/borg/testsuite/key.py @@ -360,23 +360,23 @@ def test_round_trip(self, key): assert blob.startswith(b'\x82') unpacked = msgpack.unpackb(blob) - assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512' + assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512' unpacked, verified = key.unpack_and_verify_manifest(blob) assert verified - assert unpacked[b'foo'] == b'bar' - assert b'tam' not in unpacked + assert unpacked['foo'] == 'bar' + assert 'tam' not in unpacked - @pytest.mark.parametrize('which', (b'hmac', b'salt')) + @pytest.mark.parametrize('which', ('hmac', 'salt')) def test_tampered(self, key, which): data = {'foo': 'bar'} blob = key.pack_and_authenticate_metadata(data) assert blob.startswith(b'\x82') unpacked = msgpack.unpackb(blob, object_hook=StableDict) - assert len(unpacked[b'tam'][which]) == 64 - unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32) - assert len(unpacked[b'tam'][which]) == 64 + assert len(unpacked['tam'][which]) == 64 + unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32) + assert len(unpacked['tam'][which]) == 64 blob = msgpack.packb(unpacked) with pytest.raises(TAMInvalid): @@ -421,4 +421,4 @@ def to_dict(key): load_me = RepoKey.detect(repository, manifest_data=None) assert to_dict(load_me) == to_dict(save_me) - assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode() + assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm diff --git a/src/borg/testsuite/repository.py b/src/borg/testsuite/repository.py index b4944e58a1..52f03e668d 100644 --- a/src/borg/testsuite/repository.py +++ b/src/borg/testsuite/repository.py @@ -655,8 +655,8 @@ def _subtly_corrupted_hints_setup(self): hints = msgpack.unpack(fd) fd.seek(0) # Corrupt segment refcount - assert hints[b'segments'][2] == 1 - hints[b'segments'][2] = 0 + assert hints['segments'][2] 
== 1 + hints['segments'][2] = 0 msgpack.pack(hints, fd) fd.truncate() diff --git a/src/borg/xattr.py b/src/borg/xattr.py index 8a731340c8..175f6372cb 100644 --- a/src/borg/xattr.py +++ b/src/borg/xattr.py @@ -76,9 +76,7 @@ def get_all(path, follow_symlinks=False): for name in names: try: # xattr name is a bytes object, we directly use it. - # if we get an empty xattr value (b''), we store None into the result dict - - # borg always did it like that... - result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) or None + result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) except OSError as e: name_str = name.decode() if isinstance(path, int): @@ -122,9 +120,7 @@ def set_all(path, xattrs, follow_symlinks=False): warning = False for k, v in xattrs.items(): try: - # the key k is a bytes object due to msgpack unpacking it as such. - # if we have a None value, it means "empty", so give b'' to setxattr in that case: - setxattr(path, k, v or b'', follow_symlinks=follow_symlinks) + setxattr(path, k, v, follow_symlinks=follow_symlinks) except OSError as e: warning = True k_str = k.decode()