diff --git a/borg/archive.py b/borg/archive.py index 31e3c05716..ff9cdeeea5 100644 --- a/borg/archive.py +++ b/borg/archive.py @@ -24,11 +24,11 @@ ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \ PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume from .repository import Repository +from . import msg_pack from .platform import acl_get, acl_set from .chunker import Chunker from .hashindex import ChunkIndex, ChunkIndexEntry from .cache import ChunkListEntry -import msgpack has_lchmod = hasattr(os, 'lchmod') has_lchflags = hasattr(os, 'lchflags') @@ -44,19 +44,19 @@ def __init__(self, repository, key): self.key = key def unpack_many(self, ids, filter=None, preload=False): - unpacker = msgpack.Unpacker(use_list=False) + unpacker = msg_pack.Unpacker(use_list=False) for _, data in self.fetch_many(ids): unpacker.feed(data) items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker] if filter: items = [item for item in items if filter(item)] for item in items: - if b'chunks' in item: - item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']] + if 'chunks' in item: + item['chunks'] = [ChunkListEntry(*e) for e in item['chunks']] if preload: for item in items: - if b'chunks' in item: - self.repository.preload([c.id for c in item[b'chunks']]) + if 'chunks' in item: + self.repository.preload([c.id for c in item['chunks']]) for item in items: yield item @@ -70,7 +70,7 @@ class ChunkBuffer: def __init__(self, key, chunker_params=ITEMS_CHUNKER_PARAMS): self.buffer = BytesIO() - self.packer = msgpack.Packer(unicode_errors='surrogateescape') + self.packer = msg_pack.Packer(unicode_errors='surrogateescape') self.chunks = [] self.key = key self.chunker = Chunker(self.key.chunk_seed, *chunker_params) @@ -162,13 +162,13 @@ def __init__(self, repository, key, manifest, name, cache=None, create=False, if name not in self.manifest.archives: raise self.DoesNotExist(name) info = self.manifest.archives[name] - self.load(info[b'id']) + self.load(info['id']) self.zeros = b'\0' * (1 << chunker_params[1]) def _load_meta(self, id): _, data = self.key.decrypt(id, self.repository.get(id)) - metadata = msgpack.unpackb(data) - if metadata[b'version'] != 1: + metadata = msg_pack.unpackb(data) + if metadata['version'] != 1: raise Exception('Unknown archive metadata version') return metadata @@ -176,20 +176,19 @@ def load(self, id): self.id = id self.metadata = self._load_meta(self.id) decode_dict(self.metadata, ARCHIVE_TEXT_KEYS) - self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']] - self.name = self.metadata[b'name'] + self.name = self.metadata['name'] @property def ts(self): """Timestamp of archive creation (start) in UTC""" - ts = self.metadata[b'time'] + ts = self.metadata['time'] return parse_timestamp(ts) @property def ts_end(self): """Timestamp of archive creation (end) in UTC""" # fall back to time if there is no time_end present in metadata - ts = self.metadata.get(b'time_end') or self.metadata[b'time'] + ts = self.metadata.get('time_end') or self.metadata['time'] return parse_timestamp(ts) @property @@ -216,7 +215,7 @@ def __repr__(self): return 'Archive(%r)' % self.name def iter_items(self, filter=None, preload=False): - for item in self.pipeline.unpack_many(self.metadata[b'items'], filter=filter, preload=preload): + for item in self.pipeline.unpack_many(self.metadata['items'], filter=filter, preload=preload): yield item def add_item(self, item): @@ -261,7 +260,7 @@ def save(self, name=None, comment=None, timestamp=None, additional_metadata=None 'chunker_params': self.chunker_params, } metadata.update(additional_metadata or {}) - data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape') + data = msg_pack.packb(StableDict(metadata), unicode_errors='surrogateescape') self.id = self.key.id_hash(data) self.cache.add_chunk(self.id, Chunk(data), self.stats) self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']} @@ -281,18 +280,18 @@ def add_file_chunks(chunks): # This function is a bit evil since it abuses the cache to calculate # the stats. The cache transaction must be rolled back afterwards - unpacker = msgpack.Unpacker(use_list=False) + unpacker = msg_pack.Unpacker(use_list=False) cache.begin_txn() stats = Statistics() add(self.id) - for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])): + for id, chunk in zip(self.metadata['items'], self.repository.get_many(self.metadata['items'])): add(id) _, data = self.key.decrypt(id, chunk) unpacker.feed(data) for item in unpacker: - if b'chunks' in item: + if 'chunks' in item: stats.nfiles += 1 - add_file_chunks(item[b'chunks']) + add_file_chunks(item['chunks']) cache.rollback() return stats @@ -307,22 +306,22 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp :param stdout: write extracted data to stdout :param sparse: write sparse files (chunk-granularity, independent of the original being sparse) :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly - :param original_path: b'path' key as stored in archive + :param original_path: 'path' key as stored in archive """ if dry_run or stdout: - if b'chunks' in item: - for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True): + if 'chunks' in item: + for data in self.pipeline.fetch_many([c.id for c in item['chunks']], is_preloaded=True): if stdout: sys.stdout.buffer.write(data) if stdout: sys.stdout.buffer.flush() return - original_path = original_path or item[b'path'] + original_path = original_path or item['path'] dest = self.cwd - if item[b'path'].startswith('/') or item[b'path'].startswith('..'): + if item['path'].startswith('/') or item['path'].startswith('..'): raise Exception('Path should be relative and local') - path = os.path.join(dest, item[b'path']) + path = os.path.join(dest, item['path']) # Attempt to remove existing files, ignore errors on failure try: st = os.lstat(path) @@ -334,27 +333,27 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None except OSError: pass - mode = item[b'mode'] + mode = item['mode'] if stat.S_ISREG(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) # Hard link? - if b'source' in item: - source = os.path.join(dest, item[b'source']) + if 'source' in item: + source = os.path.join(dest, item['source']) if os.path.exists(path): os.unlink(path) if not hardlink_masters: os.link(source, path) return - item[b'chunks'], link_target = hardlink_masters[item[b'source']] + item['chunks'], link_target = hardlink_masters[item['source']] if link_target: # Hard link was extracted previously, just link os.link(link_target, path) return # Extract chunks, since the item which had the chunks was not extracted with open(path, 'wb') as fd: - ids = [c.id for c in item[b'chunks']] + ids = [c.id for c in item['chunks']] for _, data in self.pipeline.fetch_many(ids, is_preloaded=True): if sparse and self.zeros.startswith(data): # all-zero chunk: create a hole in a sparse file @@ -367,7 +366,7 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp self.restore_attrs(path, item, fd=fd.fileno()) if hardlink_masters: # Update master entry with extracted file path, so that following hardlinks don't extract twice. - hardlink_masters[item.get(b'source') or original_path] = (None, path) + hardlink_masters[item.get('source') or original_path] = (None, path) elif stat.S_ISDIR(mode): if not os.path.exists(path): os.makedirs(path) @@ -376,7 +375,7 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp elif stat.S_ISLNK(mode): if not os.path.exists(os.path.dirname(path)): os.makedirs(os.path.dirname(path)) - source = item[b'source'] + source = item['source'] if os.path.exists(path): os.unlink(path) try: @@ -390,18 +389,18 @@ def extract_item(self, item, restore_attrs=True, dry_run=False, stdout=False, sp os.mkfifo(path) self.restore_attrs(path, item) elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode): - os.mknod(path, item[b'mode'], item[b'rdev']) + os.mknod(path, item['mode'], item['rdev']) self.restore_attrs(path, item) else: - raise Exception('Unknown archive item type %r' % item[b'mode']) + raise Exception('Unknown archive item type %r' % item['mode']) def restore_attrs(self, path, item, symlink=False, fd=None): uid = gid = None if not self.numeric_owner: - uid = user2uid(item[b'user']) - gid = group2gid(item[b'group']) - uid = item[b'uid'] if uid is None else uid - gid = item[b'gid'] if gid is None else gid + uid = user2uid(item['user']) + gid = group2gid(item['group']) + uid = item['uid'] if uid is None else uid + gid = item['gid'] if gid is None else gid # This code is a bit of a mess due to os specific differences try: if fd: @@ -411,14 +410,14 @@ def restore_attrs(self, path, item, symlink=False, fd=None): except OSError: pass if fd: - os.fchmod(fd, item[b'mode']) + os.fchmod(fd, item['mode']) elif not symlink: - os.chmod(path, item[b'mode']) + os.chmod(path, item['mode']) elif has_lchmod: # Not available on Linux - os.lchmod(path, item[b'mode']) - mtime = bigint_to_int(item[b'mtime']) - if b'atime' in item: - atime = bigint_to_int(item[b'atime']) + os.lchmod(path, item['mode']) + mtime = bigint_to_int(item['mtime']) + if 'atime' in item: + atime = bigint_to_int(item['atime']) else: # old archives only had mtime in item metadata atime = mtime @@ -428,14 +427,14 @@ def restore_attrs(self, path, item, symlink=False, fd=None): os.utime(path, None, ns=(atime, mtime), follow_symlinks=False) acl_set(path, item, self.numeric_owner) # Only available on OS X and FreeBSD - if has_lchflags and b'bsdflags' in item: + if has_lchflags and 'bsdflags' in item: try: - os.lchflags(path, item[b'bsdflags']) + os.lchflags(path, item['bsdflags']) except OSError: pass # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include # the Linux capabilities in the "security.capability" attribute. - xattrs = item.get(b'xattrs', {}) + xattrs = item.get('xattrs', {}) for k, v in xattrs.items(): try: xattr.setxattr(fd or path, k, v, follow_symlinks=False) @@ -450,10 +449,10 @@ def restore_attrs(self, path, item, symlink=False, fd=None): def set_meta(self, key, value): metadata = StableDict(self._load_meta(self.id)) metadata[key] = value - data = msgpack.packb(metadata, unicode_errors='surrogateescape') + data = msg_pack.packb(metadata, unicode_errors='surrogateescape') new_id = self.key.id_hash(data) self.cache.add_chunk(new_id, Chunk(data), self.stats) - self.manifest.archives[self.name] = {'id': new_id, 'time': metadata[b'time']} + self.manifest.archives[self.name] = {'id': new_id, 'time': metadata['time']} self.cache.chunk_decref(self.id, self.stats) self.id = new_id @@ -462,12 +461,12 @@ def rename(self, name): raise self.AlreadyExists(name) oldname = self.name self.name = name - self.set_meta(b'name', name) + self.set_meta('name', name) del self.manifest.archives[oldname] def delete(self, stats, progress=False): - unpacker = msgpack.Unpacker(use_list=False) - items_ids = self.metadata[b'items'] + unpacker = msg_pack.Unpacker(use_list=False) + items_ids = self.metadata['items'] pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True) for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))): if progress: @@ -476,8 +475,8 @@ def delete(self, stats, progress=False): unpacker.feed(data) self.cache.chunk_decref(items_id, stats) for item in unpacker: - if b'chunks' in item: - for chunk_id, size, csize in item[b'chunks']: + if 'chunks' in item: + for chunk_id, size, csize in item['chunks']: self.cache.chunk_decref(chunk_id, stats) if progress: pi.finish() @@ -486,37 +485,37 @@ def delete(self, stats, progress=False): def stat_attrs(self, st, path): item = { - b'mode': st.st_mode, - b'uid': st.st_uid, b'user': uid2user(st.st_uid), - b'gid': st.st_gid, b'group': gid2group(st.st_gid), - b'atime': int_to_bigint(st.st_atime_ns), - b'ctime': int_to_bigint(st.st_ctime_ns), - b'mtime': int_to_bigint(st.st_mtime_ns), + 'mode': st.st_mode, + 'uid': st.st_uid, 'user': uid2user(st.st_uid), + 'gid': st.st_gid, 'group': gid2group(st.st_gid), + 'atime': int_to_bigint(st.st_atime_ns), + 'ctime': int_to_bigint(st.st_ctime_ns), + 'mtime': int_to_bigint(st.st_mtime_ns), } if self.numeric_owner: - item[b'user'] = item[b'group'] = None + item['user'] = item['group'] = None xattrs = xattr.get_all(path, follow_symlinks=False) if xattrs: - item[b'xattrs'] = StableDict(xattrs) + item['xattrs'] = StableDict(xattrs) if has_lchflags and st.st_flags: - item[b'bsdflags'] = st.st_flags + item['bsdflags'] = st.st_flags acl_get(path, item, st, self.numeric_owner) return item def process_dir(self, path, st): - item = {b'path': make_path_safe(path)} + item = {'path': make_path_safe(path)} item.update(self.stat_attrs(st, path)) self.add_item(item) return 'd' # directory def process_fifo(self, path, st): - item = {b'path': make_path_safe(path)} + item = {'path': make_path_safe(path)} item.update(self.stat_attrs(st, path)) self.add_item(item) return 'f' # fifo def process_dev(self, path, st): - item = {b'path': make_path_safe(path), b'rdev': st.st_rdev} + item = {'path': make_path_safe(path), 'rdev': st.st_rdev} item.update(self.stat_attrs(st, path)) self.add_item(item) if stat.S_ISCHR(st.st_mode): @@ -526,7 +525,7 @@ def process_dev(self, path, st): def process_symlink(self, path, st): source = os.readlink(path) - item = {b'path': make_path_safe(path), b'source': source} + item = {'path': make_path_safe(path), 'source': source} item.update(self.stat_attrs(st, path)) self.add_item(item) return 's' # symlink @@ -540,12 +539,12 @@ def process_stdin(self, path, cache): self.stats.nfiles += 1 t = int_to_bigint(int(time.time()) * 1000000000) item = { - b'path': path, - b'chunks': chunks, - b'mode': 0o100660, # regular file, ug=rw - b'uid': uid, b'user': uid2user(uid), - b'gid': gid, b'group': gid2group(gid), - b'mtime': t, b'atime': t, b'ctime': t, + 'path': path, + 'chunks': chunks, + 'mode': 0o100660, # regular file, ug=rw + 'uid': uid, 'user': uid2user(uid), + 'gid': gid, 'group': gid2group(gid), + 'mtime': t, 'atime': t, 'ctime': t, } self.add_item(item) return 'i' # stdin @@ -559,8 +558,8 @@ def process_file(self, path, st, cache, ignore_inode=False): if (st.st_ino, st.st_dev) in self.hard_links: item = self.stat_attrs(st, path) item.update({ - b'path': safe_path, - b'source': source, + 'path': safe_path, + 'source': source, }) self.add_item(item) status = 'h' # regular file, hardlink (to already seen inodes) @@ -584,8 +583,8 @@ def process_file(self, path, st, cache, ignore_inode=False): else: status = 'A' # regular file, added item = { - b'path': safe_path, - b'hardlink_master': st.st_nlink > 1, # item is a hard link and has the chunks + 'path': safe_path, + 'hardlink_master': st.st_nlink > 1, # item is a hard link and has the chunks } # Only chunkify the file if needed if chunks is None: @@ -598,7 +597,7 @@ def process_file(self, path, st, cache, ignore_inode=False): self.stats.show_progress(item=item, dt=0.2) cache.memorize_file(path_hash, st, [c.id for c in chunks]) status = status or 'M' # regular file, modified (if not 'A' already) - item[b'chunks'] = chunks + item['chunks'] = chunks item.update(self.stat_attrs(st, path)) self.stats.nfiles += 1 self.add_item(item) @@ -628,11 +627,11 @@ class RobustUnpacker: """ def __init__(self, validator): super().__init__() - self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS] + self.item_keys = [msg_pack.packb(name) for name in ITEM_KEYS] self.validator = validator self._buffered_data = [] self._resync = False - self._unpacker = msgpack.Unpacker(object_hook=StableDict) + self._unpacker = msg_pack.Unpacker(object_hook=StableDict) def resync(self): self._buffered_data = [] @@ -665,7 +664,7 @@ def __next__(self): data = data[1:] continue - self._unpacker = msgpack.Unpacker(object_hook=StableDict) + self._unpacker = msg_pack.Unpacker(object_hook=StableDict) self._unpacker.feed(data) try: item = next(self._unpacker) @@ -746,14 +745,14 @@ def rebuild_manifest(self): if b'cmdline' not in data or b'\xa7version\x01' not in data: continue try: - archive = msgpack.unpackb(data) + archive = msg_pack.unpackb(data) # Ignore exceptions that might be raised when feeding # msgpack with invalid data except (TypeError, ValueError, StopIteration): continue - if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive: - logger.info('Found archive %s', archive[b'name'].decode('utf-8')) - manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']} + if isinstance(archive, dict) and 'items' in archive and 'cmdline' in archive: + logger.info('Found archive %s', archive['name']) + manifest.archives[archive['name']] = {'id': chunk_id, 'time': archive['time']} logger.info('Manifest rebuild complete.') return manifest @@ -792,10 +791,10 @@ def verify_file_chunks(item): """ offset = 0 chunk_list = [] - for chunk_id, size, csize in item[b'chunks']: + for chunk_id, size, csize in item['chunks']: if chunk_id not in self.chunks: # If a file chunk is missing, create an all empty replacement chunk - logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size)) + logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item['path'], offset, offset + size)) self.error_found = True data = bytes(size) chunk_id = self.key.id_hash(data) @@ -806,14 +805,14 @@ def verify_file_chunks(item): add_reference(chunk_id, size, csize) chunk_list.append((chunk_id, size, csize)) offset += size - item[b'chunks'] = chunk_list + item['chunks'] = chunk_list def robust_iterator(archive): """Iterates through all archive items Missing item chunks will be skipped and the msgpack stream will be restarted """ - unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item) + unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item) _state = 0 def missing_chunk_detector(chunk_id): @@ -829,7 +828,7 @@ def report(msg, chunk_id, chunk_no): logger.error(msg) i = 0 - for state, items in groupby(archive[b'items'], missing_chunk_detector): + for state, items in groupby(archive['items'], missing_chunk_detector): items = list(items) if state % 2: for chunk_id in items: @@ -855,7 +854,7 @@ def report(msg, chunk_id, chunk_no): if archive is None: # we need last N or all archives archive_items = sorted(self.manifest.archives.items(), reverse=True, - key=lambda name_info: name_info[1][b'time']) + key=lambda name_info: name_info[1]['time']) if prefix is not None: archive_items = [item for item in archive_items if item[0].startswith(prefix)] num_archives = len(archive_items) @@ -869,7 +868,7 @@ def report(msg, chunk_id, chunk_no): with cache_if_remote(self.repository) as repository: for i, (name, info) in enumerate(archive_items[:end]): logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives)) - archive_id = info[b'id'] + archive_id = info['id'] if archive_id not in self.chunks: logger.error('Archive metadata block is missing!') self.error_found = True @@ -878,26 +877,25 @@ def report(msg, chunk_id, chunk_no): mark_as_possibly_superseded(archive_id) cdata = self.repository.get(archive_id) _, data = self.key.decrypt(archive_id, cdata) - archive = StableDict(msgpack.unpackb(data)) - if archive[b'version'] != 1: + archive = StableDict(msg_pack.unpackb(data)) + if archive['version'] != 1: raise Exception('Unknown archive metadata version') decode_dict(archive, ARCHIVE_TEXT_KEYS) - archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']] items_buffer = ChunkBuffer(self.key) items_buffer.write_chunk = add_callback for item in robust_iterator(archive): - if b'chunks' in item: + if 'chunks' in item: verify_file_chunks(item) items_buffer.add(item) items_buffer.flush(flush=True) - for previous_item_id in archive[b'items']: + for previous_item_id in archive['items']: mark_as_possibly_superseded(previous_item_id) - archive[b'items'] = items_buffer.chunks - data = msgpack.packb(archive, unicode_errors='surrogateescape') + archive['items'] = items_buffer.chunks + data = msg_pack.packb(archive, unicode_errors='surrogateescape') new_archive_id = self.key.id_hash(data) cdata = self.key.encrypt(Chunk(data)) add_reference(new_archive_id, len(data), len(cdata), cdata) - info[b'id'] = new_archive_id + info['id'] = new_archive_id def orphan_chunks_check(self): if self.check_all: @@ -989,38 +987,38 @@ def process_items(self, archive, target, resume_from=None): def item_is_hardlink_master(item): return (target_is_subset and - stat.S_ISREG(item[b'mode']) and - item.get(b'hardlink_master', True) and - b'source' not in item and - not matcher.match(item[b'path'])) + stat.S_ISREG(item['mode']) and + item.get('hardlink_master', True) and + 'source' not in item and + not matcher.match(item['path'])) for item in archive.iter_items(): if item_is_hardlink_master(item): # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters - hardlink_masters[item[b'path']] = (item.get(b'chunks'), None) + hardlink_masters[item['path']] = (item.get('chunks'), None) continue if resume_from: # Fast forward to after the last processed file - if item[b'path'] == resume_from: - logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path'])) + if item['path'] == resume_from: + logger.info('Fast-forwarded to %s', remove_surrogates(item['path'])) resume_from = None continue - if not matcher.match(item[b'path']): - self.print_file_status('x', item[b'path']) + if not matcher.match(item['path']): + self.print_file_status('x', item['path']) continue - if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters: + if target_is_subset and stat.S_ISREG(item['mode']) and item.get('source') in hardlink_masters: # master of this hard link is outside the target subset - chunks, new_source = hardlink_masters[item[b'source']] + chunks, new_source = hardlink_masters[item['source']] if new_source is None: # First item to use this master, move the chunks - item[b'chunks'] = chunks - hardlink_masters[item[b'source']] = (None, item[b'path']) - del item[b'source'] + item['chunks'] = chunks + hardlink_masters[item['source']] = (None, item['path']) + del item['source'] else: # Master was already moved, only update this item's source - item[b'source'] = new_source + item['source'] = new_source if self.dry_run: - self.print_file_status('-', item[b'path']) + self.print_file_status('-', item['path']) else: try: self.process_item(archive, target, item) @@ -1032,20 +1030,20 @@ def item_is_hardlink_master(item): target.stats.show_progress(final=True) def process_item(self, archive, target, item): - if b'chunks' in item: - item[b'chunks'] = self.process_chunks(archive, target, item) + if 'chunks' in item: + item['chunks'] = self.process_chunks(archive, target, item) target.stats.nfiles += 1 target.add_item(item) - self.print_file_status(file_status(item[b'mode']), item[b'path']) + self.print_file_status(file_status(item['mode']), item['path']) if self.interrupt: raise self.Interrupted def process_chunks(self, archive, target, item): """Return new chunk ID list for 'item'.""" if not self.recompress and not target.recreate_rechunkify: - for chunk_id, size, csize in item[b'chunks']: + for chunk_id, size, csize in item['chunks']: self.cache.chunk_incref(chunk_id, target.stats) - return item[b'chunks'] + return item['chunks'] new_chunks = self.process_partial_chunks(target) chunk_iterator = self.create_chunk_iterator(archive, target, item) consume(chunk_iterator, len(new_chunks)) @@ -1076,7 +1074,7 @@ def process_chunks(self, archive, target, item): def create_chunk_iterator(self, archive, target, item): """Return iterator of chunks to store for 'item' from 'archive' in 'target'.""" - chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']]) + chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item['chunks']]) if target.recreate_rechunkify: # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk # (does not load the entire file into memory) @@ -1109,9 +1107,9 @@ def save(self, archive, target, comment=None, completed=True, metadata=None): if completed: timestamp = archive.ts.replace(tzinfo=None) if comment is None: - comment = archive.metadata.get(b'comment', '') + comment = archive.metadata.get('comment', '') target.save(timestamp=timestamp, comment=comment, additional_metadata={ - 'cmdline': archive.metadata[b'cmdline'], + 'cmdline': archive.metadata['cmdline'], 'recreate_cmdline': sys.argv, }) archive.delete(Statistics(), progress=self.progress) @@ -1138,7 +1136,7 @@ def matcher_add_tagged_dirs(self, archive): """Add excludes to the matcher created by exclude_cache and exclude_if_present.""" def exclude(dir, tag_item): if self.keep_tag_files: - tag_files.append(PathPrefixPattern(tag_item[b'path'])) + tag_files.append(PathPrefixPattern(tag_item['path'])) tagged_dirs.append(FnmatchPattern(dir + '/')) else: tagged_dirs.append(PathPrefixPattern(dir)) @@ -1150,18 +1148,18 @@ def exclude(dir, tag_item): cachedir_masters = {} for item in archive.iter_items( - filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])): - if item[b'path'].endswith(CACHE_TAG_NAME): - cachedir_masters[item[b'path']] = item - if stat.S_ISREG(item[b'mode']): - dir, tag_file = os.path.split(item[b'path']) + filter=lambda item: item['path'].endswith(CACHE_TAG_NAME) or matcher.match(item['path'])): + if item['path'].endswith(CACHE_TAG_NAME): + cachedir_masters[item['path']] = item + if stat.S_ISREG(item['mode']): + dir, tag_file = os.path.split(item['path']) if tag_file in self.exclude_if_present: exclude(dir, item) if self.exclude_caches and tag_file == CACHE_TAG_NAME: - if b'chunks' in item: + if 'chunks' in item: file = open_item(archive, item) else: - file = open_item(archive, cachedir_masters[item[b'source']]) + file = open_item(archive, cachedir_masters[item['source']]) if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS): exclude(dir, item) matcher.add(tag_files, True) @@ -1179,15 +1177,15 @@ def create_target_or_resume(self, archive): if not target: target = self.create_target_archive(target_name) # If the archives use the same chunker params, then don't rechunkify - target.recreate_rechunkify = tuple(archive.metadata.get(b'chunker_params')) != self.chunker_params + target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params')) != self.chunker_params return target, resume_from def try_resume(self, archive, target_name): """Try to resume from temporary archive. Return (target archive, resume from path) if successful.""" logger.info('Found %s, will resume interrupted operation', target_name) old_target = self.open_archive(target_name) - resume_id = old_target.metadata[b'recreate_source_id'] - resume_args = [arg.decode('utf-8', 'surrogateescape') for arg in old_target.metadata[b'recreate_args']] + resume_id = old_target.metadata['recreate_source_id'] + resume_args = old_target.metadata['recreate_args'] if resume_id != archive.id: logger.warning('Source archive changed, will discard %s and start over', target_name) logger.warning('Saved fingerprint: %s', hexlify(resume_id).decode('ascii')) @@ -1202,18 +1200,18 @@ def try_resume(self, archive, target_name): logger.info('Replaying items from interrupted operation...') item = None for item in old_target.iter_items(): - if b'chunks' in item: - for chunk in item[b'chunks']: + if 'chunks' in item: + for chunk in item['chunks']: self.cache.chunk_incref(chunk.id, target.stats) target.stats.nfiles += 1 target.add_item(item) if item: - resume_from = item[b'path'] + resume_from = item['path'] else: resume_from = None if self.progress: old_target.stats.show_progress(final=True) - target.recreate_partial_chunks = old_target.metadata.get(b'recreate_partial_chunks', []) + target.recreate_partial_chunks = old_target.metadata.get('recreate_partial_chunks', []) for chunk_id, size, csize in target.recreate_partial_chunks: if not self.cache.seen_chunk(chunk_id): try: diff --git a/borg/archiver.py b/borg/archiver.py index 53b22079d1..58ccfe1738 100644 --- a/borg/archiver.py +++ b/borg/archiver.py @@ -21,11 +21,12 @@ parse_pattern, PathPrefixPattern, to_localtime, timestamp, \ get_cache_dir, prune_within, prune_split, \ Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \ - dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \ + dir_is_tagged, ChunkerParams, CompressionSpec, yes, sysinfo, \ log_multi, PatternMatcher, ItemFormatter from .logger import create_logger, setup_logging logger = create_logger() from . import helpers +from . import msg_pack from .compress import Compressor, COMPR_BUFFER from .upgrader import AtticRepositoryUpgrader, BorgRepositoryUpgrader from .repository import Repository @@ -394,22 +395,22 @@ def do_extract(self, args, repository, manifest, key, archive): hardlink_masters = {} if partial_extract else None def item_is_hardlink_master(item): - return (partial_extract and stat.S_ISREG(item[b'mode']) and - item.get(b'hardlink_master', True) and b'source' not in item) + return (partial_extract and stat.S_ISREG(item['mode']) and + item.get('hardlink_master', True) and 'source' not in item) for item in archive.iter_items(preload=True, - filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])): - orig_path = item[b'path'] + filter=lambda item: item_is_hardlink_master(item) or matcher.match(item['path'])): + orig_path = item['path'] if item_is_hardlink_master(item): - hardlink_masters[orig_path] = (item.get(b'chunks'), None) - if not matcher.match(item[b'path']): + hardlink_masters[orig_path] = (item.get('chunks'), None) + if not matcher.match(item['path']): continue if strip_components: - item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) - if not item[b'path']: + item['path'] = os.sep.join(orig_path.split(os.sep)[strip_components:]) + if not item['path']: continue if not args.dry_run: - while dirs and not item[b'path'].startswith(dirs[-1][b'path']): + while dirs and not item['path'].startswith(dirs[-1]['path']): archive.extract_item(dirs.pop(-1), stdout=stdout) if output_list: logger.info(remove_surrogates(orig_path)) @@ -417,7 +418,7 @@ def item_is_hardlink_master(item): if dry_run: archive.extract_item(item, dry_run=True) else: - if stat.S_ISDIR(item[b'mode']): + if stat.S_ISDIR(item['mode']): dirs.append(item) archive.extract_item(item, restore_attrs=False) else: @@ -444,58 +445,58 @@ def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2): return self.compare_chunk_contents(chunks1, chunks2) def sum_chunk_size(item, consider_ids=None): - if item.get(b'deleted'): + if item.get('deleted'): return None else: - return sum(c.size for c in item[b'chunks'] + return sum(c.size for c in item['chunks'] if consider_ids is None or c.id in consider_ids) def get_owner(item): if args.numeric_owner: - return item[b'uid'], item[b'gid'] + return item['uid'], item['gid'] else: - return item[b'user'], item[b'group'] + return item['user'], item['group'] def get_mode(item): - if b'mode' in item: - return stat.filemode(item[b'mode']) + if 'mode' in item: + return stat.filemode(item['mode']) else: return [None] def has_hardlink_master(item, hardlink_masters): - return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters + return stat.S_ISREG(item['mode']) and item.get('source') in hardlink_masters def compare_link(item1, item2): # These are the simple link cases. For special cases, e.g. if a # regular file is replaced with a link or vice versa, it is # indicated in compare_mode instead. - if item1.get(b'deleted'): + if item1.get('deleted'): return 'added link' - elif item2.get(b'deleted'): + elif item2.get('deleted'): return 'removed link' - elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']: + elif 'source' in item1 and 'source' in item2 and item1['source'] != item2['source']: return 'changed link' def contents_changed(item1, item2): if can_compare_chunk_ids: - return item1[b'chunks'] != item2[b'chunks'] + return item1['chunks'] != item2['chunks'] else: if sum_chunk_size(item1) != sum_chunk_size(item2): return True else: - chunk_ids1 = [c.id for c in item1[b'chunks']] - chunk_ids2 = [c.id for c in item2[b'chunks']] + chunk_ids1 = [c.id for c in item1['chunks']] + chunk_ids2 = [c.id for c in item2['chunks']] return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2) def compare_content(path, item1, item2): if contents_changed(item1, item2): - if item1.get(b'deleted'): + if item1.get('deleted'): return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2)))) - elif item2.get(b'deleted'): + elif item2.get('deleted'): return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1)))) else: - chunk_ids1 = {c.id for c in item1[b'chunks']} - chunk_ids2 = {c.id for c in item2[b'chunks']} + chunk_ids1 = {c.id for c in item1['chunks']} + chunk_ids2 = {c.id for c in item2['chunks']} added_ids = chunk_ids2 - chunk_ids1 removed_ids = chunk_ids1 - chunk_ids2 added = sum_chunk_size(item2, added_ids) @@ -504,9 +505,9 @@ def compare_content(path, item1, item2): format_file_size(-removed, precision=1, sign=True))) def compare_directory(item1, item2): - if item2.get(b'deleted') and not item1.get(b'deleted'): + if item2.get('deleted') and not item1.get('deleted'): return 'removed directory' - elif item1.get(b'deleted') and not item2.get(b'deleted'): + elif item1.get('deleted') and not item2.get('deleted'): return 'added directory' def compare_owner(item1, item2): @@ -516,7 +517,7 @@ def compare_owner(item1, item2): return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2) def compare_mode(item1, item2): - if item1[b'mode'] != item2[b'mode']: + if item1['mode'] != item2['mode']: return '[{} -> {}]'.format(get_mode(item1), get_mode(item2)) def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): @@ -527,15 +528,15 @@ def compare_items(output, path, item1, item2, hardlink_masters, deleted=False): changes = [] if has_hardlink_master(item1, hardlink_masters): - item1 = hardlink_masters[item1[b'source']][0] + item1 = hardlink_masters[item1['source']][0] if has_hardlink_master(item2, hardlink_masters): - item2 = hardlink_masters[item2[b'source']][1] + item2 = hardlink_masters[item2['source']][1] if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l': changes.append(compare_link(item1, item2)) - if b'chunks' in item1 and b'chunks' in item2: + if 'chunks' in item1 and 'chunks' in item2: changes.append(compare_content(path, item1, item2)) if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd': @@ -559,21 +560,21 @@ def print_output(line): def compare_archives(archive1, archive2, matcher): def hardlink_master_seen(item): - return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters + return 'source' not in item or not stat.S_ISREG(item['mode']) or item['source'] in hardlink_masters def is_hardlink_master(item): - return item.get(b'hardlink_master', True) and b'source' not in item + return item.get('hardlink_master', True) and 'source' not in item def update_hardlink_masters(item1, item2): if is_hardlink_master(item1) or is_hardlink_master(item2): - hardlink_masters[item1[b'path']] = (item1, item2) + hardlink_masters[item1['path']] = (item1, item2) def compare_or_defer(item1, item2): update_hardlink_masters(item1, item2) if not hardlink_master_seen(item1) or not hardlink_master_seen(item2): deferred.append((item1, item2)) else: - compare_items(output, item1[b'path'], item1, item2, hardlink_masters) + compare_items(output, item1['path'], item1, item2, hardlink_masters) orphans_archive1 = collections.OrderedDict() orphans_archive2 = collections.OrderedDict() @@ -582,44 +583,44 @@ def compare_or_defer(item1, item2): output = [] for item1, item2 in zip_longest( - archive1.iter_items(lambda item: matcher.match(item[b'path'])), - archive2.iter_items(lambda item: matcher.match(item[b'path'])), + archive1.iter_items(lambda item: matcher.match(item['path'])), + archive2.iter_items(lambda item: matcher.match(item['path'])), ): - if item1 and item2 and item1[b'path'] == item2[b'path']: + if item1 and item2 and item1['path'] == item2['path']: compare_or_defer(item1, item2) continue if item1: - matching_orphan = orphans_archive2.pop(item1[b'path'], None) + matching_orphan = orphans_archive2.pop(item1['path'], None) if matching_orphan: compare_or_defer(item1, matching_orphan) else: - orphans_archive1[item1[b'path']] = item1 + orphans_archive1[item1['path']] = item1 if item2: - matching_orphan = orphans_archive1.pop(item2[b'path'], None) + matching_orphan = orphans_archive1.pop(item2['path'], None) if matching_orphan: compare_or_defer(matching_orphan, item2) else: - orphans_archive2[item2[b'path']] = item2 + orphans_archive2[item2['path']] = item2 # At this point orphans_* contain items that had no matching partner in the other archive deleted_item = { - b'deleted': True, - b'chunks': [], - b'mode': 0, + 'deleted': True, + 'chunks': [], + 'mode': 0, } for added in orphans_archive2.values(): - path = added[b'path'] - deleted_item[b'path'] = path + path = added['path'] + deleted_item['path'] = path update_hardlink_masters(deleted_item, added) compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True) for deleted in orphans_archive1.values(): - path = deleted[b'path'] - deleted_item[b'path'] = path + path = deleted['path'] + deleted_item['path'] = path update_hardlink_masters(deleted, deleted_item) compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True) for item1, item2 in deferred: assert hardlink_master_seen(item1) assert hardlink_master_seen(item2) - compare_items(output, item1[b'path'], item1, item2, hardlink_masters) + compare_items(output, item1['path'], item1, item2, hardlink_masters) for line in sorted(output): print_output(line) @@ -627,8 +628,8 @@ def compare_or_defer(item1, item2): archive1 = archive archive2 = Archive(repository, key, manifest, args.archive2) - can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get( - b'chunker_params', True) or args.same_chunker_params + can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get( + 'chunker_params', True) or args.same_chunker_params if not can_compare_chunk_ids: self.print_warning('--chunker-params might be different between archives, diff will be slow.\n' 'If you know for certain that they are the same, pass --same-chunker-params ' @@ -738,7 +739,7 @@ def write(bytestring): sys.stdout.write(bytestring.decode('utf-8', errors='replace')) else: write = sys.stdout.buffer.write - for item in archive.iter_items(lambda item: matcher.match(item[b'path'])): + for item in archive.iter_items(lambda item: matcher.match(item['path'])): write(formatter.format_item(item).encode('utf-8', errors='surrogateescape')) else: for archive_info in manifest.list_archive_infos(sort_by='ts'): @@ -760,12 +761,12 @@ def format_cmdline(cmdline): stats = archive.calc_stats(cache) print('Name:', archive.name) print('Fingerprint: %s' % hexlify(archive.id).decode('ascii')) - print('Comment:', archive.metadata.get(b'comment', '')) - print('Hostname:', archive.metadata[b'hostname']) - print('Username:', archive.metadata[b'username']) + print('Comment:', archive.metadata.get('comment', '')) + print('Hostname:', archive.metadata['hostname']) + print('Username:', archive.metadata['username']) print('Time (start): %s' % format_time(to_localtime(archive.ts))) print('Time (end): %s' % format_time(to_localtime(archive.ts_end))) - print('Command line:', format_cmdline(archive.metadata[b'cmdline'])) + print('Command line:', format_cmdline(archive.metadata['cmdline'])) print('Number of files: %d' % stats.nfiles) print() print(str(stats)) @@ -899,7 +900,7 @@ def interrupt(signal_num, stack_frame): def do_debug_dump_archive_items(self, args, repository, manifest, key): """dump (decrypted, decompressed) archive items metadata (not: data)""" archive = Archive(repository, key, manifest, args.location.archive) - for i, item_id in enumerate(archive.metadata[b'items']): + for i, item_id in enumerate(archive.metadata['items']): _, data = key.decrypt(item_id, repository.get(item_id)) filename = '%06d_%s.items' % (i, hexlify(item_id).decode('ascii')) print('Dumping', filename) @@ -1921,7 +1922,7 @@ def run(self, args): if args.show_version: logger.info('borgbackup version %s' % __version__) check_extension_modules() - if is_slow_msgpack(): + if msg_pack.is_slow(): logger.warning("Using a pure-python msgpack! This will result in lower performance.") return args.func(args) @@ -1940,7 +1941,7 @@ def sig_info_handler(signum, stack): # pragma: no cover logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total))) break if func in ('extract_item', ): # extract op - path = loc['item'][b'path'] + path = loc['item']['path'] try: pos = loc['fd'].tell() except Exception: diff --git a/borg/cache.py b/borg/cache.py index 5902365c2e..ce7c0ab309 100644 --- a/borg/cache.py +++ b/borg/cache.py @@ -12,10 +12,9 @@ from .helpers import Error, get_cache_dir, decode_dict, int_to_bigint, \ bigint_to_int, format_file_size, yes from .locking import UpgradableLock +from . import msg_pack from .hashindex import ChunkIndex, ChunkIndexEntry -import msgpack - ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize') FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids') @@ -179,7 +178,7 @@ def _read_files(self): self._newest_mtime = 0 logger.debug('Reading files cache ...') with open(os.path.join(self.path, 'files'), 'rb') as fd: - u = msgpack.Unpacker(use_list=True) + u = msg_pack.Unpacker(use_list=True) while True: data = fd.read(64 * 1024) if not data: @@ -188,7 +187,7 @@ def _read_files(self): for path_hash, item in u: entry = FileCacheEntry(*item) # in the end, this takes about 240 Bytes per file - self.files[path_hash] = msgpack.packb(entry._replace(age=entry.age + 1)) + self.files[path_hash] = msg_pack.packb(entry._replace(age=entry.age + 1)) def begin_txn(self): # Initialize transaction snapshot @@ -211,9 +210,10 @@ def commit(self): for path_hash, item in self.files.items(): # Discard cached files with the newest mtime to avoid # issues with filesystem snapshots and mtime precision - entry = FileCacheEntry(*msgpack.unpackb(item)) + entry = FileCacheEntry(*msg_pack.unpackb(item)) if entry.age < 10 and bigint_to_int(entry.mtime) < self._newest_mtime: - msgpack.pack((path_hash, entry), fd) + data = msg_pack.packb((path_hash, entry)) + fd.write(data) self.config.set('cache', 'manifest', hexlify(self.manifest.id).decode('ascii')) self.config.set('cache', 'timestamp', self.manifest.timestamp) self.config.set('cache', 'key_type', str(self.key.TYPE)) @@ -270,7 +270,7 @@ def cached_archives(): return set() def repo_archives(): - return set(info[b'id'] for info in self.manifest.archives.values()) + return set(info['id'] for info in self.manifest.archives.values()) def cleanup_outdated(ids): for id in ids: @@ -281,12 +281,12 @@ def fetch_and_build_idx(archive_id, repository, key): cdata = repository.get(archive_id) _, data = key.decrypt(archive_id, cdata) chunk_idx.add(archive_id, 1, len(data), len(cdata)) - archive = msgpack.unpackb(data) - if archive[b'version'] != 1: + archive = msg_pack.unpackb(data) + if archive['version'] != 1: raise Exception('Unknown archive metadata version') - decode_dict(archive, (b'name',)) - unpacker = msgpack.Unpacker() - for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])): + decode_dict(archive, ('name', )) + unpacker = msg_pack.Unpacker() + for item_id, chunk in zip(archive['items'], repository.get_many(archive['items'])): _, data = key.decrypt(item_id, chunk) chunk_idx.add(item_id, 1, len(data), len(chunk)) unpacker.feed(data) @@ -294,8 +294,8 @@ def fetch_and_build_idx(archive_id, repository, key): if not isinstance(item, dict): logger.error('Error: Did not get expected metadata dict - archive corrupted!') continue - if b'chunks' in item: - for chunk_id, size, csize in item[b'chunks']: + if 'chunks' in item: + for chunk_id, size, csize in item['chunks']: chunk_idx.add(chunk_id, 1, size, csize) if self.do_cache: fn = mkpath(archive_id) @@ -310,7 +310,7 @@ def fetch_and_build_idx(archive_id, repository, key): def lookup_name(archive_id): for name, info in self.manifest.archives.items(): - if info[b'id'] == archive_id: + if info['id'] == archive_id: return name def create_master_idx(chunk_idx): @@ -417,10 +417,10 @@ def file_known_and_unchanged(self, path_hash, st, ignore_inode=False): entry = self.files.get(path_hash) if not entry: return None - entry = FileCacheEntry(*msgpack.unpackb(entry)) + entry = FileCacheEntry(*msg_pack.unpackb(entry)) if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and (ignore_inode or entry.inode == st.st_ino)): - self.files[path_hash] = msgpack.packb(entry._replace(age=0)) + self.files[path_hash] = msg_pack.packb(entry._replace(age=0)) return entry.chunk_ids else: return None @@ -429,5 +429,5 @@ def memorize_file(self, path_hash, st, ids): if not (self.do_files and stat.S_ISREG(st.st_mode)): return entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(st.st_mtime_ns), chunk_ids=ids) - self.files[path_hash] = msgpack.packb(entry) + self.files[path_hash] = msg_pack.packb(entry) self._newest_mtime = max(self._newest_mtime, st.st_mtime_ns) diff --git a/borg/constants.py b/borg/constants.py index 95b16c47ab..9d834a6c10 100644 --- a/borg/constants.py +++ b/borg/constants.py @@ -1,10 +1,10 @@ # this set must be kept complete, otherwise the RobustUnpacker might malfunction: -ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master', - b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime', - b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ]) +ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master', + 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', + 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ]) -ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end') -ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group') +ARCHIVE_TEXT_KEYS = ('name', 'comment', 'hostname', 'username', 'time', 'time_end') +ITEM_TEXT_KEYS = ('path', 'source', 'user', 'group') # default umask, overriden by --umask, defaults to read/write only for owner UMASK_DEFAULT = 0o077 diff --git a/borg/fuse.py b/borg/fuse.py index e1de56281c..e64113d9bb 100644 --- a/borg/fuse.py +++ b/borg/fuse.py @@ -9,7 +9,7 @@ from .archive import Archive from .helpers import daemonize, bigint_to_int from distutils.version import LooseVersion -import msgpack +from . import msg_pack # Does this version of llfuse support ns precision? have_fuse_xtime_ns = hasattr(llfuse.EntryAttributes, 'st_mtime_ns') @@ -31,12 +31,12 @@ def __init__(self): def add(self, item): pos = self.fd.seek(0, io.SEEK_END) - self.fd.write(msgpack.packb(item)) + self.fd.write(msg_pack.packb(item)) return pos + self.offset def get(self, inode): self.fd.seek(inode - self.offset, io.SEEK_SET) - return next(msgpack.Unpacker(self.fd, read_size=1024)) + return next(msg_pack.Unpacker(self.fd, read_size=1024)) class FuseOperations(llfuse.Operations): @@ -50,7 +50,7 @@ def __init__(self, key, repository, manifest, archive, cached_repo): self.items = {} self.parent = {} self.contents = defaultdict(dict) - self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()} + self.default_dir = {'mode': 0o40755, 'mtime': int(time.time() * 1e9), 'uid': os.getuid(), 'gid': os.getgid()} self.pending_archives = {} self.accounted_chunks = {} self.cache = ItemCache() @@ -71,13 +71,13 @@ def __init__(self, key, repository, manifest, archive, cached_repo): def process_archive(self, archive, prefix=[]): """Build fuse inode hierarchy from archive metadata """ - unpacker = msgpack.Unpacker() - for key, chunk in zip(archive.metadata[b'items'], self.repository.get_many(archive.metadata[b'items'])): + unpacker = msg_pack.Unpacker() + for key, chunk in zip(archive.metadata['items'], self.repository.get_many(archive.metadata['items'])): _, data = self.key.decrypt(key, chunk) unpacker.feed(data) for item in unpacker: - segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/') - del item[b'path'] + segments = prefix + os.fsencode(os.path.normpath(item['path'])).split(b'/') + del item['path'] num_segments = len(segments) parent = 1 for i, segment in enumerate(segments, 1): @@ -88,10 +88,10 @@ def process_archive(self, archive, prefix=[]): self.parent[archive_inode] = parent # Leaf segment? if i == num_segments: - if b'source' in item and stat.S_ISREG(item[b'mode']): - inode = self._find_inode(item[b'source'], prefix) + if 'source' in item and stat.S_ISREG(item['mode']): + inode = self._find_inode(item['source'], prefix) item = self.cache.get(inode) - item[b'nlink'] = item.get(b'nlink', 1) + 1 + item['nlink'] = item.get('nlink', 1) + 1 self.items[inode] = item else: inode = self.cache.add(item) @@ -142,7 +142,7 @@ def getattr(self, inode, ctx=None): size = 0 dsize = 0 try: - for key, chunksize, _ in item[b'chunks']: + for key, chunksize, _ in item['chunks']: size += chunksize if self.accounted_chunks.get(key, inode) == inode: self.accounted_chunks[key] = inode @@ -154,45 +154,45 @@ def getattr(self, inode, ctx=None): entry.generation = 0 entry.entry_timeout = 300 entry.attr_timeout = 300 - entry.st_mode = item[b'mode'] - entry.st_nlink = item.get(b'nlink', 1) - entry.st_uid = item[b'uid'] - entry.st_gid = item[b'gid'] - entry.st_rdev = item.get(b'rdev', 0) + entry.st_mode = item['mode'] + entry.st_nlink = item.get('nlink', 1) + entry.st_uid = item['uid'] + entry.st_gid = item['gid'] + entry.st_rdev = item.get('rdev', 0) entry.st_size = size entry.st_blksize = 512 entry.st_blocks = dsize / 512 # note: older archives only have mtime (not atime nor ctime) if have_fuse_xtime_ns: - entry.st_mtime_ns = bigint_to_int(item[b'mtime']) - if b'atime' in item: - entry.st_atime_ns = bigint_to_int(item[b'atime']) + entry.st_mtime_ns = bigint_to_int(item['mtime']) + if 'atime' in item: + entry.st_atime_ns = bigint_to_int(item['atime']) else: - entry.st_atime_ns = bigint_to_int(item[b'mtime']) - if b'ctime' in item: - entry.st_ctime_ns = bigint_to_int(item[b'ctime']) + entry.st_atime_ns = bigint_to_int(item['mtime']) + if 'ctime' in item: + entry.st_ctime_ns = bigint_to_int(item['ctime']) else: - entry.st_ctime_ns = bigint_to_int(item[b'mtime']) + entry.st_ctime_ns = bigint_to_int(item['mtime']) else: - entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9 - if b'atime' in item: - entry.st_atime = bigint_to_int(item[b'atime']) / 1e9 + entry.st_mtime = bigint_to_int(item['mtime']) / 1e9 + if 'atime' in item: + entry.st_atime = bigint_to_int(item['atime']) / 1e9 else: - entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9 - if b'ctime' in item: - entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9 + entry.st_atime = bigint_to_int(item['mtime']) / 1e9 + if 'ctime' in item: + entry.st_ctime = bigint_to_int(item['ctime']) / 1e9 else: - entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9 + entry.st_ctime = bigint_to_int(item['mtime']) / 1e9 return entry def listxattr(self, inode, ctx=None): item = self.get_item(inode) - return item.get(b'xattrs', {}).keys() + return item.get('xattrs', {}).keys() def getxattr(self, inode, name, ctx=None): item = self.get_item(inode) try: - return item.get(b'xattrs', {})[name] + return item.get('xattrs', {})[name] except KeyError: raise llfuse.FUSEError(errno.ENODATA) from None @@ -224,7 +224,7 @@ def opendir(self, inode, ctx=None): def read(self, fh, offset, size): parts = [] item = self.get_item(fh) - for id, s, csize in item[b'chunks']: + for id, s, csize in item['chunks']: if s < offset: offset -= s continue @@ -245,7 +245,7 @@ def readdir(self, fh, off): def readlink(self, inode, ctx=None): item = self.get_item(inode) - return os.fsencode(item[b'source']) + return os.fsencode(item['source']) def mount(self, mountpoint, extra_options, foreground=False): options = ['fsname=borgfs', 'ro'] diff --git a/borg/helpers.py b/borg/helpers.py index 9259c8f817..78875b3667 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -31,9 +31,8 @@ from . import chunker from .constants import * # NOQA from . import crypto +from . import msg_pack from . import shellpattern -import msgpack -import msgpack.fallback import socket @@ -104,19 +103,17 @@ def load(cls, repository, key=None): manifest = cls(key, repository) _, data = key.decrypt(None, cdata) manifest.id = key.id_hash(data) - m = msgpack.unpackb(data) - if not m.get(b'version') == 1: + m = msg_pack.unpackb(data) + if not m.get('version') == 1: raise ValueError('Invalid manifest version') - manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items()) - manifest.timestamp = m.get(b'timestamp') - if manifest.timestamp: - manifest.timestamp = manifest.timestamp.decode('ascii') - manifest.config = m[b'config'] + manifest.archives = dict(m['archives'].items()) + manifest.timestamp = m.get('timestamp') + manifest.config = m['config'] return manifest, key def write(self): self.timestamp = datetime.utcnow().isoformat() - data = msgpack.packb(StableDict({ + data = msg_pack.packb(StableDict({ 'version': 1, 'archives': self.archives, 'timestamp': self.timestamp, @@ -130,8 +127,8 @@ def list_archive_infos(self, sort_by=None, reverse=False): ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts') archives = [] for name, values in self.archives.items(): - ts = parse_timestamp(values[b'time'].decode('utf-8')) - id = values[b'id'] + ts = parse_timestamp(values['time']) + id = values['id'] archives.append(ArchiveInfo(name=name, id=id, ts=ts)) if sort_by is not None: archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse) @@ -208,7 +205,7 @@ def show_progress(self, item=None, final=False, stream=None, dt=None): columns, lines = get_terminal_size() if not final: msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self) - path = remove_surrogates(item[b'path']) if item else '' + path = remove_surrogates(item['path']) if item else '' space = columns - len(msg) if space < len('...') + len(path): path = '%s...%s' % (path[:(space // 2) - len('...')], path[-space // 2:]) @@ -935,10 +932,6 @@ def int_to_bigint(value): return value -def is_slow_msgpack(): - return msgpack.Packer is msgpack.fallback.Packer - - FALSISH = ('No', 'NO', 'no', 'N', 'n', '0', ) TRUISH = ('Yes', 'YES', 'yes', 'Y', 'y', '1', ) DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', ) @@ -1166,8 +1159,8 @@ class FakeArchive: fpr = name = "" fake_item = { - b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0, - b'uid': 0, b'gid': 0, + 'mode': 0, 'path': '', 'user': '', 'group': '', 'mtime': 0, + 'uid': 0, 'gid': 0, } formatter = cls(FakeArchive, "") keys = [] @@ -1204,12 +1197,12 @@ def __init__(self, archive, format): 'csize': self.calculate_csize, 'num_chunks': self.calculate_num_chunks, 'unique_chunks': self.calculate_unique_chunks, - 'isomtime': partial(self.format_time, b'mtime'), - 'isoctime': partial(self.format_time, b'ctime'), - 'isoatime': partial(self.format_time, b'atime'), - 'mtime': partial(self.time, b'mtime'), - 'ctime': partial(self.time, b'ctime'), - 'atime': partial(self.time, b'atime'), + 'isomtime': partial(self.format_time, 'mtime'), + 'isoctime': partial(self.format_time, 'ctime'), + 'isoatime': partial(self.format_time, 'atime'), + 'mtime': partial(self.time, 'mtime'), + 'ctime': partial(self.time, 'ctime'), + 'atime': partial(self.time, 'atime'), } for hash_function in hashlib.algorithms_guaranteed: self.add_key(hash_function, partial(self.hash_item, hash_function)) @@ -1221,11 +1214,11 @@ def add_key(self, key, callable_with_item): self.used_call_keys = set(self.call_keys) & self.format_keys def get_item_data(self, item): - mode = stat.filemode(item[b'mode']) + mode = stat.filemode(item['mode']) item_type = mode[0] item_data = self.item_data - source = item.get(b'source', '') + source = item.get('source', '') extra = '' if source: source = remove_surrogates(source) @@ -1236,12 +1229,12 @@ def get_item_data(self, item): extra = ' link to %s' % source item_data['type'] = item_type item_data['mode'] = mode - item_data['user'] = item[b'user'] or item[b'uid'] - item_data['group'] = item[b'group'] or item[b'gid'] - item_data['uid'] = item[b'uid'] - item_data['gid'] = item[b'gid'] - item_data['path'] = remove_surrogates(item[b'path']) - item_data['bpath'] = item[b'path'] + item_data['user'] = item['user'] or item['uid'] + item_data['group'] = item['group'] or item['gid'] + item_data['uid'] = item['uid'] + item_data['gid'] = item['gid'] + item_data['path'] = remove_surrogates(item['path']) + item_data['bpath'] = item['path'] item_data['source'] = source item_data['linktarget'] = source item_data['extra'] = extra @@ -1253,31 +1246,31 @@ def format_item(self, item): return self.format.format_map(self.get_item_data(item)) def calculate_num_chunks(self, item): - return len(item.get(b'chunks', [])) + return len(item.get('chunks', [])) def calculate_unique_chunks(self, item): chunk_index = self.archive.cache.chunks - return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1) + return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1) def calculate_size(self, item): - return sum(c.size for c in item.get(b'chunks', [])) + return sum(c.size for c in item.get('chunks', [])) def calculate_csize(self, item): - return sum(c.csize for c in item.get(b'chunks', [])) + return sum(c.csize for c in item.get('chunks', [])) def hash_item(self, hash_function, item): - if b'chunks' not in item: + if 'chunks' not in item: return "" hash = hashlib.new(hash_function) - for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]): + for _, data in self.archive.pipeline.fetch_many([c.id for c in item['chunks']]): hash.update(data) return hash.hexdigest() def format_time(self, key, item): - return format_time(safe_timestamp(item.get(key) or item[b'mtime'])) + return format_time(safe_timestamp(item.get(key) or item['mtime'])) def time(self, key, item): - return safe_timestamp(item.get(key) or item[b'mtime']) + return safe_timestamp(item.get(key) or item['mtime']) class ChunkIteratorFileWrapper: @@ -1321,7 +1314,7 @@ def read(self, nbytes): def open_item(archive, item): """Return file-like object for archived item (with chunks).""" - chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']]) + chunk_iterator = archive.pipeline.fetch_many([c.id for c in item['chunks']]) return ChunkIteratorFileWrapper(chunk_iterator) diff --git a/borg/key.py b/borg/key.py index 124376d0e0..d2a5774ee8 100644 --- a/borg/key.py +++ b/borg/key.py @@ -14,6 +14,8 @@ from .constants import * # NOQA from .crypto import AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks, hmac_sha256 from .compress import Compressor, COMPR_BUFFER + +# NOTE: this is still using msgpack (not msg_pack) for key files to stay compatible with old key files import msgpack PREFIX = b'\0' * 8 diff --git a/borg/msg_pack.py b/borg/msg_pack.py new file mode 100644 index 0000000000..51a45e1077 --- /dev/null +++ b/borg/msg_pack.py @@ -0,0 +1,77 @@ +# this is a wrapper around msgpack that fixes some bad defaults. +# - we want bytes -> pack -> unpack -> bytes +# - we want str -> pack -> unpack -> str (and use utf-8 encoding) + +import msgpack +import msgpack.fallback + +msgpack_packb = msgpack.packb +msgpack_unpackb = msgpack.unpackb + + +def is_slow(): + return msgpack.Packer is msgpack.fallback.Packer + + +def packb(data, **kw): + return msgpack_packb(data, encoding='utf-8', use_bin_type=True, **kw) + + +def unpackb(data, **kw): + return msgpack_unpackb(data, encoding='utf-8', **kw) + + +class Packer(msgpack.Packer): + def __init__(self, + default=None, + encoding='utf-8', + unicode_errors='strict', + use_single_float=False, + autoreset=1, + use_bin_type=1, # different from base class + ): + super().__init__( + default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float, + autoreset=autoreset, + use_bin_type=use_bin_type, + ) + + +class Unpacker(msgpack.Unpacker): + def __init__(self, + file_like=None, + read_size=0, + use_list=1, + object_hook=None, + object_pairs_hook=None, + list_hook=None, + encoding='utf-8', # different from base class + unicode_errors='strict', + max_buffer_size=0, + ext_hook=None, + max_str_len=2147483647, + max_bin_len=2147483647, + max_array_len=2147483647, + max_map_len=2147483647, + max_ext_len=2147483647, + ): + super().__init__( + file_like=file_like, + read_size=read_size, + use_list=use_list, + object_hook=object_hook, + object_pairs_hook=object_pairs_hook, + list_hook=list_hook, + encoding=encoding, + unicode_errors=unicode_errors, + max_buffer_size=max_buffer_size, + ext_hook=ext_hook, + max_str_len=max_str_len, + max_bin_len=max_bin_len, + max_array_len=max_array_len, + max_map_len=max_map_len, + max_ext_len=max_ext_len, + ) diff --git a/borg/remote.py b/borg/remote.py index 051b9c0e8e..e60e4f580c 100644 --- a/borg/remote.py +++ b/borg/remote.py @@ -13,6 +13,7 @@ from .helpers import Error, IntegrityError, get_home_dir, sysinfo from .repository import Repository +# NOTE: this is still using msgpack (not msg_pack) for RPC to stay compatible with old remotes import msgpack RPC_PROTOCOL_VERSION = 2 diff --git a/borg/repository.py b/borg/repository.py index 7a57e7cc15..7c10a30434 100644 --- a/borg/repository.py +++ b/borg/repository.py @@ -11,12 +11,12 @@ import struct from zlib import crc32 -import msgpack from .constants import * # NOQA from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent from .hashindex import NSIndex from .locking import UpgradableLock, LockError, LockErrorT from .lrucache import LRUCache +from . import msg_pack MAX_OBJECT_SIZE = 20 * 1024 * 1024 MAGIC = b'BORG_SEG' @@ -210,20 +210,22 @@ def prepare_txn(self, transaction_id, do_cleanup=True): if do_cleanup: self.io.cleanup(transaction_id) with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd: - hints = msgpack.unpack(fd) - if hints[b'version'] != 1: + data = fd.read() + hints = msg_pack.unpackb(data) + if hints['version'] != 1: raise ValueError('Unknown hints file version: %d' % hints['version']) - self.segments = hints[b'segments'] - self.compact = set(hints[b'compact']) + self.segments = hints['segments'] + self.compact = set(hints['compact']) def write_index(self): - hints = {b'version': 1, - b'segments': self.segments, - b'compact': list(self.compact)} + hints = {'version': 1, + 'segments': self.segments, + 'compact': list(self.compact)} transaction_id = self.io.get_segments_transaction_id() hints_file = os.path.join(self.path, 'hints.%d' % transaction_id) with open(hints_file + '.tmp', 'wb') as fd: - msgpack.pack(hints, fd) + data = msg_pack.packb(hints) + fd.write(data) fd.flush() os.fsync(fd.fileno()) os.rename(hints_file + '.tmp', hints_file) diff --git a/borg/testsuite/archive.py b/borg/testsuite/archive.py index e9722cf221..108e060844 100644 --- a/borg/testsuite/archive.py +++ b/borg/testsuite/archive.py @@ -1,10 +1,9 @@ from datetime import datetime, timezone from unittest.mock import Mock -import msgpack - from ..archive import Archive, CacheChunkBuffer, RobustUnpacker from ..key import PlaintextKey +from .. import msg_pack from ..helpers import Manifest from . import BaseTestCase @@ -26,7 +25,7 @@ def _test_timestamp_parsing(self, isoformat, expected): key = PlaintextKey(repository) manifest = Manifest(repository, key) a = Archive(repository, key, manifest, 'test', create=True) - a.metadata = {b'time': isoformat} + a.metadata = {'time': isoformat} self.assert_equal(a.ts, expected) def test_with_microseconds(self): @@ -52,7 +51,7 @@ def test(self): chunks.flush() chunks.flush(flush=True) self.assert_equal(len(chunks.chunks), 2) - unpacker = msgpack.Unpacker() + unpacker = msg_pack.Unpacker() for id in chunks.chunks: unpacker.feed(cache.objects[id]) self.assert_equal(data, list(unpacker)) @@ -73,7 +72,7 @@ def test_partial(self): chunks.flush(flush=True) self.assert_equal(len(chunks.chunks), 4) self.assert_true(chunks.buffer.tell() == 0) - unpacker = msgpack.Unpacker() + unpacker = msg_pack.Unpacker() for id in chunks.chunks: unpacker.feed(cache.objects[id]) self.assert_equal(data, list(unpacker)) @@ -82,10 +81,10 @@ def test_partial(self): class RobustUnpackerTestCase(BaseTestCase): def make_chunks(self, items): - return b''.join(msgpack.packb({'path': item}) for item in items) + return b''.join(msg_pack.packb({'path': item}) for item in items) def _validator(self, value): - return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz') + return isinstance(value, dict) and value.get('path') in (b'foo', b'bar', b'boo', b'baz') def process(self, input): unpacker = RobustUnpacker(validator=self._validator) @@ -104,10 +103,10 @@ def test_extra_garbage_no_sync(self): (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])] result = self.process(chunks) self.assert_equal(result, [ - {b'path': b'foo'}, {b'path': b'bar'}, + {'path': b'foo'}, {'path': b'bar'}, 103, 97, 114, 98, 97, 103, 101, - {b'path': b'boo'}, - {b'path': b'baz'}]) + {'path': b'boo'}, + {'path': b'baz'}]) def split(self, left, length): parts = [] @@ -120,16 +119,16 @@ def test_correct_stream(self): chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2) input = [(False, chunks)] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': b'foo'}, {'path': b'bar'}, {'path': b'boo'}, {'path': b'baz'}]) def test_missing_chunk(self): chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) input = [(False, chunks[:3]), (True, chunks[4:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': b'foo'}, {'path': b'boo'}, {'path': b'baz'}]) def test_corrupt_chunk(self): chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4) input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])] result = self.process(input) - self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}]) + self.assert_equal(result, [{'path': b'foo'}, {'path': b'boo'}, {'path': b'baz'}]) diff --git a/borg/testsuite/archiver.py b/borg/testsuite/archiver.py index 115e14736f..a7028d9413 100644 --- a/borg/testsuite/archiver.py +++ b/borg/testsuite/archiver.py @@ -1455,8 +1455,8 @@ def test_missing_file_chunk(self): archive, repository = self.open_archive('archive1') with repository: for item in archive.iter_items(): - if item[b'path'].endswith('testsuite/archiver.py'): - repository.delete(item[b'chunks'][-1].id) + if item['path'].endswith('testsuite/archiver.py'): + repository.delete(item['chunks'][-1].id) break repository.commit() self.cmd('check', self.repository_location, exit_code=1) @@ -1466,7 +1466,7 @@ def test_missing_file_chunk(self): def test_missing_archive_item_chunk(self): archive, repository = self.open_archive('archive1') with repository: - repository.delete(archive.metadata[b'items'][-5]) + repository.delete(archive.metadata['items'][-5]) repository.commit() self.cmd('check', self.repository_location, exit_code=1) self.cmd('check', '--repair', self.repository_location, exit_code=0) diff --git a/borg/testsuite/helpers.py b/borg/testsuite/helpers.py index 51d344818c..f78a1889a3 100644 --- a/borg/testsuite/helpers.py +++ b/borg/testsuite/helpers.py @@ -6,16 +6,15 @@ import pytest import sys -import msgpack -import msgpack.fallback import time from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, \ - prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \ + prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, \ yes, TRUISH, FALSISH, DEFAULTISH, \ StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, Chunk, \ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \ PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, partial_format, ChunkIteratorFileWrapper +from .. import msg_pack from . import BaseTestCase, environment_variable, FakeInputs @@ -573,13 +572,12 @@ def dotest(test_archives, within, indices): dotest(test_archives, '1m', [0, 1, 2, 3, 4, 5]) dotest(test_archives, '1y', [0, 1, 2, 3, 4, 5]) - class StableDictTestCase(BaseTestCase): def test(self): d = StableDict(foo=1, bar=2, boo=3, baz=4) self.assert_equal(list(d.items()), [('bar', 2), ('baz', 4), ('boo', 3), ('foo', 1)]) - self.assert_equal(hashlib.md5(msgpack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f') + self.assert_equal(hashlib.md5(msg_pack.packb(d)).hexdigest(), 'fc78df42cd60691b3ac3dd2a2b39903f') class TestParseTimestamp(BaseTestCase): @@ -649,12 +647,12 @@ def tests_stats_progress(stats, columns=80): out = StringIO() stats.update(10**3, 0, unique=False) - stats.show_progress(item={b'path': 'foo'}, final=False, stream=out) + stats.show_progress(item={'path': 'foo'}, final=False, stream=out) s = '1.02 kB O 10 B C 10 B D 0 N foo' buf = ' ' * (columns - len(s)) assert out.getvalue() == s + buf + "\r" out = StringIO() - stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out) + stats.show_progress(item={'path': 'foo'*40}, final=False, stream=out) s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo' buf = ' ' * (columns - len(s)) assert out.getvalue() == s + buf + "\r" @@ -717,17 +715,6 @@ def test_file_size_sign(): assert format_file_size(size, sign=True) == fmt -def test_is_slow_msgpack(): - saved_packer = msgpack.Packer - try: - msgpack.Packer = msgpack.fallback.Packer - assert is_slow_msgpack() - finally: - msgpack.Packer = saved_packer - # this assumes that we have fast msgpack on test platform: - assert not is_slow_msgpack() - - def test_yes_input(): inputs = list(TRUISH) input = FakeInputs(inputs) diff --git a/borg/testsuite/msg_pack.py b/borg/testsuite/msg_pack.py new file mode 100644 index 0000000000..fcc622d0a3 --- /dev/null +++ b/borg/testsuite/msg_pack.py @@ -0,0 +1,62 @@ +import pytest + +import msgpack + +from ..msg_pack import packb, unpackb, is_slow + + +def test_is_slow(): + saved_packer = msgpack.Packer + try: + msgpack.Packer = msgpack.fallback.Packer + assert is_slow() + finally: + msgpack.Packer = saved_packer + # this assumes that we have fast msgpack on test platform: + assert not is_slow() + + +def test_msg_pack_roundtrip(): + # this is how the new code works, easily roundtripping bytes and str + data = b'bytes-ascii\x00' + assert data == unpackb(packb(data)) + data = b'bytes-arbitrary-\xe4\xf6\xfc\x00' + assert data == unpackb(packb(data)) + data = 'text-ascii\u0000' + assert data == unpackb(packb(data)) + data = 'text-unicode-äöü\u0000' + assert data == unpackb(packb(data)) + + +def test_msgpack_roundtrip(): + # this is how the old code works (directly using msgpack) + # str needs addtl. decode to round-trip + data = 'text-unicode-äöü\u0000' + assert msgpack.unpackb(msgpack.packb(data)).decode('utf-8') == data + # bytes do roundtrip without addtl. effort + data = b'bytes-arbitrary-\xe4\xf6\xfc\x00' + assert msgpack.unpackb(msgpack.packb(data)) == data + + +def test_compat_keys(): + key_old = b'keyname' # old code item dict key + key_new = 'keyname' # new code item dict key + # old code with old key produces same output as new code with new key + assert msgpack.packb(key_old) == packb(key_new) + + +def test_compat_values_str(): + # old and new code produce same output for str + value = 'str-ascii' + assert msgpack.packb(value) == packb(value) + value = 'str-unicode-äöü\u0000' + assert msgpack.packb(value) == packb(value) + + +@pytest.mark.xfail(reason="impossible, bytes must serialize differently now from str to have clean roundtrip") +def test_compat_values_bytes(): + # old and new code do NOT produce same output for bytes + value = b'bytes-ascii' + assert msgpack.packb(value) == packb(value) + value = b'bytes-arbitrary-\xe4\xf6\xfc\x00' + assert msgpack.packb(value) == packb(value)