Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions docs/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,6 @@ Which file types, attributes, etc. are *not* preserved?
Archive extraction has optional support to extract all-zero chunks as
holes in a sparse file.
* Some filesystem specific attributes, like btrfs NOCOW, see :ref:`platforms`.
* For hardlinked symlinks, the hardlinking can not be archived (and thus,
the hardlinking will not be done at extraction time). The symlinks will
be archived and extracted as non-hardlinked symlinks, see :issue:`2379`.

Are there other known limitations?
----------------------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/internals/data-structures.rst
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ dictionary created by the ``Item`` class that contains:
* uid
* gid
* mode (item type + permissions)
* source (for symlinks, and for hardlinks within one archive)
* source (for symlinks)
* rdev (for device files)
* mtime, atime, ctime in nanoseconds
* xattrs
Expand Down
2 changes: 1 addition & 1 deletion docs/usage/general/file-metadata.rst.inc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Besides regular file and directory structures, Borg can preserve
* FIFOs ("named pipes")
* special file *contents* can be backed up in ``--read-special`` mode.
By default the metadata to create them with mknod(2), mkfifo(2) etc. is stored.
* hardlinked regular files, devices, FIFOs (considering all items in the same archive)
* hardlinked regular files, devices, symlinks, FIFOs (considering all items in the same archive)
* timestamps in nanosecond precision: mtime, atime, ctime
* other timestamps: birthtime (on platforms supporting it)
* permissions:
Expand Down
314 changes: 116 additions & 198 deletions src/borg/archive.py

Large diffs are not rendered by default.

252 changes: 194 additions & 58 deletions src/borg/archiver.py

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions src/borg/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from .helpers import Error
from .helpers import Manifest
from .helpers import get_cache_dir, get_security_dir
from .helpers import int_to_bigint, bigint_to_int, bin_to_hex, parse_stringified_list
from .helpers import bin_to_hex, parse_stringified_list
from .helpers import format_file_size
from .helpers import safe_ns
from .helpers import yes
Expand All @@ -28,6 +28,7 @@
from .helpers import set_ec, EXIT_WARNING
from .helpers import safe_unlink
from .helpers import msgpack
from .helpers.msgpack import int_to_timestamp, timestamp_to_int
from .item import ArchiveItem, ChunkListEntry
from .crypto.key import PlaintextKey
from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
Expand Down Expand Up @@ -623,7 +624,7 @@ def commit(self):
# this is to avoid issues with filesystem snapshots and cmtime granularity.
# Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
entry = FileCacheEntry(*msgpack.unpackb(item))
if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
if entry.age == 0 and timestamp_to_int(entry.cmtime) < self._newest_cmtime or \
entry.age > 0 and entry.age < ttl:
msgpack.pack((path_hash, entry), fd)
entry_count += 1
Expand Down Expand Up @@ -756,7 +757,7 @@ def fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx):
csize, data = decrypted_repository.get(archive_id)
chunk_idx.add(archive_id, 1, len(data), csize)
archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
if archive.version != 1:
if archive.version not in (1, 2): # legacy
raise Exception('Unknown archive metadata version')
sync = CacheSynchronizer(chunk_idx)
for item_id, (csize, data) in zip(archive.items, decrypted_repository.get_many(archive.items)):
Expand Down Expand Up @@ -1018,10 +1019,10 @@ def file_known_and_unchanged(self, hashed_path, path_hash, st):
if 'i' in cache_mode and entry.inode != st.st_ino:
files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
return True, None
if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
if 'c' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_ctime_ns:
files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
return True, None
elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
elif 'm' in cache_mode and timestamp_to_int(entry.cmtime) != st.st_mtime_ns:
files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
return True, None
# we ignored the inode number in the comparison above or it is still the same.
Expand Down Expand Up @@ -1049,7 +1050,7 @@ def memorize_file(self, hashed_path, path_hash, st, ids):
elif 'm' in cache_mode:
cmtime_type = 'mtime'
cmtime_ns = safe_ns(st.st_mtime_ns)
entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunk_ids=ids)
self.files[path_hash] = msgpack.packb(entry)
self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',
Expand Down
101 changes: 74 additions & 27 deletions src/borg/compress.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -56,16 +56,21 @@ cdef class CompressorBase:
also handles compression format auto detection and
adding/stripping the ID header (which enables auto detection).
"""
ID = b'\xFF\xFF' # reserved and not used
# overwrite with a unique 2-bytes bytestring in child classes
ID = b'\xFF' # reserved and not used
# overwrite with a unique 1-byte bytestring in child classes
name = 'baseclass'

@classmethod
def detect(cls, data):
return data.startswith(cls.ID)

def __init__(self, **kwargs):
pass
def __init__(self, level=255, **kwargs):
assert 0 <= level <= 255
if self.ID is not None:
self.id_level = self.ID + bytes((level, )) # level 255 means "unknown level"
assert len(self.id_level) == 2
else:
self.id_level = None

def decide(self, data):
"""
Expand All @@ -85,8 +90,8 @@ cdef class CompressorBase:
Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
which is needed so that the correct decompressor can be used for decompression.
"""
# add ID bytes
return self.ID + data
# add id_level bytes
return self.id_level + data

def decompress(self, data):
"""
Expand All @@ -96,7 +101,7 @@ cdef class CompressorBase:
Only handles input generated by _this_ Compressor - for a general purpose
decompression method see *Compressor.decompress*.
"""
# strip ID bytes
# strip id_level bytes
return data[2:]

cdef class DecidingCompressor(CompressorBase):
Expand All @@ -106,8 +111,8 @@ cdef class DecidingCompressor(CompressorBase):
"""
name = 'decidebaseclass'

def __init__(self, **kwargs):
super().__init__(**kwargs)
def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs)

def _decide(self, data):
"""
Expand Down Expand Up @@ -148,9 +153,12 @@ class CNONE(CompressorBase):
"""
none - no compression, just pass through data
"""
ID = b'\x00\x00'
ID = b'\x00'
name = 'none'

def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs) # no defined levels for CNONE, so just say "unknown"

def compress(self, data):
return super().compress(data)

Expand All @@ -170,11 +178,11 @@ class LZ4(DecidingCompressor):
- wrapper releases CPython's GIL to support multithreaded code
- uses safe lz4 methods that never go beyond the end of the output buffer
"""
ID = b'\x01\x00'
ID = b'\x01'
name = 'lz4'

def __init__(self, **kwargs):
pass
def __init__(self, level=255, **kwargs):
super().__init__(level=level, **kwargs) # no defined levels for LZ4, so just say "unknown"

def _decide(self, idata):
"""
Expand Down Expand Up @@ -235,11 +243,11 @@ class LZMA(DecidingCompressor):
"""
lzma compression / decompression
"""
ID = b'\x02\x00'
ID = b'\x02'
name = 'lzma'

def __init__(self, level=6, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level
if lzma is None:
raise ValueError('No lzma support found.')
Expand Down Expand Up @@ -270,11 +278,11 @@ class ZSTD(DecidingCompressor):
# This is a NOT THREAD SAFE implementation.
# Only ONE python context must be created at a time.
# It should work flawlessly as long as borg will call ONLY ONE compression job at a time.
ID = b'\x03\x00'
ID = b'\x03'
name = 'zstd'

def __init__(self, level=3, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level

def _decide(self, idata):
Expand Down Expand Up @@ -331,14 +339,52 @@ class ZSTD(DecidingCompressor):
return dest[:osize]


class ZLIB(CompressorBase):
class ZLIB(DecidingCompressor):
    """
    zlib compression / decompression, backed by the python stdlib zlib module.

    This compressor carries its own ID byte, like the other compressors do.
    """
    ID = b'\x05'
    name = 'zlib'

    def __init__(self, level=6, **kwargs):
        # hand the level to the base class too, then remember it for _decide().
        super().__init__(level=level, **kwargs)
        self.level = level

    def _decide(self, data):
        """
        Run zlib over *data* and decide whether the result is worth keeping.

        Returns (compressor, zlib_data):

        - (self, compressed bytes) if the zlib output is smaller than *data*,
        - (NONE_COMPRESSOR, None) otherwise.
        """
        compressed = zlib.compress(data, self.level)
        if len(compressed) >= len(data):
            # zlib did not make it any smaller, so do not use its output.
            return NONE_COMPRESSOR, None
        return self, compressed

    def decompress(self, data):
        """
        Decompress *data* that was produced by this compressor.

        The base class implementation is applied first, the zlib module then
        inflates what it returns. A zlib.error is re-raised as DecompressionError
        (carrying the same message, without exception chaining).
        """
        payload = super().decompress(data)
        try:
            return zlib.decompress(payload)
        except zlib.error as exc:
            raise DecompressionError(str(exc)) from None


class ZLIB_legacy(CompressorBase):
"""
zlib compression / decompression (python stdlib)

Note: This is the legacy ZLIB support as used by borg < 1.3.
It still suffers from attic *only* supporting zlib and not having separate
ID bytes to differentiate between differently compressed chunks.
This just works because zlib compressed stuff always starts with 0x.8.. bytes.
Newer borg uses the ZLIB class that has separate ID bytes (as all the other
compressors) and does not need this hack.
"""
ID = b'\x08' # not used here, see detect()
# avoid all 0x.8 IDs elsewhere!
name = 'zlib_legacy'

@classmethod
def detect(cls, data):
# matches misc. patterns 0x.8.. used by zlib
Expand All @@ -348,7 +394,7 @@ class ZLIB(CompressorBase):
return check_ok and is_deflate

def __init__(self, level=6, **kwargs):
super().__init__(**kwargs)
super().__init__(level=level, **kwargs)
self.level = level

def compress(self, data):
Expand Down Expand Up @@ -440,14 +486,14 @@ class ObfuscateSize(CompressorBase):
"""
Meta-Compressor that obfuscates the compressed data size.
"""
ID = b'\x04\x00'
ID = b'\x04'
name = 'obfuscate'

header_fmt = Struct('>I')
header_fmt = Struct('<I')
header_len = len(header_fmt.pack(0))

def __init__(self, level=None, compressor=None):
super().__init__()
super().__init__(level=level) # data will be encrypted, so we can tell the level
self.compressor = compressor
if level is None:
pass # decompression
Expand Down Expand Up @@ -502,13 +548,14 @@ COMPRESSOR_TABLE = {
CNONE.name: CNONE,
LZ4.name: LZ4,
ZLIB.name: ZLIB,
ZLIB_legacy.name: ZLIB_legacy,
LZMA.name: LZMA,
Auto.name: Auto,
ZSTD.name: ZSTD,
ObfuscateSize.name: ObfuscateSize,
}
# List of possible compression types. Does not include Auto, since it is a meta-Compressor.
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ObfuscateSize, ] # check fast stuff first
COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, ZLIB_legacy, LZMA, ObfuscateSize, ] # check fast stuff first

def get_compressor(name, **kwargs):
cls = COMPRESSOR_TABLE[name]
Expand Down Expand Up @@ -554,7 +601,7 @@ class CompressionSpec:
self.name = values[0]
if self.name in ('none', 'lz4', ):
return
elif self.name in ('zlib', 'lzma', ):
elif self.name in ('zlib', 'lzma', 'zlib_legacy'): # zlib_legacy just for testing
if count < 2:
level = 6 # default compression level in py stdlib
elif count == 2:
Expand Down Expand Up @@ -597,7 +644,7 @@ class CompressionSpec:
def compressor(self):
if self.name in ('none', 'lz4', ):
return get_compressor(self.name)
elif self.name in ('zlib', 'lzma', 'zstd', ):
elif self.name in ('zlib', 'lzma', 'zstd', 'zlib_legacy'):
return get_compressor(self.name, level=self.level)
elif self.name == 'auto':
return get_compressor(self.name, compressor=self.inner.compressor)
Expand Down
2 changes: 1 addition & 1 deletion src/borg/constants.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master', 'hlid',
'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'birthtime', 'size',
'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
'part'])
Expand Down
Loading