Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 40 additions & 6 deletions docs/usage/tar.rst
Original file line number Diff line number Diff line change
@@ -1,23 +1,57 @@
.. include:: import-tar.rst.inc

.. include:: export-tar.rst.inc

.. include:: import-tar.rst.inc

Examples
~~~~~~~~
::

# export as uncompressed tar
$ borg export-tar /path/to/repo::Monday Monday.tar

# exclude some types, compress using gzip
# import an uncompressed tar
$ borg import-tar /path/to/repo::Monday Monday.tar

# exclude some file types, compress using gzip
$ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so'

# use higher compression level with gzip
$ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz
$ borg export-tar --tar-filter="gzip -9" repo::Monday Monday.tar.gz

# export a tar, but instead of storing it on disk,
# upload it to a remote site using curl.
# copy an archive from repoA to repoB
$ borg export-tar --tar-format=BORG repoA::archive - | borg import-tar repoB::archive -

# export a tar, but instead of storing it on disk, upload it to a remote site using curl
$ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST

# remote extraction via "tarpipe"
$ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x"

Archives transfer script
~~~~~~~~~~~~~~~~~~~~~~~~

Outputs a script that copies all archives from repo1 to repo2:

::

borg list --format='{archive} {time:%Y-%m-%dT%H:%M:%S}{LF}' repo1 | while read -r A T
do
echo "borg export-tar --tar-format=BORG repo1::$A - | borg import-tar --timestamp=$T repo2::$A -"
done

Kept:

- archive name, archive timestamp
- archive contents (all items with metadata and data)

Lost:

- some archive metadata (like the original commandline, execution time, etc.)

Please note:

- all data goes over that pipe, again and again for every archive
- the pipe is dumb, there is no data or transfer time reduction there due to deduplication
- maybe add compression
- pipe over ssh for remote transfer
- no special sparse file support
35 changes: 21 additions & 14 deletions src/borg/archive.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
import json
import os
import socket
Expand Down Expand Up @@ -1445,20 +1446,26 @@ def __init__(self, *, cache, key,

@contextmanager
def create_helper(self, tarinfo, status=None, type=None):
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))

item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if tarinfo.pax_headers:
ph = tarinfo.pax_headers
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
ph = tarinfo.pax_headers
if ph and 'BORG.item.version' in ph:
assert ph['BORG.item.version'] == '1'
meta_bin = base64.b64decode(ph['BORG.item.meta'])
meta_dict = msgpack.unpackb(meta_bin, object_hook=StableDict)
item = Item(internal_dict=meta_dict)
else:
def s_to_ns(s):
return safe_ns(int(float(s) * 1e9))

item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
mtime=s_to_ns(tarinfo.mtime))
if ph:
# note: for mtime this is a bit redundant as it is already done by tarfile module,
# but we just do it in our way to be consistent for sure.
for name in 'atime', 'ctime', 'mtime':
if name in ph:
ns = s_to_ns(ph[name])
setattr(item, name, ns)
yield item, status
# if we get here, "with"-block worked ok without error/exception, the item was processed ok...
self.add_item(item, stats=self.stats)
Expand Down
65 changes: 44 additions & 21 deletions src/borg/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

try:
import argparse
import base64
import collections
import configparser
import faulthandler
Expand Down Expand Up @@ -1137,7 +1138,7 @@ def peek_and_store_hardlink_masters(item, matched):

# The | (pipe) symbol instructs tarfile to use a streaming mode of operation
# where it never seeks on the passed fileobj.
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT, BORG=tarfile.PAX_FORMAT)[args.tar_format]
tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)

if progress:
Expand Down Expand Up @@ -1230,22 +1231,37 @@ def item_to_tarinfo(item, original_path):
return None, stream
return tarinfo, stream

def item_to_paxheaders(item):
def item_to_paxheaders(format, item):
    """
    Transform (parts of) a Borg *item* into a pax_headers dict.

    :param format: name of the selected tar format; only 'BORG' adds the BORG.* headers.
    :param item: the borg item; its atime / ctime / mtime attributes are read if present
                 (expected to be integer nanoseconds), as_dict() is only called for 'BORG'.
    :return: dict mapping pax header names to str values.
    """
    # PAX format
    # ----------
    # When using the PAX (POSIX) format, we can support some things that aren't possible
    # with classic tar formats, including GNU tar, such as:
    # - atime, ctime (DONE)
    # - possibly Linux capabilities, security.* xattrs (TODO)
    # - various additions supported by GNU tar in POSIX mode (TODO)
    #
    # BORG format
    # -----------
    # This is based on PAX, but additionally adds BORG.* pax headers.
    # In addition to the standard tar / PAX metadata and data, it transfers
    # ALL borg item metadata in a BORG specific way.
    #
    def ns_to_pax_time(ns):
        # Render nanoseconds as decimal seconds using integer arithmetic only:
        # a float division (ns / 1e9) can not hold a present-day timestamp at full
        # nanosecond resolution (a double has only ~17 significant decimal digits).
        sign = '-' if ns < 0 else ''
        seconds, fraction = divmod(abs(ns), 10 ** 9)
        return '%s%d.%09d' % (sign, seconds, fraction)

    ph = {}
    # note: for mtime this is a bit redundant as it is already done by tarfile module,
    # but we just do it in our way to be consistent for sure.
    for name in 'atime', 'ctime', 'mtime':
        if hasattr(item, name):
            ph[name] = ns_to_pax_time(getattr(item, name))
    if format == 'BORG':  # BORG format additions
        ph['BORG.item.version'] = '1'
        # BORG.item.meta - just serialize all metadata we have;
        # base64 makes the binary msgpack data safe to store as pax header text.
        meta_bin = msgpack.packb(item.as_dict())
        meta_text = base64.b64encode(meta_bin).decode()
        ph['BORG.item.meta'] = meta_text
    return ph

for item in archive.iter_items(filter, partial_extract=partial_extract,
Expand All @@ -1255,8 +1271,8 @@ def item_to_paxheaders(item):
item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
tarinfo, stream = item_to_tarinfo(item, orig_path)
if tarinfo:
if args.tar_format == 'PAX':
tarinfo.pax_headers = item_to_paxheaders(item)
if args.tar_format in ('BORG', 'PAX'):
tarinfo.pax_headers = item_to_paxheaders(args.tar_format, item)
if output_list:
logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
tar.addfile(tarinfo, stream)
Expand Down Expand Up @@ -4057,15 +4073,18 @@ def define_borg_mount(parser):
read the uncompressed tar stream from stdin and write a compressed/filtered
tar stream to stdout.

Depending on the ``--tar-format`` option, the generated tarball uses this format:
Depending on the ``--tar-format`` option, these formats are created:

- PAX: POSIX.1-2001 (pax) format
- GNU: GNU tar format

export-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.
+--------------+---------------------------+----------------------------+
| --tar-format | Specification | Metadata |
+--------------+---------------------------+----------------------------+
| BORG | BORG specific, like PAX | all as supported by borg |
+--------------+---------------------------+----------------------------+
| PAX | POSIX.1-2001 (pax) format | GNU + atime/ctime/mtime ns |
+--------------+---------------------------+----------------------------+
| GNU | GNU tar format | mtime s, no atime/ctime, |
| | | no ACLs/xattrs/bsdflags |
+--------------+---------------------------+----------------------------+

A ``--sparse`` option (as found in borg extract) is not supported.

Expand All @@ -4089,8 +4108,8 @@ def define_borg_mount(parser):
subparser.add_argument('--list', dest='output_list', action='store_true',
help='output verbose list of items (files, dirs, ...)')
subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
choices=('PAX', 'GNU'),
help='select tar format: PAX or GNU')
choices=('BORG', 'PAX', 'GNU'),
help='select tar format: BORG, PAX or GNU')
subparser.add_argument('location', metavar='ARCHIVE',
type=location_validator(archive=True),
help='archive to export')
Expand Down Expand Up @@ -4939,15 +4958,19 @@ def define_borg_mount(parser):
Most documentation of borg create applies. Note that this command does not
support excluding files.

import-tar is a lossy conversion:
BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
Timestamp resolution is limited to whole seconds, not the nanosecond resolution
otherwise supported by Borg.

A ``--sparse`` option (as found in borg create) is not supported.

import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar
and SunOS tar with extended attributes.
About tar formats and metadata conservation or loss, please see ``borg export-tar``.

import-tar reads these tar formats:

- BORG: borg specific (PAX-based)
- PAX: POSIX.1-2001
- GNU: GNU tar
- POSIX.1-1988 (ustar)
- UNIX V7 tar
- SunOS tar with extended attributes

""")
subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
description=self.do_import_tar.__doc__,
Expand Down
10 changes: 10 additions & 0 deletions src/borg/testsuite/archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -3499,6 +3499,16 @@ def test_import_tar_gz(self, tar_format='GNU'):
self.cmd('extract', self.repository_location + '::dst')
self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)

def test_roundtrip_pax_borg(self):
    """Round-trip an archive through a BORG-format tarball and compare the extracted files."""
    self.create_test_files()
    repo = self.repository_location
    src_archive = repo + '::src'
    dst_archive = repo + '::dst'
    tarball = 'simple.tar'

    # build the source archive from the test input files
    self.cmd('init', '--encryption=none', repo)
    self.cmd('create', src_archive, 'input')

    # export with the BORG tar format, then import the tarball as a second archive
    self.cmd('export-tar', src_archive, tarball, '--tar-format=BORG')
    self.cmd('import-tar', dst_archive, tarball)

    # extract the re-imported archive into the output directory
    with changedir(self.output_path):
        self.cmd('extract', dst_archive)

    # compared without any ignore_* options
    self.assert_dirs_equal('input', 'output/input')

# derived from test_extract_xattrs_errors()
@pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of'
'fakeroot')
Expand Down