From c6f59899057b8b496214d2ce65e3232e536eb7be Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 05:57:30 +0200 Subject: [PATCH 1/5] Item implementation + tests --- borg/item.py | 129 +++++++++++++++++++++++++++++++++++++++++ borg/testsuite/item.py | 106 +++++++++++++++++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 borg/item.py create mode 100644 borg/testsuite/item.py diff --git a/borg/item.py b/borg/item.py new file mode 100644 index 0000000000..ff0b71b552 --- /dev/null +++ b/borg/item.py @@ -0,0 +1,129 @@ +from .constants import ITEM_KEYS +from .helpers import safe_encode, safe_decode, StableDict + +# we want str keys for this code +ITEM_KEYS = set(key.decode() for key in ITEM_KEYS) + + +class Item: + """ + Item abstraction that deals with validation and the low-level details internally: + + - msgpack gives us a dict with bytes-typed keys - but we do not want to have the ugly + bytes-typed keys and the hard-to-type dict item access all over the place (like: item[b'keyname']), + so we define properties (and use it like: item.keyname) + - msgpack gives us byte-typed values for stuff that should be str, we need to decode/encode them here. + - we want to be safe against typos in keys and badly typed values, so we check them. + + Items are created either from msgpack unpacker output, from another dict or + built step-by-step by setting attributes. + + If an Item shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = ITEM_KEYS + + def __init__(self, data_dict=None, **kw): + if data_dict is None: + data = kw + elif not isinstance(data_dict, dict): + raise TypeError("data_dict must be dict") + else: + data = data_dict + # internally, we want an dict with only str-typed keys + _dict = {} + for k, v in data.items(): + if isinstance(k, bytes): + k = k.decode() + elif not isinstance(k, str): + raise TypeError("dict keys must be str or bytes, not %r" % k) + _dict[k] = v + unknown_keys = set(_dict) - self.VALID_KEYS + if unknown_keys: + raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys)) + self._dict = _dict + + def as_dict(self): + """return the internal dictionary""" + return self._dict # XXX use StableDict? + + def _check_key(self, key): + """make sure key is of type str and known""" + if not isinstance(key, str): + raise TypeError("key must be str") + if key not in self.VALID_KEYS: + raise ValueError("key '%s' unknown" % key) + return key + + def __contains__(self, key): + """do we have this key?""" + return self._check_key(key) in self._dict + + def get(self, key, default=None): + """get value for key, return default if key does not exist""" + return getattr(self, self._check_key(key), default) + + def _make_property(key, value_type, value_type_name=None, encode=None, decode=None): + """return a property that deals with self._dict[key]: + + - sets the value (checking type and optionally encoding it) + - gets the value (optionally decoding it) + - deletes the entry from the internal dict + - creates reasonable docstring and exceptions / exception messages + """ + assert isinstance(key, str) + if value_type_name is None: + value_type_name = value_type.__name__ + doc = "%s (%s)" % (key, value_type_name) + type_error_msg = "%s value must be %s" % (key, value_type_name) + attr_error_msg = "attribute %s not found" % key + + def _get(self): + try: + value = self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + if decode is not None: + value = decode(value) + return value + + def _set(self, value): + if not isinstance(value, value_type): + raise TypeError(type_error_msg) + if encode is not None: + value = encode(value) + self._dict[key] = value + + def _del(self): + try: + del self._dict[key] + except KeyError: + raise AttributeError(attr_error_msg) from None + + return property(_get, _set, _del, doc=doc) + + # properties statically defined, so that IDEs can know their names: + + path = _make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + source = _make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = _make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + group = _make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_access = _make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_default = _make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_extended = _make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_nfs4 = _make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + + mode = _make_property('mode', int) + uid = _make_property('uid', int) + gid = _make_property('gid', int) + atime = _make_property('atime', int) + ctime = _make_property('ctime', int) + mtime = _make_property('mtime', int) + rdev = _make_property('rdev', int) + bsdflags = _make_property('bsdflags', int) + + hardlink_master = _make_property('hardlink_master', bool) + + chunks = _make_property('chunks', list) + + xattrs = _make_property('xattrs', StableDict) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py new file mode 100644 index 0000000000..e41d6ae18a --- /dev/null +++ b/borg/testsuite/item.py @@ -0,0 +1,106 @@ +import pytest + +from ..item import Item +from ..helpers import StableDict + + +def test_item_empty(): + item = Item() + + assert item.as_dict() == {} + + assert 'path' not in item + with pytest.raises(ValueError): + 'invalid-key' in item + with pytest.raises(TypeError): + b'path' in item + with pytest.raises(TypeError): + 42 in item + + assert item.get('mode') is None + assert item.get('mode', 0o666) == 0o666 + with pytest.raises(ValueError): + item.get('invalid-key') + with pytest.raises(TypeError): + item.get(b'mode') + with pytest.raises(TypeError): + item.get(42) + + with pytest.raises(AttributeError): + item.path + + with pytest.raises(AttributeError): + del item.path + + +def test_item_from_dict(): + # does not matter whether we get str or bytes keys + item = Item({b'path': b'/a/b/c', b'mode': 0o666}) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + assert 'path' in item + + item = Item({'path': b'/a/b/c', 'mode': 0o666}) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + assert 'mode' in item + + +def test_item_from_kw(): + item = Item(path=b'/a/b/c', mode=0o666) + assert item.path == '/a/b/c' + assert item.mode == 0o666 + + +def test_item_int_property(): + item = Item() + item.mode = 0o666 + assert item.mode == 0o666 + assert item.as_dict() == {'mode': 0o666} + del item.mode + assert item.as_dict() == {} + with pytest.raises(TypeError): + item.mode = "invalid" + + +def test_item_se_str_property(): + # start simple + item = Item() + item.path = '/a/b/c' + assert item.path == '/a/b/c' + assert item.as_dict() == {'path': b'/a/b/c'} + del item.path + assert item.as_dict() == {} + with pytest.raises(TypeError): + item.path = 42 + + # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut + item = Item({'path': b'/a/\xfc/c'}) + assert item.path == '/a/\udcfc/c' # getting a surrogate-escaped representation + assert item.as_dict() == {'path': b'/a/\xfc/c'} + del item.path + assert 'path' not in item + item.path = '/a/\udcfc/c' # setting using a surrogate-escaped representation + assert item.as_dict() == {'path': b'/a/\xfc/c'} + + +def test_item_list_property(): + item = Item() + item.chunks = [] + assert item.chunks == [] + item.chunks.append(0) + assert item.chunks == [0] + item.chunks.append(1) + assert item.chunks == [0, 1] + assert item.as_dict() == {'chunks': [0, 1]} + + +def test_item_dict_property(): + item = Item() + item.xattrs = StableDict() + assert item.xattrs == StableDict() + item.xattrs['foo'] = 'bar' + assert item.xattrs['foo'] == 'bar' + item.xattrs['bar'] = 'baz' + assert item.xattrs == StableDict({'foo': 'bar', 'bar': 'baz'}) + assert item.as_dict() == {'xattrs': {'foo': 'bar', 'bar': 'baz'}} From c18209fc908f15eaa90f3b2f6ba5479670408f1f Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 07:39:17 +0200 Subject: [PATCH 2/5] split into generic PropDict and special Item class --- borg/item.py | 104 +++++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 49 deletions(-) diff --git a/borg/item.py b/borg/item.py index ff0b71b552..2bf9c650b1 100644 --- a/borg/item.py +++ b/borg/item.py @@ -1,27 +1,21 @@ from .constants import ITEM_KEYS from .helpers import safe_encode, safe_decode, StableDict -# we want str keys for this code -ITEM_KEYS = set(key.decode() for key in ITEM_KEYS) - -class Item: +class PropDict: """ - Item abstraction that deals with validation and the low-level details internally: - - - msgpack gives us a dict with bytes-typed keys - but we do not want to have the ugly - bytes-typed keys and the hard-to-type dict item access all over the place (like: item[b'keyname']), - so we define properties (and use it like: item.keyname) - - msgpack gives us byte-typed values for stuff that should be str, we need to decode/encode them here. - - we want to be safe against typos in keys and badly typed values, so we check them. - - Items are created either from msgpack unpacker output, from another dict or - built step-by-step by setting attributes. - - If an Item shall be serialized, give as_dict() method output to msgpack packer. + Manage a dictionary via properties. + + - initialization by giving a dict or kw args + - on initialization, normalize dict keys to be str type + - access dict via properties, like: x.key_name + - membership check via: 'key_name' in x + - optionally, encode when setting a value + - optionally, decode when getting a value + - be safe against typos in key names: check against VALID_KEYS + - when setting a value: check type of value """ - - VALID_KEYS = ITEM_KEYS + VALID_KEYS = None # override with in child class def __init__(self, data_dict=None, **kw): if data_dict is None: @@ -52,7 +46,7 @@ def _check_key(self, key): if not isinstance(key, str): raise TypeError("key must be str") if key not in self.VALID_KEYS: - raise ValueError("key '%s' unknown" % key) + raise ValueError("key '%s' is not a valid key" % key) return key def __contains__(self, key): @@ -63,14 +57,9 @@ def get(self, key, default=None): """get value for key, return default if key does not exist""" return getattr(self, self._check_key(key), default) + @staticmethod def _make_property(key, value_type, value_type_name=None, encode=None, decode=None): - """return a property that deals with self._dict[key]: - - - sets the value (checking type and optionally encoding it) - - gets the value (optionally decoding it) - - deletes the entry from the internal dict - - creates reasonable docstring and exceptions / exception messages - """ + """return a property that deals with self._dict[key]""" assert isinstance(key, str) if value_type_name is None: value_type_name = value_type.__name__ @@ -102,28 +91,45 @@ def _del(self): return property(_get, _set, _del, doc=doc) + +class Item(PropDict): + """ + Item abstraction that deals with validation and the low-level details internally: + + Items are created either from msgpack unpacker output, from another dict, from kwargs or + built step-by-step by setting attributes. + + msgpack gives us a dict with bytes-typed keys, just give it to Item(d) and use item.key_name later. + msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting + such a property and encode when setting it. + + If an Item shall be serialized, give as_dict() method output to msgpack packer. + """ + + VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys + # properties statically defined, so that IDEs can know their names: - path = _make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - source = _make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = _make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - group = _make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_access = _make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_default = _make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_extended = _make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - acl_nfs4 = _make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - - mode = _make_property('mode', int) - uid = _make_property('uid', int) - gid = _make_property('gid', int) - atime = _make_property('atime', int) - ctime = _make_property('ctime', int) - mtime = _make_property('mtime', int) - rdev = _make_property('rdev', int) - bsdflags = _make_property('bsdflags', int) - - hardlink_master = _make_property('hardlink_master', bool) - - chunks = _make_property('chunks', list) - - xattrs = _make_property('xattrs', StableDict) + path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + + mode = PropDict._make_property('mode', int) + uid = PropDict._make_property('uid', int) + gid = PropDict._make_property('gid', int) + atime = PropDict._make_property('atime', int) + ctime = PropDict._make_property('ctime', int) + mtime = PropDict._make_property('mtime', int) + rdev = PropDict._make_property('rdev', int) + bsdflags = PropDict._make_property('bsdflags', int) + + hardlink_master = PropDict._make_property('hardlink_master', bool) + + chunks = PropDict._make_property('chunks', list) + + xattrs = PropDict._make_property('xattrs', StableDict) From e81407ff813137aa0bd48d33b270c9cefa33a555 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 14:08:47 +0200 Subject: [PATCH 3/5] add bigint coding, allow None as user/group --- borg/helpers.py | 4 ++++ borg/item.py | 14 ++++++++------ borg/testsuite/item.py | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/borg/helpers.py b/borg/helpers.py index b342c60e05..15c01bb7c2 100644 --- a/borg/helpers.py +++ b/borg/helpers.py @@ -681,11 +681,15 @@ def posix_acl_use_stored_uid_gid(acl): def safe_decode(s, coding='utf-8', errors='surrogateescape'): """decode bytes to str, with round-tripping "invalid" bytes""" + if s is None: + return None return s.decode(coding, errors) def safe_encode(s, coding='utf-8', errors='surrogateescape'): """encode str to bytes, with round-tripping "invalid" bytes""" + if s is None: + return None return s.encode(coding, errors) diff --git a/borg/item.py b/borg/item.py index 2bf9c650b1..b0cbf12a39 100644 --- a/borg/item.py +++ b/borg/item.py @@ -1,5 +1,5 @@ from .constants import ITEM_KEYS -from .helpers import safe_encode, safe_decode, StableDict +from .helpers import safe_encode, safe_decode, bigint_to_int, int_to_bigint, StableDict class PropDict: @@ -112,22 +112,24 @@ class Item(PropDict): path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - user = PropDict._make_property('user', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) - group = PropDict._make_property('group', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) + user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode) + mode = PropDict._make_property('mode', int) uid = PropDict._make_property('uid', int) gid = PropDict._make_property('gid', int) - atime = PropDict._make_property('atime', int) - ctime = PropDict._make_property('ctime', int) - mtime = PropDict._make_property('mtime', int) rdev = PropDict._make_property('rdev', int) bsdflags = PropDict._make_property('bsdflags', int) + atime = PropDict._make_property('atime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + ctime = PropDict._make_property('ctime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + mtime = PropDict._make_property('mtime', int, 'bigint', encode=int_to_bigint, decode=bigint_to_int) + hardlink_master = PropDict._make_property('hardlink_master', bool) chunks = PropDict._make_property('chunks', list) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index e41d6ae18a..16e887cbbf 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -63,6 +63,25 @@ def test_item_int_property(): item.mode = "invalid" +def test_item_bigint_property(): + item = Item() + small, big = 42, 2 ** 65 + item.atime = small + assert item.atime == small + assert item.as_dict() == {'atime': small} + item.atime = big + assert item.atime == big + assert item.as_dict() == {'atime': b'\0' * 8 + b'\x02'} + + +def test_item_user_group_none(): + item = Item() + item.user = None + assert item.user is None + item.group = None + assert item.group is None + + def test_item_se_str_property(): # start simple item = Item() From 46362a1962d68f42f18d6d4fd6acf0d8ccc54f58 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Mon, 25 Apr 2016 14:12:39 +0200 Subject: [PATCH 4/5] add more tests --- borg/testsuite/item.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index 16e887cbbf..f8b6b51176 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -40,11 +40,24 @@ def test_item_from_dict(): assert item.mode == 0o666 assert 'path' in item + # does not matter whether we get str or bytes keys item = Item({'path': b'/a/b/c', 'mode': 0o666}) assert item.path == '/a/b/c' assert item.mode == 0o666 assert 'mode' in item + # invalid - no dict + with pytest.raises(TypeError): + Item(42) + + # invalid - no bytes/str key + with pytest.raises(TypeError): + Item({42: 23}) + + # invalid - unknown key + with pytest.raises(ValueError): + Item({'foobar': 'baz'}) + def test_item_from_kw(): item = Item(path=b'/a/b/c', mode=0o666) From e6cf4ee627d367cbd6cfc165f48285190b1c7ef7 Mon Sep 17 00:00:00 2001 From: Thomas Waldmann Date: Wed, 27 Apr 2016 23:17:10 +0200 Subject: [PATCH 5/5] disallow setting unknown attributes, use StableDict as .as_dict() result --- borg/item.py | 6 +++++- borg/testsuite/item.py | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/borg/item.py b/borg/item.py index b0cbf12a39..05ac47a2c5 100644 --- a/borg/item.py +++ b/borg/item.py @@ -17,6 +17,8 @@ class PropDict: """ VALID_KEYS = None # override with in child class + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + def __init__(self, data_dict=None, **kw): if data_dict is None: data = kw @@ -39,7 +41,7 @@ def __init__(self, data_dict=None, **kw): def as_dict(self): """return the internal dictionary""" - return self._dict # XXX use StableDict? + return StableDict(self._dict) def _check_key(self, key): """make sure key is of type str and known""" @@ -108,6 +110,8 @@ class Item(PropDict): VALID_KEYS = set(key.decode() for key in ITEM_KEYS) # we want str-typed keys + __slots__ = ("_dict", ) # avoid setting attributes not supported by properties + # properties statically defined, so that IDEs can know their names: path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode) diff --git a/borg/testsuite/item.py b/borg/testsuite/item.py index f8b6b51176..bd66e48314 100644 --- a/borg/testsuite/item.py +++ b/borg/testsuite/item.py @@ -136,3 +136,12 @@ def test_item_dict_property(): item.xattrs['bar'] = 'baz' assert item.xattrs == StableDict({'foo': 'bar', 'bar': 'baz'}) assert item.as_dict() == {'xattrs': {'foo': 'bar', 'bar': 'baz'}} + + +def test_unknown_property(): + # we do not want the user to be able to set unknown attributes - + # they won't get into the .as_dict() result dictionary. + # also they might be just typos of known attributes. + item = Item() + with pytest.raises(AttributeError): + item.unknown_attribute = None