diff --git a/zarr/storage.py b/zarr/storage.py index d30b0da6df..2a4e6ed86a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -32,6 +32,7 @@ import glob import warnings +import numpy as np from zarr.util import (json_loads, normalize_shape, normalize_chunks, normalize_order, normalize_storage_path, buffer_size, @@ -709,7 +710,7 @@ class DirectoryStore(MutableMapping): """ - def __init__(self, path): + def __init__(self, path, memmap=False): # guard conditions path = os.path.abspath(path) @@ -717,12 +718,16 @@ def __init__(self, path): err_fspath_exists_notdir(path) self.path = path + self.memmap = memmap def __getitem__(self, key): filepath = os.path.join(self.path, key) if os.path.isfile(filepath): - with open(filepath, 'rb') as f: - return f.read() + if self.memmap: + return memoryview(np.memmap(filepath, mode='r')) + else: + with open(filepath, 'rb') as f: + return f.read() else: raise KeyError(key) @@ -805,6 +810,29 @@ def __iter__(self): def __len__(self): return sum(1 for _ in self.keys()) + __marker = object() + + def pop(self, key, default=__marker): + filepath = os.path.join(self.path, key) + if os.path.isfile(filepath): + with open(filepath, 'rb') as f: + value = f.read() + os.remove(filepath) + return value + elif default is self.__marker: + raise KeyError(key) + else: + return default + + def popitem(self): + try: + key = next(self.keys()) + except StopIteration: + raise KeyError("Store empty") + else: + value = self.pop(key) + return (key, value) + def dir_path(self, path=None): store_path = normalize_storage_path(path) dir_path = self.path @@ -896,10 +924,10 @@ class TempStore(DirectoryStore): """ # noinspection PyShadowingBuiltins - def __init__(self, suffix='', prefix='zarr', dir=None): + def __init__(self, suffix='', prefix='zarr', dir=None, memmap=False): path = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir) atexit.register(atexit_rmtree, path) - super(TempStore, self).__init__(path) + super(TempStore, self).__init__(path, memmap=memmap) _prog_ckey = re.compile(r'^(\d+)(\.\d+)+$') @@ -982,8 +1010,8 @@ class NestedDirectoryStore(DirectoryStore): """ - def __init__(self, path): - super(NestedDirectoryStore, self).__init__(path) + def __init__(self, path, memmap=False): + super(NestedDirectoryStore, self).__init__(path, memmap=memmap) def __getitem__(self, key): key = _nested_map_ckey(key) @@ -1007,6 +1035,10 @@ def __eq__(self, other): self.path == other.path ) + def pop(self, key, *args, **kwargs): + key = _nested_map_ckey(key) + return super(NestedDirectoryStore, self).pop(key, *args, **kwargs) + def listdir(self, path=None): children = super(NestedDirectoryStore, self).listdir(path=path) if array_meta_key in children: diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 90461b9db4..f36895fcd2 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -21,6 +21,7 @@ except ImportError: # pragma: no cover asb = None +from numcodecs.compat import ensure_bytes from zarr.storage import (init_array, array_meta_key, attrs_key, DictStore, DirectoryStore, ZipStore, init_group, group_meta_key, @@ -65,7 +66,7 @@ def test_get_set_del_contains(self): store['foo'] store['foo'] = b'bar' assert 'foo' in store - assert b'bar' == store['foo'] + assert b'bar' == ensure_bytes(store['foo']) # test __delitem__ (optional) try: @@ -103,10 +104,10 @@ def test_pop(self): store['baz'] = b'qux' assert len(store) == 2 v = store.pop('foo') - assert v == b'bar' + assert ensure_bytes(v) == b'bar' assert len(store) == 1 v = store.pop('baz') - assert v == b'qux' + assert ensure_bytes(v) == b'qux' assert len(store) == 0 with pytest.raises(KeyError): store.pop('xxx') @@ -122,7 +123,7 @@ def test_popitem(self): store['foo'] = b'bar' k, v = store.popitem() assert k == 'foo' - assert v == b'bar' + assert ensure_bytes(v) == b'bar' assert len(store) == 0 with pytest.raises(KeyError): store.popitem() @@ -141,8 +142,8 @@ def test_update(self): assert 'foo' not in store assert 'baz' not in store store.update(foo=b'bar', baz=b'quux') - assert b'bar' == store['foo'] - assert b'quux' == store['baz'] + assert b'bar' == ensure_bytes(store['foo']) + assert b'quux' == ensure_bytes(store['baz']) def test_iterators(self): store = self.create_store() @@ -164,9 +165,9 @@ def test_iterators(self): assert 4 == len(store) assert {'a', 'b', 'c/d', 'c/e/f'} == set(store) assert {'a', 'b', 'c/d', 'c/e/f'} == set(store.keys()) - assert {b'aaa', b'bbb', b'ddd', b'fff'} == set(store.values()) + assert {b'aaa', b'bbb', b'ddd', b'fff'} == set(map(ensure_bytes, store.values())) assert ({('a', b'aaa'), ('b', b'bbb'), ('c/d', b'ddd'), ('c/e/f', b'fff')} == - set(store.items())) + set(map(lambda kv: (kv[0], ensure_bytes(kv[1])), store.items()))) def test_pickle(self): @@ -190,8 +191,8 @@ def test_pickle(self): # verify assert n == len(store2) assert keys == sorted(store2.keys()) - assert b'bar' == store2['foo'] - assert b'quux' == store2['baz'] + assert b'bar' == ensure_bytes(store2['foo']) + assert b'quux' == ensure_bytes(store2['baz']) def test_getsize(self): store = self.create_store() @@ -671,10 +672,10 @@ def setdel_hierarchy_checks(store): # test __setitem__ overwrite level store['x/y/z'] = b'xxx' store['x/y'] = b'yyy' - assert b'yyy' == store['x/y'] + assert b'yyy' == ensure_bytes(store['x/y']) assert 'x/y/z' not in store store['x'] = b'zzz' - assert b'zzz' == store['x'] + assert b'zzz' == ensure_bytes(store['x']) assert 'x/y' not in store # test __delitem__ overwrite level @@ -736,13 +737,22 @@ def test_pickle_ext(self): # check point to same underlying directory assert 'xxx' not in store store2['xxx'] = b'yyy' - assert b'yyy' == store['xxx'] + assert b'yyy' == ensure_bytes(store['xxx']) def test_setdel(self): store = self.create_store() setdel_hierarchy_checks(store) +class TestDirectoryStoreMemmap(TestDirectoryStore, unittest.TestCase): + + def create_store(self): + path = tempfile.mkdtemp() + atexit.register(atexit_rmtree, path) + store = DirectoryStore(path, memmap=True) + return store + + class TestNestedDirectoryStore(TestDirectoryStore, unittest.TestCase): def create_store(self):