From 9d6d07d2c16227e955253b6c9b2d35ad77e192cf Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 11:12:44 -0400 Subject: [PATCH 01/42] fix conflicts --- docs/tutorial.rst | 12 +- mypy.ini | 2 +- pytest.ini | 2 + zarr/convenience.py | 71 ++++---- zarr/core.py | 17 +- zarr/creation.py | 11 +- zarr/hierarchy.py | 36 ++-- zarr/storage.py | 295 +++++++++++++++++++++++++++------ zarr/tests/test_convenience.py | 8 +- zarr/tests/test_core.py | 221 ++++++++++-------------- zarr/tests/test_creation.py | 45 ++--- zarr/tests/test_hierarchy.py | 76 +++------ zarr/tests/test_storage.py | 255 +++++++++++++++------------- zarr/tests/util.py | 5 +- 14 files changed, 618 insertions(+), 438 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index a3421608cc..51b81e16ef 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -176,7 +176,7 @@ print some diagnostics, e.g.:: Read-only : False Compressor : Blosc(cname='zstd', clevel=3, shuffle=BITSHUFFLE, : blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 3379344 (3.2M) Storage ratio : 118.4 @@ -268,7 +268,7 @@ Here is an example using a delta filter with the Blosc compressor:: Read-only : False Filter [0] : Delta(dtype='>> z[:] array([b'H', b'e', b'l', b'l', b'o', b' ', b'f', b'r', b'o', b'm', b' ', @@ -1264,7 +1266,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 6696010 (6.4M) Storage ratio : 59.7 @@ -1278,7 +1280,7 @@ ratios, depending on the correlation structure within the data. E.g.:: Order : F Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 400000000 (381.5M) No. bytes stored : 4684636 (4.5M) Storage ratio : 85.4 diff --git a/mypy.ini b/mypy.ini index 998aed51a2..7c1be49cd6 100644 --- a/mypy.ini +++ b/mypy.ini @@ -1,4 +1,4 @@ [mypy] -python_version = 3.6 +python_version = 3.8 ignore_missing_imports = True follow_imports = silent diff --git a/pytest.ini b/pytest.ini index 61a0a99ab5..8e3c0adb22 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,6 @@ doctest_optionflags = NORMALIZE_WHITESPACE ELLIPSIS IGNORE_EXCEPTION_DETAIL addopts = --durations=10 filterwarnings = error::DeprecationWarning:zarr.* + error::UserWarning:zarr.* ignore:PY_SSIZE_T_CLEAN will be required.*:DeprecationWarning + ignore:The loop argument is deprecated since Python 3.8.*:DeprecationWarning diff --git a/zarr/convenience.py b/zarr/convenience.py index 1ed0f92ff3..f380536ca3 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -12,17 +12,21 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group +from zarr.storage import contains_array, contains_group, Store from zarr.util import TreeViewer, buffer_size, normalize_storage_path +from typing import Union + +StoreLike = Union[Store, str, None] + # noinspection PyShadowingBuiltins -def open(store=None, mode='a', **kwargs): +def open(store: StoreLike = None, mode: str = "a", **kwargs): """Convenience function to open a group or array using file-mode-like semantics. Parameters ---------- - store : MutableMapping or string, optional + store : Store or string, optional Store or path to directory in file system or name of zip file. mode : {'r', 'r+', 'a', 'w', 'w-'}, optional Persistence mode: 'r' means read only (must exist); 'r+' means @@ -75,32 +79,33 @@ def open(store=None, mode='a', **kwargs): clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL - store = normalize_store_arg(store, clobber=clobber, - storage_options=kwargs.pop("storage_options", {})) + _store: Store = normalize_store_arg( + store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) + ) path = normalize_storage_path(path) if mode in {'w', 'w-', 'x'}: if 'shape' in kwargs: - return open_array(store, mode=mode, **kwargs) + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) elif mode == "a": - if "shape" in kwargs or contains_array(store, path): - return open_array(store, mode=mode, **kwargs) + if "shape" in kwargs or contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) else: - return open_group(store, mode=mode, **kwargs) + return open_group(_store, mode=mode, **kwargs) else: - if contains_array(store, path): - return open_array(store, mode=mode, **kwargs) - elif contains_group(store, path): - return open_group(store, mode=mode, **kwargs) + if contains_array(_store, path): + return open_array(_store, mode=mode, **kwargs) + elif contains_group(_store, path): + return open_group(_store, mode=mode, **kwargs) else: raise PathNotFoundError(path) -def save_array(store, arr, **kwargs): +def save_array(store: StoreLike, arr, **kwargs): """Convenience function to save a NumPy array to the local file system, following a similar API to the NumPy save() function. @@ -132,16 +137,16 @@ def save_array(store, arr, **kwargs): """ may_need_closing = isinstance(store, str) - store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: - _create_array(arr, store=store, overwrite=True, **kwargs) + _create_array(arr, store=_store, overwrite=True, **kwargs) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save_group(store, *args, **kwargs): +def save_group(store: StoreLike, *args, **kwargs): """Convenience function to save several NumPy arrays to the local file system, following a similar API to the NumPy savez()/savez_compressed() functions. @@ -203,21 +208,21 @@ def save_group(store, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = isinstance(store, str) - store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: - grp = _create_group(store, overwrite=True) + grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): k = 'arr_{}'.format(i) grp.create_dataset(k, data=arr, overwrite=True) for k, arr in kwargs.items(): grp.create_dataset(k, data=arr, overwrite=True) finally: - if may_need_closing and hasattr(store, 'close'): + if may_need_closing: # needed to ensure zip file records are written - store.close() + _store.close() -def save(store, *args, **kwargs): +def save(store: StoreLike, *args, **kwargs): """Convenience function to save an array or group of arrays to the local file system. Parameters @@ -327,7 +332,7 @@ def __repr__(self): return r -def load(store): +def load(store: StoreLike): """Load data from an array or group into memory. Parameters @@ -353,11 +358,11 @@ def load(store): """ # handle polymorphic store arg - store = normalize_store_arg(store) - if contains_array(store, path=None): - return Array(store=store, path=None)[...] - elif contains_group(store, path=None): - grp = Group(store=store, path=None) + _store = normalize_store_arg(store) + if contains_array(_store, path=None): + return Array(store=_store, path=None)[...] + elif contains_group(_store, path=None): + grp = Group(store=_store, path=None) return LazyLoader(grp) @@ -1073,7 +1078,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store, metadata_key='.zmetadata'): +def consolidate_metadata(store: Store, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1124,7 +1129,7 @@ def is_zarr_key(key): return open_consolidated(store, metadata_key=metadata_key) -def open_consolidated(store, metadata_key='.zmetadata', mode='r+', **kwargs): +def open_consolidated(store: Store, metadata_key=".zmetadata", mode="r+", **kwargs): """Open group using metadata previously consolidated into a single key. This is an optimised method for opening a Zarr group, where instead of diff --git a/zarr/core.py b/zarr/core.py index 3df8043000..60d92a2c52 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -9,6 +9,8 @@ import numpy as np from numcodecs.compat import ensure_bytes, ensure_ndarray +from collections.abc import MutableMapping + from zarr.attrs import Attributes from zarr.codecs import AsType, get_codec from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError @@ -29,7 +31,7 @@ pop_fields, ) from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir +from zarr.storage import array_meta_key, attrs_key, getsize, listdir, Store from zarr.util import ( InfoReporter, check_array_shape, @@ -130,7 +132,7 @@ class Array: def __init__( self, - store, + store: Store, path=None, read_only=False, chunk_store=None, @@ -142,6 +144,9 @@ def __init__( # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized + store = Store._ensure_store(store) + chunk_store = Store._ensure_store(chunk_store) + self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -2011,7 +2016,7 @@ def _encode_chunk(self, chunk): cdata = chunk # ensure in-memory data is immutable and easy to compare - if isinstance(self.chunk_store, dict): + if isinstance(self.chunk_store, MutableMapping): cdata = ensure_bytes(cdata) return cdata @@ -2044,10 +2049,10 @@ def info(self): Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : builtins.dict + Store type : zarr.storage.KVStore No. bytes : 4000000 (3.8M) - No. bytes stored : ... - Storage ratio : ... + No. bytes stored : 320 + Storage ratio : 12500.0 Chunks initialized : 0/10 """ diff --git a/zarr/creation.py b/zarr/creation.py index 73e10adff1..600252b5ba 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -2,6 +2,7 @@ import numpy as np from numcodecs.registry import codec_registry +from collections.abc import MutableMapping from zarr.core import Array from zarr.errors import ( @@ -10,9 +11,9 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, contains_array, +from zarr.storage import (DirectoryStore, ZipStore, KVStore, contains_array, contains_group, default_compressor, init_array, - normalize_storage_path, FSStore) + normalize_storage_path, FSStore, Store) from zarr.util import normalize_dimension_separator @@ -145,9 +146,9 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, storage_options=None, mode='w'): +def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> Store: if store is None: - return dict() + return Store._ensure_store(dict()) elif isinstance(store, str): mode = mode if clobber else "r" if "://" in store or "::" in store: @@ -161,6 +162,8 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode='w'): else: return DirectoryStore(store) else: + if not isinstance(store, Store) and isinstance(store, MutableMapping): + store = KVStore(store) return store diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 89804d445b..11717e572b 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -15,11 +15,26 @@ ReadOnlyError, ) from zarr.meta import decode_group_metadata -from zarr.storage import (MemoryStore, attrs_key, contains_array, - contains_group, group_meta_key, init_group, listdir, - rename, rmdir) -from zarr.util import (InfoReporter, TreeViewer, is_valid_python_name, nolock, - normalize_shape, normalize_storage_path) +from zarr.storage import ( + MemoryStore, + attrs_key, + contains_array, + contains_group, + group_meta_key, + init_group, + listdir, + rename, + rmdir, + Store, +) +from zarr.util import ( + InfoReporter, + TreeViewer, + is_valid_python_name, + nolock, + normalize_shape, + normalize_storage_path, +) class Group(MutableMapping): @@ -96,6 +111,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): + store = Store._ensure_store(store) + chunk_store = Store._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) @@ -237,11 +254,8 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - """If the underlying Store has a ``close`` method, call it.""" - try: - self.store.close() - except AttributeError: - pass + """If the underlying Store should always heave a ``close`` method, call it.""" + self.store.close() def info_items(self): @@ -804,11 +818,13 @@ def create_dataset(self, name, **kwargs): """ + assert "mode" not in kwargs return self._write_op(self._create_dataset_nosync, name, **kwargs) def _create_dataset_nosync(self, name, data=None, **kwargs): + assert "mode" not in kwargs path = self._item_path(name) # determine synchronizer diff --git a/zarr/storage.py b/zarr/storage.py index d2de2cda4c..06e213985f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,7 +31,7 @@ from os import scandir from pickle import PicklingError from threading import Lock, RLock -from typing import Optional, Union, List, Tuple, Dict +from typing import Optional, Union, List, Tuple, Dict, Any import uuid import time @@ -79,6 +79,124 @@ Path = Union[str, bytes, None] +class Store(MutableMapping): + """Base class for stores implementation. + + + Provide a number of default method as well as other typing guaranties for mypy. + + Stores cannot be mutable mapping as they do have a couple of other + requirements that would break Liskov substitution principle (stores only + allow strings as keys, mutable mapping are more generic). + + And Stores do requires a few other method. + + Having no-op base method also helps simplifying store usage and do not need + to check the presence of attributes and methods, like `close()`. + + Stores can be used as context manager to make sure they close on exit. + + .. added: 2.5.0 + + """ + + _readable = True + _writeable = True + _erasable = True + _listable = True + + def is_readable(self): + return self._readable + + def is_writeable(self): + return self._writeable + + def is_listable(self): + return self._listable + + def is_erasable(self): + return self._erasable + + def __enter__(self): + if not hasattr(self, "_open_count"): + self._open_count = 0 + self._open_count += 1 + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._open_count -= 1 + if self._open_count == 0: + self.close() + + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + def rename(self, src_path: str, dst_path: str) -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) + _rename_from_keys(self, src_path, dst_path) + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + # def getsize(self, path: Path = None): + # pass + + # def clear(self): + # pass + + def close(self) -> None: + """Do nothing by default""" + pass + + # def pop(self, key): + # raise NotImplementedError + + @staticmethod + def _ensure_store(store): + """ + We want to make sure internally that zarr storage are internally always + a class with a specific interface derived from Store, which is slightly + different than mutable mapping. + + We'll do this conversion in a few places automatically + """ + if store is None: + return None + elif isinstance(store, Store): + return store + elif isinstance(store, MutableMapping): + return KVStore(store) + else: + for attr in [ + "keys", + "values", + "get", + "__setitem__", + "__getitem__", + "__delitem__", + "__contains__", + ]: + if not hasattr(store, attr): + break + else: + return KVStore(store) + + raise ValueError( + "Starting with Zarr X.y.z, stores must be subclasses of Store, if " + "you store expose the MutableMapping interface wrap them in " + f"Zarr.storage.KVStore. Got {store}" + ) + + def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: @@ -88,7 +206,7 @@ def _path_to_prefix(path: Optional[str]) -> str: return prefix -def contains_array(store: MutableMapping, path: Path = None) -> bool: +def contains_array(store: Store, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -96,7 +214,7 @@ def contains_array(store: MutableMapping, path: Path = None) -> bool: return key in store -def contains_group(store: MutableMapping, path: Path = None) -> bool: +def contains_group(store: Store, path: Path = None) -> bool: """Return True if the store contains a group at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -104,7 +222,7 @@ def contains_group(store: MutableMapping, path: Path = None) -> bool: return key in store -def _rmdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> None: +def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: # assume path already normalized prefix = _path_to_prefix(path) for key in list(store.keys()): @@ -112,20 +230,20 @@ def _rmdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> None: del store[key] -def rmdir(store, path: Path = None): +def rmdir(store: Store, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the - `MutableMapping` interface.""" + `Store` interface.""" path = normalize_storage_path(path) if hasattr(store, 'rmdir'): # pass through - store.rmdir(path) + store.rmdir(path) # type: ignore else: # slow version, delete one key at a time _rmdir_from_keys(store, path) -def _rename_from_keys(store: MutableMapping, src_path: str, dst_path: str) -> None: +def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) @@ -135,10 +253,10 @@ def _rename_from_keys(store: MutableMapping, src_path: str, dst_path: str) -> No store[new_key] = store.pop(key) -def rename(store, src_path: Path, dst_path: Path): +def rename(store: Store, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the - `MutableMapping` interface.""" + `Store` interface.""" src_path = normalize_storage_path(src_path) dst_path = normalize_storage_path(dst_path) if hasattr(store, 'rename'): @@ -149,7 +267,7 @@ def rename(store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> List[str]: +def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: # assume path already normalized prefix = _path_to_prefix(path) children = set() @@ -161,27 +279,32 @@ def _listdir_from_keys(store: MutableMapping, path: Optional[str] = None) -> Lis return sorted(children) -def listdir(store, path: Path = None): +def listdir(store: Store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" path = normalize_storage_path(path) if hasattr(store, 'listdir'): # pass through - return store.listdir(path) + return store.listdir(path) # type: ignore else: # slow version, iterate through all keys + warnings.warn( + "Store {store} has not `listdir` method. From zarr 2.5 you may want" + "to inherit from `Store`.".format(store=store), + stacklevel=2, + ) return _listdir_from_keys(store, path) -def getsize(store, path: Path = None) -> int: +def getsize(store: Store, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" path = normalize_storage_path(path) if hasattr(store, 'getsize'): # pass through - return store.getsize(path) - elif isinstance(store, dict): + return store.getsize(path) # type: ignore + elif isinstance(store, MutableMapping): # compute from size of values if path in store: v = store[path] @@ -207,8 +330,8 @@ def getsize(store, path: Path = None) -> int: def _require_parent_group( path: Optional[str], - store: MutableMapping, - chunk_store: Optional[MutableMapping], + store: Store, + chunk_store: Optional[Store], overwrite: bool, ): # assume path is normalized @@ -224,7 +347,7 @@ def _require_parent_group( def init_array( - store: MutableMapping, + store: Store, shape: Tuple[int, ...], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, @@ -233,7 +356,7 @@ def init_array( order: str = "C", overwrite: bool = False, path: Path = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, filters=None, object_codec=None, dimension_separator=None, @@ -243,7 +366,7 @@ def init_array( Parameters ---------- - store : MutableMapping + store : Store A mapping that supports string keys and bytes-like values. shape : int or tuple of ints Array shape. @@ -262,7 +385,7 @@ def init_array( If True, erase all data in `store` prior to initialisation. path : string, bytes, optional Path under which array is stored. - chunk_store : MutableMapping, optional + chunk_store : Store, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. filters : sequence, optional @@ -455,23 +578,23 @@ def _init_array_metadata( def init_group( - store: MutableMapping, + store: Store, overwrite: bool = False, path: Path = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, ): """Initialize a group store. Note that this is a low-level function and there should be no need to call this directly from user code. Parameters ---------- - store : MutableMapping + store : Store A mapping that supports string keys and byte sequence values. overwrite : bool, optional If True, erase all data in `store` prior to initialisation. path : string, optional Path under which array is stored. - chunk_store : MutableMapping, optional + chunk_store : Store, optional Separate storage for chunks. If not provided, `store` will be used for storage of both chunks and metadata. @@ -490,10 +613,10 @@ def init_group( def _init_group_metadata( - store: MutableMapping, + store: Store, overwrite: Optional[bool] = False, path: Optional[str] = None, - chunk_store: MutableMapping = None, + chunk_store: Store = None, ): # guard conditions @@ -525,7 +648,69 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): yield prefix + k -class MemoryStore(MutableMapping): +class KVStore(Store): + """ + This provide an default implementation of a store interface around + a mutable mapping, to avoid having to test stores for presence of methods + + This, for most method should just be a pass-through to the underlying KV + store which is likely to expose a MuttableMapping interface, + """ + + def __init__(self, mutablemapping): + self.mm = mutablemapping + + def __getitem__(self, key): + return self.mm[key] + + def __setitem__(self, key, value): + # assert isinstance(value, bytes) + self.mm[key] = value + + def __delitem__(self, key): + del self.mm[key] + + def get(self, key, default=None): + return self.mm.get(key, default) + + def values(self): + return self.mm.values() + + def __iter__(self): + return iter(self.mm) + + def __len__(self): + return len(self.mm) + + def __repr__(self): + return f"<{self.__class__.__name__}: \n{repr(self.mm)}\n at {hex(id(self))}>" + + def __eq__(self, other): + if isinstance(other, KVStore): + return self.mm == other.mm + else: + return NotImplemented + + # def __contains__(self, key): + # return key in self.mm + + # def pop(self): + # return self.mm.pop() + + # def popitem + # def clear + # def update + # def setdefault + # def __eq__ + # def __ne__ + # def __contains__ + # def keys() + # def items() + # def values() + # def get + + +class MemoryStore(Store): """Store class that uses a hierarchy of :class:`dict` objects, thus all data will be held in main memory. @@ -543,7 +728,7 @@ class MemoryStore(MutableMapping): >>> z = zarr.zeros(100) >>> type(z.store) - + Notes ----- @@ -729,7 +914,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) -class DirectoryStore(MutableMapping): +class DirectoryStore(Store): """Storage class using directories and files on a standard file system. Parameters @@ -1015,7 +1200,7 @@ def atexit_rmglob(path, rmtree(p) -class FSStore(MutableMapping): +class FSStore(Store): """Wraps an fsspec.FSMap to give access to arbitrary filesystems Requires that ``fsspec`` is installed, as well as any additional @@ -1359,7 +1544,7 @@ def listdir(self, path=None): # noinspection PyPep8Naming -class ZipStore(MutableMapping): +class ZipStore(Store): """Storage class using a Zip file. Parameters @@ -1451,6 +1636,8 @@ class also supports the context manager protocol, which ensures the ``close()`` """ + _erasable = False + def __init__(self, path, compression=zipfile.ZIP_STORED, allowZip64=True, mode='a', dimension_separator=None): @@ -1613,7 +1800,7 @@ def migrate_1to2(store): Parameters ---------- - store : MutableMapping + store : Store Store to be migrated. Notes @@ -1657,7 +1844,7 @@ def migrate_1to2(store): # noinspection PyShadowingBuiltins -class DBMStore(MutableMapping): +class DBMStore(Store): """Storage class using a DBM-style database. Parameters @@ -1849,7 +2036,7 @@ def __contains__(self, key): return key in self.db -class LMDBStore(MutableMapping): +class LMDBStore(Store): """Storage class using LMDB. Requires the `lmdb `_ package to be installed. @@ -2026,7 +2213,7 @@ def __len__(self): return self.db.stat()['entries'] -class LRUStoreCache(MutableMapping): +class LRUStoreCache(Store): """Storage class that implements a least-recently-used (LRU) cache layer over some other store. Intended primarily for use with stores that can be slow to access, e.g., remote stores that require network communication to store and @@ -2034,7 +2221,7 @@ class LRUStoreCache(MutableMapping): Parameters ---------- - store : MutableMapping + store : Store The store containing the actual data to be cached. max_size : int The maximum size that the cache may grow to, in number of bytes. Provide `None` @@ -2063,14 +2250,14 @@ class LRUStoreCache(MutableMapping): """ - def __init__(self, store, max_size): - self._store = store + def __init__(self, store: Store, max_size: int): + self._store = Store._ensure_store(store) self._max_size = max_size self._current_size = 0 self._keys_cache = None self._contains_cache = None - self._listdir_cache = dict() - self._values_cache = OrderedDict() + self._listdir_cache: Dict[Path, Any] = dict() + self._values_cache: Dict[Path, Any] = OrderedDict() self._mutex = Lock() self.hits = self.misses = 0 @@ -2110,7 +2297,7 @@ def _keys(self): self._keys_cache = list(self._store.keys()) return self._keys_cache - def listdir(self, path=None): + def listdir(self, path: Path = None): with self._mutex: try: return self._listdir_cache[path] @@ -2119,7 +2306,7 @@ def listdir(self, path=None): self._listdir_cache[path] = listing return listing - def getsize(self, path=None): + def getsize(self, path=None) -> int: return getsize(self._store, path=path) def _pop_value(self): @@ -2136,7 +2323,7 @@ def _accommodate_value(self, value_size): v = self._pop_value() self._current_size -= buffer_size(v) - def _cache_value(self, key, value): + def _cache_value(self, key: Path, value): # cache a value value_size = buffer_size(value) # check size of the value against max size, as if the value itself exceeds max @@ -2208,7 +2395,7 @@ def __delitem__(self, key): self._invalidate_value(key) -class ABSStore(MutableMapping): +class ABSStore(Store): """Storage class using Azure Blob Storage (ABS). Parameters @@ -2355,7 +2542,7 @@ def __contains__(self, key): blob_name = self._append_path_to_prefix(key) return self.client.get_blob_client(blob_name).exists() - def listdir(self, path=None): + def listdir(self, path: Path = None): dir_path = normalize_storage_path(self._append_path_to_prefix(path)) if dir_path: dir_path += '/' @@ -2398,7 +2585,7 @@ def clear(self): self.rmdir() -class SQLiteStore(MutableMapping): +class SQLiteStore(Store): """Storage class using SQLite. Parameters @@ -2601,7 +2788,7 @@ def clear(self): ) -class MongoDBStore(MutableMapping): +class MongoDBStore(Store): """Storage class using MongoDB. .. note:: This is an experimental feature. @@ -2684,7 +2871,7 @@ def clear(self): self.collection.delete_many({}) -class RedisStore(MutableMapping): +class RedisStore(Store): """Storage class using Redis. .. note:: This is an experimental feature. @@ -2753,7 +2940,7 @@ def clear(self): del self[key] -class ConsolidatedMetadataStore(MutableMapping): +class ConsolidatedMetadataStore(Store): """A layer over other storage, where the metadata has been consolidated into a single key. @@ -2777,7 +2964,7 @@ class ConsolidatedMetadataStore(MutableMapping): Parameters ---------- - store: MutableMapping + store: Store Containing the zarr array. metadata_key: str The target in the store where all of the metadata are stored. We @@ -2789,7 +2976,7 @@ class ConsolidatedMetadataStore(MutableMapping): """ - def __init__(self, store, metadata_key='.zmetadata'): + def __init__(self, store: Store, metadata_key=".zmetadata"): self.store = store # retrieve consolidated metadata @@ -2802,7 +2989,7 @@ def __init__(self, store, metadata_key='.zmetadata'): consolidated_format) # decode metadata - self.meta_store = meta['metadata'] + self.meta_store: Store = KVStore(meta["metadata"]) def __getitem__(self, key): return self.meta_store[key] diff --git a/zarr/tests/test_convenience.py b/zarr/tests/test_convenience.py index 20cd25027c..fab97d2492 100644 --- a/zarr/tests/test_convenience.py +++ b/zarr/tests/test_convenience.py @@ -23,8 +23,12 @@ from zarr.core import Array from zarr.errors import CopyError from zarr.hierarchy import Group, group -from zarr.storage import (ConsolidatedMetadataStore, MemoryStore, - atexit_rmtree, getsize) +from zarr.storage import ( + ConsolidatedMetadataStore, + MemoryStore, + atexit_rmtree, + getsize, +) def test_open_array(): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index cd3efcd82f..3be1f8d3e6 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -23,11 +23,12 @@ ABSStore, DBMStore, DirectoryStore, + FSStore, + KVStore, LMDBStore, LRUStoreCache, NestedDirectoryStore, SQLiteStore, - FSStore, atexit_rmglob, atexit_rmtree, init_array, @@ -44,8 +45,8 @@ class TestArray(unittest.TestCase): def test_array_init(self): # normal initialization - store = dict() - init_array(store, shape=100, chunks=10, dtype=' end assert [] == list(z.islice(6, 5)) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_iter(self): params = ( @@ -1419,8 +1363,7 @@ def test_iter(self): z[:] = a for expect, actual in zip_longest(a, z): assert_array_equal(expect, actual) - if hasattr(z.store, 'close'): - z.store.close() + z.store.close() def test_islice(self): params = ( @@ -1458,8 +1401,7 @@ def test_compressors(self): assert np.all(a[0:100] == 1) a[:] = 1 assert np.all(a[:] == 1) - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() def test_endian(self): dtype = np.dtype('float32') @@ -1470,10 +1412,8 @@ def test_endian(self): a2[:] = 1 x2 = a2[:] assert_array_equal(x1, x2) - if hasattr(a1.store, 'close'): - a1.store.close() - if hasattr(a2.store, 'close'): - a2.store.close() + a1.store.close() + a2.store.close() def test_attributes(self): a = self.create_array(shape=10, chunks=10, dtype='i8') @@ -1487,8 +1427,7 @@ def test_attributes(self): attrs = json_loads(a.store[a.attrs.key]) assert 'foo' in attrs and attrs['foo'] == 'bar' assert 'bar' in attrs and attrs['bar'] == 'foo' - if hasattr(a.store, 'close'): - a.store.close() + a.store.close() class TestArrayWithPath(TestArray): @@ -1502,6 +1441,9 @@ def create_array(read_only=False, **kwargs): return Array(store, path='foo/bar', read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) + def test_nchunks_initialized(self): + pass + def test_hexdigest(self): # Check basic 1-D array z = self.create_array(shape=(1050,), chunks=100, dtype=' Date: Sat, 27 Feb 2021 10:44:47 -0800 Subject: [PATCH 02/42] cleanup naming --- zarr/storage.py | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 06e213985f..ed6436bc89 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -147,19 +147,10 @@ def rmdir(self, path: str = "") -> None: path = normalize_storage_path(path) _rmdir_from_keys(self, path) - # def getsize(self, path: Path = None): - # pass - - # def clear(self): - # pass - def close(self) -> None: """Do nothing by default""" pass - # def pop(self, key): - # raise NotImplementedError - @staticmethod def _ensure_store(store): """ @@ -658,57 +649,38 @@ class KVStore(Store): """ def __init__(self, mutablemapping): - self.mm = mutablemapping + self._mutable_mapping = mutablemapping def __getitem__(self, key): - return self.mm[key] + return self._mutable_mapping[key] def __setitem__(self, key, value): - # assert isinstance(value, bytes) - self.mm[key] = value + self._mutable_mapping[key] = value def __delitem__(self, key): - del self.mm[key] + del self._mutable_mapping[key] def get(self, key, default=None): - return self.mm.get(key, default) + return self._mutable_mapping.get(key, default) def values(self): - return self.mm.values() + return self._mutable_mapping.values() def __iter__(self): - return iter(self.mm) + return iter(self._mutable_mapping) def __len__(self): - return len(self.mm) + return len(self._mutable_mapping) def __repr__(self): - return f"<{self.__class__.__name__}: \n{repr(self.mm)}\n at {hex(id(self))}>" + return f"<{self.__class__.__name__}: \n{repr(self._mutable_mapping)}\n at {hex(id(self))}>" def __eq__(self, other): if isinstance(other, KVStore): - return self.mm == other.mm + return self._mutable_mapping == other._mutable_mapping else: return NotImplemented - # def __contains__(self, key): - # return key in self.mm - - # def pop(self): - # return self.mm.pop() - - # def popitem - # def clear - # def update - # def setdefault - # def __eq__ - # def __ne__ - # def __contains__ - # def keys() - # def items() - # def values() - # def get - class MemoryStore(Store): """Store class that uses a hierarchy of :class:`dict` objects, thus all data From 6ca96315792b52c7ef42491b5035ab4250a4d71e Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 10:49:21 -0800 Subject: [PATCH 03/42] zip move --- zarr/tests/test_hierarchy.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index a6bdc72891..27f9aec6ff 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -1017,6 +1017,11 @@ def test_context_manager(self): with pytest.raises(ValueError): store.zf.extractall() + def test_move(self): + # zip store is not erasable (can so far only append to a zip + # so we can't test for move. + pass + class TestGroupWithDBMStore(TestGroup): From b3db0f1266195b59fa0f92e1fe309c7e180cdc47 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 10:55:31 -0800 Subject: [PATCH 04/42] fix erasability test --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index ed6436bc89..f5445aa312 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -226,7 +226,7 @@ def rmdir(store: Store, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, 'rmdir'): + if hasattr(store, "rmdir") and store.is_erasable(): # pass through store.rmdir(path) # type: ignore else: From 1db3be27d05fa4823a2f3f25a54f7face07656a5 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Sat, 27 Feb 2021 11:06:48 -0800 Subject: [PATCH 05/42] test for warning --- zarr/tests/test_storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 36a01365c2..6520c6f3db 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -91,8 +91,7 @@ def test_coverage_rename(): def test_deprecated_listdir_nosotre(): store = dict() - with warnings.catch_warnings(): - warnings.simplefilter("default") + with pytest.warns(UserWarning, match="has not `listdir`"): listdir(store) From 8d286780bfb452a815cbf92419f57765e59f0867 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 11:58:21 -0800 Subject: [PATCH 06/42] please flake --- zarr/tests/test_storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 6520c6f3db..55c293430f 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -9,7 +9,6 @@ from contextlib import contextmanager from pickle import PicklingError from zipfile import ZipFile -import warnings import numpy as np import pytest From b5240e4dc25e73fefde5dd11dad21adca9481986 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:14:11 -0800 Subject: [PATCH 07/42] remove uncovered lines --- zarr/tests/test_storage.py | 94 ++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 49 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 55c293430f..84305f9234 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -346,55 +346,51 @@ def test_hierarchy(self): # test rename (optional) if store.is_erasable(): - try: - store.rename("c/e", "c/e2") - assert "c/d" in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c/e2" not in store - assert "c/e2/f" in store - assert "c/e2/g" in store - store.rename("c/e2", "c/e") - assert "c/d" in store - assert "c/e2" not in store - assert "c/e2/f" not in store - assert "c/e2/g" not in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store - store.rename("c", "c1/c2/c3") - assert "a" in store - assert "c" not in store - assert "c/d" not in store - assert "c/e" not in store - assert "c/e/f" not in store - assert "c/e/g" not in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" in store - assert "c1/c2/c3/e/g" in store - store.rename("c1/c2/c3", "c") - assert "c" not in store - assert "c/d" in store - assert "c/e" not in store - assert "c/e/f" in store - assert "c/e/g" in store - assert "c1" not in store - assert "c1/c2" not in store - assert "c1/c2/c3" not in store - assert "c1/c2/c3/d" not in store - assert "c1/c2/c3/e" not in store - assert "c1/c2/c3/e/f" not in store - assert "c1/c2/c3/e/g" not in store - except NotImplementedError: - pass - - # test rmdir (optional) - if store.is_erasable(): + store.rename("c/e", "c/e2") + assert "c/d" in store + assert "c/e" not in store + assert "c/e/f" not in store + assert "c/e/g" not in store + assert "c/e2" not in store + assert "c/e2/f" in store + assert "c/e2/g" in store + store.rename("c/e2", "c/e") + assert "c/d" in store + assert "c/e2" not in store + assert "c/e2/f" not in store + assert "c/e2/g" not in store + assert "c/e" not in store + assert "c/e/f" in store + assert "c/e/g" in store + store.rename("c", "c1/c2/c3") + assert "a" in store + assert "c" not in store + assert "c/d" not in store + assert "c/e" not in store + assert "c/e/f" not in store + assert "c/e/g" not in store + assert "c1" not in store + assert "c1/c2" not in store + assert "c1/c2/c3" not in store + assert "c1/c2/c3/d" in store + assert "c1/c2/c3/e" not in store + assert "c1/c2/c3/e/f" in store + assert "c1/c2/c3/e/g" in store + store.rename("c1/c2/c3", "c") + assert "c" not in store + assert "c/d" in store + assert "c/e" not in store + assert "c/e/f" in store + assert "c/e/g" in store + assert "c1" not in store + assert "c1/c2" not in store + assert "c1/c2/c3" not in store + assert "c1/c2/c3/d" not in store + assert "c1/c2/c3/e" not in store + assert "c1/c2/c3/e/f" not in store + assert "c1/c2/c3/e/g" not in store + + # test rmdir (optional) store.rmdir("c/e") assert "c/d" in store assert "c/e/f" not in store From b7aa2d876291fe89be7ee18dbde283926df6eae4 Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:52:31 -0800 Subject: [PATCH 08/42] remove uncovered lines in tests --- zarr/tests/test_hierarchy.py | 67 +++++++++++++++++------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 27f9aec6ff..e9a8c6cf79 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -738,42 +738,39 @@ def test_move(self): data = np.arange(100) g['foo'] = data - try: - g.move('foo', 'bar') - assert 'foo' not in g - assert 'bar' in g - assert_array_equal(data, g['bar']) + g.move("foo", "bar") + assert "foo" not in g + assert "bar" in g + assert_array_equal(data, g["bar"]) + + g.move("bar", "foo/bar") + assert "bar" not in g + assert "foo" in g + assert "foo/bar" in g + assert isinstance(g["foo"], Group) + assert_array_equal(data, g["foo/bar"]) + + g.move("foo", "foo2") + assert "foo" not in g + assert "foo/bar" not in g + assert "foo2" in g + assert "foo2/bar" in g + assert isinstance(g["foo2"], Group) + assert_array_equal(data, g["foo2/bar"]) + + g2 = g["foo2"] + g2.move("bar", "/bar") + assert "foo2" in g + assert "foo2/bar" not in g + assert "bar" in g + assert isinstance(g["foo2"], Group) + assert_array_equal(data, g["bar"]) - g.move('bar', 'foo/bar') - assert 'bar' not in g - assert 'foo' in g - assert 'foo/bar' in g - assert isinstance(g['foo'], Group) - assert_array_equal(data, g['foo/bar']) - - g.move('foo', 'foo2') - assert 'foo' not in g - assert 'foo/bar' not in g - assert 'foo2' in g - assert 'foo2/bar' in g - assert isinstance(g['foo2'], Group) - assert_array_equal(data, g['foo2/bar']) - - g2 = g['foo2'] - g2.move('bar', '/bar') - assert 'foo2' in g - assert 'foo2/bar' not in g - assert 'bar' in g - assert isinstance(g['foo2'], Group) - assert_array_equal(data, g['bar']) - - with pytest.raises(ValueError): - g2.move('bar', 'bar2') - - with pytest.raises(ValueError): - g.move('bar', 'boo') - except NotImplementedError: - pass + with pytest.raises(ValueError): + g2.move("bar", "bar2") + + with pytest.raises(ValueError): + g.move("bar", "boo") g.store.close() From 30afcbbc3bbf5aef5bb249cb69472be672567ecd Mon Sep 17 00:00:00 2001 From: Matthias Bussonnier Date: Wed, 10 Mar 2021 12:54:53 -0800 Subject: [PATCH 09/42] pragma no cover for exceptional case --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index f5445aa312..2364d2bc2e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -136,14 +136,14 @@ def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): raise NotImplementedError( f'{type(self)} is not erasable, cannot call "rmdir"' - ) + ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) def rmdir(self, path: str = "") -> None: if not self.is_erasable(): raise NotImplementedError( f'{type(self)} is not erasable, cannot call "rmdir"' - ) + ) # pragma: no cover path = normalize_storage_path(path) _rmdir_from_keys(self, path) From 813671318edbca75b6bd0de3acde4ce555d026a2 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 12:42:43 -0400 Subject: [PATCH 10/42] minor docstring fixes add assert statements to test_capabilities --- zarr/hierarchy.py | 2 +- zarr/storage.py | 28 ++++++++++++++-------------- zarr/tests/test_storage.py | 11 +++++------ 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 11717e572b..fc64c1cf96 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -254,7 +254,7 @@ def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): - """If the underlying Store should always heave a ``close`` method, call it.""" + """Call the close method of the underlying Store.""" self.store.close() def info_items(self): diff --git a/zarr/storage.py b/zarr/storage.py index 2364d2bc2e..01dd9a0bde 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -82,8 +82,8 @@ class Store(MutableMapping): """Base class for stores implementation. - - Provide a number of default method as well as other typing guaranties for mypy. + Provide a number of default method as well as other typing guaranties for + mypy. Stores cannot be mutable mapping as they do have a couple of other requirements that would break Liskov substitution principle (stores only @@ -96,7 +96,7 @@ class Store(MutableMapping): Stores can be used as context manager to make sure they close on exit. - .. added: 2.5.0 + .. added: 2.9.0 """ @@ -135,7 +135,7 @@ def listdir(self, path: str = "") -> List[str]: def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' + f'{type(self)} is not erasable, cannot call "rename"' ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) @@ -154,9 +154,9 @@ def close(self) -> None: @staticmethod def _ensure_store(store): """ - We want to make sure internally that zarr storage are internally always - a class with a specific interface derived from Store, which is slightly - different than mutable mapping. + We want to make sure internally that zarr stores are always a class + with a specific interface derived from ``Store``, which is slightly + different than ``MutableMapping``. We'll do this conversion in a few places automatically """ @@ -182,8 +182,8 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr X.y.z, stores must be subclasses of Store, if " - "you store expose the MutableMapping interface wrap them in " + "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "your store exposes the MutableMapping interface wrap it in " f"Zarr.storage.KVStore. Got {store}" ) @@ -281,8 +281,8 @@ def listdir(store: Store, path: Path = None): else: # slow version, iterate through all keys warnings.warn( - "Store {store} has not `listdir` method. From zarr 2.5 you may want" - "to inherit from `Store`.".format(store=store), + f"Store {store} has no `listdir` method. From zarr 2.9 onwards " + "may want to inherit from `Store`.", stacklevel=2, ) return _listdir_from_keys(store, path) @@ -641,10 +641,10 @@ def _dict_store_keys(d: Dict, prefix="", cls=dict): class KVStore(Store): """ - This provide an default implementation of a store interface around - a mutable mapping, to avoid having to test stores for presence of methods + This provides a default implementation of a store interface around + a mutable mapping, to avoid having to test stores for presence of methods. - This, for most method should just be a pass-through to the underlying KV + This, for most methods should just be a pass-through to the underlying KV store which is likely to expose a MuttableMapping interface, """ diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 84305f9234..0d920c1293 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -68,11 +68,10 @@ class InvalidStore: def test_capabilities(): s = KVStore(dict()) - s.is_readable() - s.is_listable() - s.is_erasable() - s.is_writeable() - + assert s.is_readable() + assert s.is_listable() + assert s.is_erasable() + assert s.is_writeable() def test_getsize_non_implemented(): assert getsize(object()) == -1 @@ -90,7 +89,7 @@ def test_coverage_rename(): def test_deprecated_listdir_nosotre(): store = dict() - with pytest.warns(UserWarning, match="has not `listdir`"): + with pytest.warns(UserWarning, match="has no `listdir`"): listdir(store) From e38a9684043d101f82db09d832d6f0df672f2433 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 23 Jun 2021 16:04:57 -0400 Subject: [PATCH 11/42] pep8 fix --- zarr/tests/test_storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 0d920c1293..4e8c79918c 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -73,6 +73,7 @@ def test_capabilities(): assert s.is_erasable() assert s.is_writeable() + def test_getsize_non_implemented(): assert getsize(object()) == -1 From eb2a6da51bf8761b4994c201bd861734fabba810 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 24 Jun 2021 08:51:38 -0400 Subject: [PATCH 12/42] avoid NumPy 1.21.0 due to https://github.com/numpy/numpy/issues/19325 --- .github/workflows/python-package.yml | 2 +- requirements_rtfd.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e143d4e2c5..d9bc362d12 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: python-version: [3.7, 3.8] - numpy_version: ['', '==1.17.*'] + numpy_version: ['!=1.21.0', '==1.17.*'] services: redis: image: redis diff --git a/requirements_rtfd.txt b/requirements_rtfd.txt index fbf38a025d..2cdb12377d 100644 --- a/requirements_rtfd.txt +++ b/requirements_rtfd.txt @@ -5,4 +5,4 @@ sphinx sphinx-issues sphinx-rtd-theme numpydoc -numpy +numpy!=1.21.0 From 6796f794467c5331306e458a67b530554afa51f3 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 14:48:45 -0400 Subject: [PATCH 13/42] move Store class and some helper functions to zarr._storage.store update version in Store docstring --- zarr/_storage/store.py | 159 +++++++++++++++++++++++++++++++++++++++++ zarr/storage.py | 159 +++-------------------------------------- 2 files changed, 167 insertions(+), 151 deletions(-) create mode 100644 zarr/_storage/store.py diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py new file mode 100644 index 0000000000..302e234c40 --- /dev/null +++ b/zarr/_storage/store.py @@ -0,0 +1,159 @@ +from collections.abc import MutableMapping +from typing import Optional, List + +from zarr.util import normalize_storage_path + +# v2 store keys +array_meta_key = '.zarray' +group_meta_key = '.zgroup' +attrs_key = '.zattrs' + + +class Store(MutableMapping): + """Base class for stores implementation. + + Provide a number of default method as well as other typing guaranties for + mypy. + + Stores cannot be mutable mapping as they do have a couple of other + requirements that would break Liskov substitution principle (stores only + allow strings as keys, mutable mapping are more generic). + + And Stores do requires a few other method. + + Having no-op base method also helps simplifying store usage and do not need + to check the presence of attributes and methods, like `close()`. + + Stores can be used as context manager to make sure they close on exit. + + .. added: 2.10.0 + + """ + + _readable = True + _writeable = True + _erasable = True + _listable = True + + def is_readable(self): + return self._readable + + def is_writeable(self): + return self._writeable + + def is_listable(self): + return self._listable + + def is_erasable(self): + return self._erasable + + def __enter__(self): + if not hasattr(self, "_open_count"): + self._open_count = 0 + self._open_count += 1 + return self + + def __exit__(self, exc_type, exc_value, traceback): + self._open_count -= 1 + if self._open_count == 0: + self.close() + + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + def rename(self, src_path: str, dst_path: str) -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rename"' + ) # pragma: no cover + _rename_from_keys(self, src_path, dst_path) + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) # pragma: no cover + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + def close(self) -> None: + """Do nothing by default""" + pass + + @staticmethod + def _ensure_store(store): + """ + We want to make sure internally that zarr stores are always a class + with a specific interface derived from ``Store``, which is slightly + different than ``MutableMapping``. + + We'll do this conversion in a few places automatically + """ + from zarr.storage import KVStore # avoid circular import + + if store is None: + return None + elif isinstance(store, Store): + return store + elif isinstance(store, MutableMapping): + return KVStore(store) + else: + for attr in [ + "keys", + "values", + "get", + "__setitem__", + "__getitem__", + "__delitem__", + "__contains__", + ]: + if not hasattr(store, attr): + break + else: + return KVStore(store) + + raise ValueError( + "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "your store exposes the MutableMapping interface wrap it in " + f"Zarr.storage.KVStore. Got {store}" + ) + + +def _path_to_prefix(path: Optional[str]) -> str: + # assume path already normalized + if path: + prefix = path + '/' + else: + prefix = '' + return prefix + + +def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: + # assume path already normalized + src_prefix = _path_to_prefix(src_path) + dst_prefix = _path_to_prefix(dst_path) + for key in list(store.keys()): + if key.startswith(src_prefix): + new_key = dst_prefix + key.lstrip(src_prefix) + store[new_key] = store.pop(key) + + +def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: + # assume path already normalized + prefix = _path_to_prefix(path) + for key in list(store.keys()): + if key.startswith(prefix): + del store[key] + + +def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: + # assume path already normalized + prefix = _path_to_prefix(path) + children = set() + for key in list(store.keys()): + if key.startswith(prefix) and len(key) > len(prefix): + suffix = key[len(prefix):] + child = suffix.split('/')[0] + children.add(child) + return sorted(children) diff --git a/zarr/storage.py b/zarr/storage.py index b6c4f840e7..3793179fb5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -57,6 +57,14 @@ normalize_shape, normalize_storage_path, retry_call) from zarr._storage.absstore import ABSStore # noqa: F401 +from zarr._storage.store import (_listdir_from_keys, + _path_to_prefix, + _rename_from_keys, + _rmdir_from_keys, + array_meta_key, + group_meta_key, + attrs_key, + Store) __doctest_requires__ = { ('RedisStore', 'RedisStore.*'): ['redis'], @@ -65,9 +73,6 @@ } -array_meta_key = '.zarray' -group_meta_key = '.zgroup' -attrs_key = '.zattrs' try: # noinspection PyUnresolvedReferences from zarr.codecs import Blosc @@ -80,124 +85,6 @@ Path = Union[str, bytes, None] -class Store(MutableMapping): - """Base class for stores implementation. - - Provide a number of default method as well as other typing guaranties for - mypy. - - Stores cannot be mutable mapping as they do have a couple of other - requirements that would break Liskov substitution principle (stores only - allow strings as keys, mutable mapping are more generic). - - And Stores do requires a few other method. - - Having no-op base method also helps simplifying store usage and do not need - to check the presence of attributes and methods, like `close()`. - - Stores can be used as context manager to make sure they close on exit. - - .. added: 2.9.0 - - """ - - _readable = True - _writeable = True - _erasable = True - _listable = True - - def is_readable(self): - return self._readable - - def is_writeable(self): - return self._writeable - - def is_listable(self): - return self._listable - - def is_erasable(self): - return self._erasable - - def __enter__(self): - if not hasattr(self, "_open_count"): - self._open_count = 0 - self._open_count += 1 - return self - - def __exit__(self, exc_type, exc_value, traceback): - self._open_count -= 1 - if self._open_count == 0: - self.close() - - def listdir(self, path: str = "") -> List[str]: - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) - - def rename(self, src_path: str, dst_path: str) -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rename"' - ) # pragma: no cover - _rename_from_keys(self, src_path, dst_path) - - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - def close(self) -> None: - """Do nothing by default""" - pass - - @staticmethod - def _ensure_store(store): - """ - We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``Store``, which is slightly - different than ``MutableMapping``. - - We'll do this conversion in a few places automatically - """ - if store is None: - return None - elif isinstance(store, Store): - return store - elif isinstance(store, MutableMapping): - return KVStore(store) - else: - for attr in [ - "keys", - "values", - "get", - "__setitem__", - "__getitem__", - "__delitem__", - "__contains__", - ]: - if not hasattr(store, attr): - break - else: - return KVStore(store) - - raise ValueError( - "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " - "your store exposes the MutableMapping interface wrap it in " - f"Zarr.storage.KVStore. Got {store}" - ) - - -def _path_to_prefix(path: Optional[str]) -> str: - # assume path already normalized - if path: - prefix = path + '/' - else: - prefix = '' - return prefix - - def contains_array(store: Store, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) @@ -214,14 +101,6 @@ def contains_group(store: Store, path: Path = None) -> bool: return key in store -def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: - # assume path already normalized - prefix = _path_to_prefix(path) - for key in list(store.keys()): - if key.startswith(prefix): - del store[key] - - def rmdir(store: Store, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the @@ -235,16 +114,6 @@ def rmdir(store: Store, path: Path = None): _rmdir_from_keys(store, path) -def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: - # assume path already normalized - src_prefix = _path_to_prefix(src_path) - dst_prefix = _path_to_prefix(dst_path) - for key in list(store.keys()): - if key.startswith(src_prefix): - new_key = dst_prefix + key.lstrip(src_prefix) - store[new_key] = store.pop(key) - - def rename(store: Store, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the @@ -259,18 +128,6 @@ def rename(store: Store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: - # assume path already normalized - prefix = _path_to_prefix(path) - children = set() - for key in list(store.keys()): - if key.startswith(prefix) and len(key) > len(prefix): - suffix = key[len(prefix):] - child = suffix.split('/')[0] - children.add(child) - return sorted(children) - - def listdir(store: Store, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the From d0a1b00629fc09663dac7663ec4df0e0cdb1d13f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 15:20:57 -0400 Subject: [PATCH 14/42] BUG: ABSStore should inherit from Store --- zarr/_storage/absstore.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index 0dc5bf1892..ad03c7a023 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -4,13 +4,14 @@ from collections.abc import MutableMapping from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path +from zarr._storage.store import Store __doctest_requires__ = { ('ABSStore', 'ABSStore.*'): ['azure.storage.blob'], } -class ABSStore(MutableMapping): +class ABSStore(Store): """Storage class using Azure Blob Storage (ABS). Parameters From 3d39d8c47478832ffabd5ad20f89b4931fffa26b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 10 Sep 2021 17:26:01 -0400 Subject: [PATCH 15/42] pep8 fix --- zarr/_storage/absstore.py | 1 - zarr/convenience.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/_storage/absstore.py b/zarr/_storage/absstore.py index ad03c7a023..01bfbd5039 100644 --- a/zarr/_storage/absstore.py +++ b/zarr/_storage/absstore.py @@ -1,7 +1,6 @@ """This module contains storage classes related to Azure Blob Storage (ABS)""" import warnings -from collections.abc import MutableMapping from numcodecs.compat import ensure_bytes from zarr.util import normalize_storage_path from zarr._storage.store import Store diff --git a/zarr/convenience.py b/zarr/convenience.py index 24251fa95e..1de7d5fd8a 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -213,7 +213,7 @@ def save_group(store: StoreLike, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: Store = normalize_store_arg(store, clobber=True) try: grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): From 5f7c7f58b8a6f80f5178c1fa0d1da3f8b574960f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:32:01 -0400 Subject: [PATCH 16/42] TST: make CustomMapping a subclass of Store TST: initialize stores with KVStore(dict()) instead of bare dict() --- zarr/tests/test_core.py | 34 +++++++++++++++++++--------------- zarr/tests/test_storage.py | 6 +++--- zarr/tests/test_sync.py | 5 +++-- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f44a98ae0a..97ad3b68e2 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -30,6 +30,7 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, + Store, atexit_rmglob, atexit_rmtree, init_array, @@ -73,18 +74,18 @@ def test_array_init(self): assert "8fecb7a17ea1493d9c1430d04437b4f5b0b34985" == a.hexdigest() # store not initialized - store = dict() + store = KVStore(dict()) with pytest.raises(ValueError): Array(store) # group is in the way - store = dict() + store = KVStore(dict()) init_group(store, path='baz') with pytest.raises(ValueError): Array(store, path='baz') def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -1481,7 +1482,7 @@ class TestArrayWithPath(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, path='foo/bar', **kwargs) @@ -1537,9 +1538,9 @@ class TestArrayWithChunkStore(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) # separate chunk store - chunk_store = dict() + chunk_store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, chunk_store=chunk_store, **kwargs) @@ -2060,7 +2061,7 @@ def test_nbytes_stored(self): class TestArrayWithNoCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', None) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) @@ -2095,7 +2096,7 @@ def test_hexdigest(self): class TestArrayWithBZ2Compressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = BZ2(level=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2131,7 +2132,7 @@ def test_hexdigest(self): class TestArrayWithBloscCompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = Blosc(cname='zstd', clevel=1, shuffle=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2174,7 +2175,7 @@ def test_hexdigest(self): class TestArrayWithLZMACompressor(TestArray): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) compressor = LZMA(preset=1) kwargs.setdefault('compressor', compressor) cache_metadata = kwargs.pop('cache_metadata', True) @@ -2211,7 +2212,7 @@ class TestArrayWithFilters(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) dtype = kwargs.get('dtype', None) filters = [ Delta(dtype=dtype), @@ -2254,7 +2255,7 @@ def test_astype_no_filters(self): dtype = np.dtype(np.int8) astype = np.dtype(np.float32) - store = dict() + store = KVStore(dict()) init_array(store, shape=shape, chunks=10, dtype=dtype) data = np.arange(np.prod(shape), dtype=dtype).reshape(shape) @@ -2329,14 +2330,17 @@ def test_structured_array_contain_object(self): # custom store, does not support getsize() -class CustomMapping(object): +class CustomMapping(Store): def __init__(self): - self.inner = dict() + self.inner = KVStore(dict()) def __iter__(self): return iter(self.keys()) + def __len__(self): + return len(self.inner) + def keys(self): return self.inner.keys() @@ -2385,7 +2389,7 @@ class TestArrayNoCache(TestArray): @staticmethod def create_array(read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) kwargs.setdefault('compressor', Zlib(level=1)) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 62e2525990..c76fa38f22 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1804,7 +1804,7 @@ def test_migrate_1to2(): # concerned about migrating a single array at the root of the store # setup - store = dict() + store = KVStore(dict()) meta = dict( shape=(100,), chunks=(10,), @@ -1842,7 +1842,7 @@ def test_migrate_1to2(): assert meta_migrated['compressor'] == Zlib(1).get_config() # check dict compression_opts - store = dict() + store = KVStore(dict()) meta['compression'] = 'blosc' meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1) meta_json = meta_v1.encode_metadata(meta) @@ -1856,7 +1856,7 @@ def test_migrate_1to2(): Blosc(cname='lz4', clevel=5, shuffle=1).get_config()) # check 'none' compression is migrated to None (null in JSON) - store = dict() + store = KVStore(dict()) meta['compression'] = 'none' meta_json = meta_v1.encode_metadata(meta) store['meta'] = meta_json diff --git a/zarr/tests/test_sync.py b/zarr/tests/test_sync.py index 51b7fe0e10..d420df7c68 100644 --- a/zarr/tests/test_sync.py +++ b/zarr/tests/test_sync.py @@ -12,7 +12,8 @@ from zarr.attrs import Attributes from zarr.core import Array from zarr.hierarchy import Group -from zarr.storage import DirectoryStore, atexit_rmtree, init_array, init_group +from zarr.storage import (DirectoryStore, KVStore, atexit_rmtree, init_array, + init_group) from zarr.sync import ProcessSynchronizer, ThreadSynchronizer from zarr.tests.test_attrs import TestAttributes from zarr.tests.test_core import TestArray @@ -96,7 +97,7 @@ def test_parallel_append(self): class TestArrayWithThreadSynchronizer(TestArray, MixinArraySyncTests): def create_array(self, read_only=False, **kwargs): - store = dict() + store = KVStore(dict()) cache_metadata = kwargs.pop('cache_metadata', True) cache_attrs = kwargs.pop('cache_attrs', True) init_array(store, **kwargs) From 52deac068410d37b1240124aa24782393006406b Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:35:17 -0400 Subject: [PATCH 17/42] update version mentioned in Store docstring --- zarr/_storage/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 302e234c40..fb151d8547 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -26,7 +26,7 @@ class Store(MutableMapping): Stores can be used as context manager to make sure they close on exit. - .. added: 2.10.0 + .. added: 2.11.0 """ From a7cc4dbb193bb8533db5fd7e2ab475dc5b334720 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:36:09 -0400 Subject: [PATCH 18/42] update version mentioned in warning message --- zarr/_storage/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index fb151d8547..7037e2822c 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -114,7 +114,7 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr 2.9.0, stores must be subclasses of Store, if " + "Starting with Zarr 2.11.0, stores must be subclasses of Store, if " "your store exposes the MutableMapping interface wrap it in " f"Zarr.storage.KVStore. Got {store}" ) From c59f3be239af3709889fc33f1dcff197177dcbfb Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:50:11 -0400 Subject: [PATCH 19/42] use Store._ensure_store in Attributes class ensures Attributes.store is a Store --- zarr/attrs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index ea6b831608..ec01dbe04f 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -1,6 +1,7 @@ from collections.abc import MutableMapping from zarr.meta import parse_metadata +from zarr._storage.store import Store from zarr.util import json_dumps @@ -26,7 +27,7 @@ class Attributes(MutableMapping): def __init__(self, store, key='.zattrs', read_only=False, cache=True, synchronizer=None): - self.store = store + self.store = Store._ensure_store(store) self.key = key self.read_only = read_only self.cache = cache From a9a98a97e3d0547aba0fc94fb3b7fd1a0709f473 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:57:18 -0400 Subject: [PATCH 20/42] TST: add Attributes test case ensuring store gets coerced to a Store --- zarr/tests/test_attrs.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 2aced3abaa..01d9640f64 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -5,17 +5,23 @@ from zarr.attrs import Attributes from zarr.tests.util import CountingDict +from zarr.storage import KVStore -class TestAttributes(unittest.TestCase): +class TestAttributes(): def init_attributes(self, store, read_only=False, cache=True): return Attributes(store, key='attrs', read_only=read_only, cache=cache) - def test_storage(self): + @pytest.mark.parametrize('store_from_dict', [False, True]) + def test_storage(self, store_from_dict): - store = dict() + if store_from_dict: + store = dict() + else: + store = KVStore(dict()) a = Attributes(store=store, key='attrs') + assert isinstance(a.store, KVStore) assert 'foo' not in a assert 'bar' not in a assert dict() == a.asdict() From 0a6d92341cb0aceffff093d5a06aac4c4745a536 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 15:51:20 -0400 Subject: [PATCH 21/42] use Store._ensure_store in normalize_store_arg ensures open_array, etc can work when the user supplies a dict --- zarr/creation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/creation.py b/zarr/creation.py index 854cd91890..968ce8f20f 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -167,7 +167,7 @@ def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> return DirectoryStore(store) else: if not isinstance(store, Store) and isinstance(store, MutableMapping): - store = KVStore(store) + store = Store._ensure_store(store) return store From a40ed54a360e5e6bb7fb356d6da0c53485f4950c Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:25:45 -0400 Subject: [PATCH 22/42] TST: make sure high level creation functions also work when passed a dict for store --- zarr/tests/test_creation.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_creation.py b/zarr/tests/test_creation.py index bf25bc618c..117bc338b6 100644 --- a/zarr/tests/test_creation.py +++ b/zarr/tests/test_creation.py @@ -15,7 +15,7 @@ zeros_like) from zarr.hierarchy import open_group from zarr.n5 import N5Store -from zarr.storage import DirectoryStore +from zarr.storage import DirectoryStore, KVStore from zarr.sync import ThreadSynchronizer @@ -272,6 +272,30 @@ def test_open_array(): assert_array_equal(np.full(100, fill_value=42), a[:]) +def test_open_array_dict_store(): + + # dict will become a KVStore + store = dict() + + # mode == 'w' + z = open_array(store, mode='w', shape=100, chunks=10) + z[:] = 42 + assert isinstance(z, Array) + assert isinstance(z.store, KVStore) + assert (100,) == z.shape + assert (10,) == z.chunks + assert_array_equal(np.full(100, fill_value=42), z[:]) + + +def test_create_in_dict(): + for func in [empty, zeros, ones]: + a = func(100, store=dict()) + assert isinstance(a.store, KVStore) + + a = full(100, 5, store=dict()) + assert isinstance(a.store, KVStore) + + def test_empty_like(): # zarr array From 3fa6f51fbc1ea2137e14e555ca3d63ab10f7b9ad Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:34:08 -0400 Subject: [PATCH 23/42] TST: add test case with group initialized from dict --- zarr/tests/test_hierarchy.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index e9a8c6cf79..000b8ebf7a 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -905,6 +905,18 @@ def test_context_manager(self): d[:] = np.arange(100) +def test_group_init_from_dict(): + store, chunk_store = dict(), None + init_group(store, path=None, chunk_store=chunk_store) + g = Group(store, path=None, read_only=False, chunk_store=chunk_store) + assert store is not g.store + assert isinstance(g.store, KVStore) + if chunk_store is None: + assert g.store is g.chunk_store + else: + assert chunk_store is g.chunk_store + + class TestGroupWithMemoryStore(TestGroup): @staticmethod From 0f34ad24ba50d5cab8cb477d574c0549b2741a21 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 22 Sep 2021 16:36:48 -0400 Subject: [PATCH 24/42] TST: add test case with Array initialized from dict --- zarr/tests/test_core.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 5e3d83be59..1e7d7754c6 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1613,6 +1613,16 @@ def test_nbytes_stored(self): assert expect_nbytes_stored == z.nbytes_stored +def test_array_init_from_dict(): + # initialization via non-Store MutableMapping + store = dict() + init_array(store, shape=100, chunks=10, dtype=" Date: Wed, 22 Sep 2021 16:42:49 -0400 Subject: [PATCH 25/42] change CustomMapping back to type object, not Store want to test the non-Store code path in _ensure_store --- zarr/tests/test_core.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 1e7d7754c6..eb021ea017 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -2356,7 +2356,7 @@ def test_structured_array_contain_object(self): # custom store, does not support getsize() -class CustomMapping(Store): +class CustomMapping(object): def __init__(self): self.inner = KVStore(dict()) @@ -2364,9 +2364,6 @@ def __init__(self): def __iter__(self): return iter(self.keys()) - def __len__(self): - return len(self.inner) - def keys(self): return self.inner.keys() From cb1c5f05871668975a13815da5d643f1f6f00b14 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 14:04:59 -0400 Subject: [PATCH 26/42] pep8 fixes --- zarr/creation.py | 2 +- zarr/tests/test_attrs.py | 1 - zarr/tests/test_core.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/zarr/creation.py b/zarr/creation.py index 968ce8f20f..9836b062db 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -12,7 +12,7 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, KVStore, contains_array, +from zarr.storage import (DirectoryStore, ZipStore, contains_array, contains_group, default_compressor, init_array, normalize_storage_path, FSStore, Store) from zarr.util import normalize_dimension_separator diff --git a/zarr/tests/test_attrs.py b/zarr/tests/test_attrs.py index 01d9640f64..b2de736d4a 100644 --- a/zarr/tests/test_attrs.py +++ b/zarr/tests/test_attrs.py @@ -1,5 +1,4 @@ import json -import unittest import pytest diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index eb021ea017..35294c9d43 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -30,7 +30,6 @@ LRUStoreCache, NestedDirectoryStore, SQLiteStore, - Store, atexit_rmglob, atexit_rmtree, init_array, From 6ce098145df323ee8c93280d1ea88f167c99fcb6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 16:27:40 -0400 Subject: [PATCH 27/42] update/fix new hierarchy test case to complete code coverage --- zarr/tests/test_hierarchy.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 000b8ebf7a..1b4f96a73f 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -905,8 +905,12 @@ def test_context_manager(self): d[:] = np.arange(100) -def test_group_init_from_dict(): - store, chunk_store = dict(), None +@pytest.mark.parametrize('chunk_dict', [False, True]) +def test_group_init_from_dict(chunk_dict): + if chunk_dict: + store, chunk_store = dict(), dict() + else: + store, chunk_store = dict(), None init_group(store, path=None, chunk_store=chunk_store) g = Group(store, path=None, read_only=False, chunk_store=chunk_store) assert store is not g.store @@ -914,7 +918,7 @@ def test_group_init_from_dict(): if chunk_store is None: assert g.store is g.chunk_store else: - assert chunk_store is g.chunk_store + assert chunk_store is not g.chunk_store class TestGroupWithMemoryStore(TestGroup): From 82f7376703a586c8bf149ea0e60da5ee21e96e5a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 18:36:23 -0400 Subject: [PATCH 28/42] create a BaseStore parent for Store BaseStore does not have the listdir or rmdir methods cleaned up some type declerations, making sure mypy passes --- zarr/_storage/store.py | 72 ++++++++++++++++++++++++------------------ zarr/convenience.py | 16 +++++----- zarr/core.py | 8 ++--- zarr/creation.py | 30 +++--------------- zarr/hierarchy.py | 6 ++-- zarr/storage.py | 67 +++++++++++++++++++++++++++------------ 6 files changed, 107 insertions(+), 92 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 7037e2822c..d68da68304 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,5 +1,6 @@ +from abc import abstractmethod from collections.abc import MutableMapping -from typing import Optional, List +from typing import Any, List, Optional, Union from zarr.util import normalize_storage_path @@ -9,18 +10,16 @@ attrs_key = '.zattrs' -class Store(MutableMapping): - """Base class for stores implementation. +class BaseStore(MutableMapping): + """Abstract base class for store implementations. - Provide a number of default method as well as other typing guaranties for - mypy. + This is a thin wrapper over MutableMapping that provides methods to check + whether a store is readable, writeable, eraseable and or listable. Stores cannot be mutable mapping as they do have a couple of other requirements that would break Liskov substitution principle (stores only allow strings as keys, mutable mapping are more generic). - And Stores do requires a few other method. - Having no-op base method also helps simplifying store usage and do not need to check the presence of attributes and methods, like `close()`. @@ -58,9 +57,9 @@ def __exit__(self, exc_type, exc_value, traceback): if self._open_count == 0: self.close() - def listdir(self, path: str = "") -> List[str]: - path = normalize_storage_path(path) - return _listdir_from_keys(self, path) + def close(self) -> None: + """Do nothing by default""" + pass def rename(self, src_path: str, dst_path: str) -> None: if not self.is_erasable(): @@ -69,23 +68,11 @@ def rename(self, src_path: str, dst_path: str) -> None: ) # pragma: no cover _rename_from_keys(self, src_path, dst_path) - def rmdir(self, path: str = "") -> None: - if not self.is_erasable(): - raise NotImplementedError( - f'{type(self)} is not erasable, cannot call "rmdir"' - ) # pragma: no cover - path = normalize_storage_path(path) - _rmdir_from_keys(self, path) - - def close(self) -> None: - """Do nothing by default""" - pass - @staticmethod - def _ensure_store(store): + def _ensure_store(store: Any): """ We want to make sure internally that zarr stores are always a class - with a specific interface derived from ``Store``, which is slightly + with a specific interface derived from ``BaseStore``, which is slightly different than ``MutableMapping``. We'll do this conversion in a few places automatically @@ -94,7 +81,7 @@ def _ensure_store(store): if store is None: return None - elif isinstance(store, Store): + elif isinstance(store, BaseStore): return store elif isinstance(store, MutableMapping): return KVStore(store) @@ -114,12 +101,35 @@ def _ensure_store(store): return KVStore(store) raise ValueError( - "Starting with Zarr 2.11.0, stores must be subclasses of Store, if " - "your store exposes the MutableMapping interface wrap it in " - f"Zarr.storage.KVStore. Got {store}" + "Starting with Zarr 2.11.0, stores must be subclasses of " + "BaseStore, if your store exposes the MutableMapping interface " + f"wrap it in Zarr.storage.KVStore. Got {store}" ) +class Store(BaseStore): + """Abstract store class used by implementations following the Zarr v2 spec. + + Adds public `listdir`, `rename`, and `rmdir` methods on top of BaseStore. + + .. added: 2.11.0 + + """ + def listdir(self, path: str = "") -> List[str]: + path = normalize_storage_path(path) + return _listdir_from_keys(self, path) + + + def rmdir(self, path: str = "") -> None: + if not self.is_erasable(): + raise NotImplementedError( + f'{type(self)} is not erasable, cannot call "rmdir"' + ) # pragma: no cover + path = normalize_storage_path(path) + _rmdir_from_keys(self, path) + + + def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: @@ -129,7 +139,7 @@ def _path_to_prefix(path: Optional[str]) -> str: return prefix -def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: +def _rename_from_keys(store: BaseStore, src_path: str, dst_path: str) -> None: # assume path already normalized src_prefix = _path_to_prefix(src_path) dst_prefix = _path_to_prefix(dst_path) @@ -139,7 +149,7 @@ def _rename_from_keys(store: Store, src_path: str, dst_path: str) -> None: store[new_key] = store.pop(key) -def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: +def _rmdir_from_keys(store: Union[BaseStore, MutableMapping], path: Optional[str] = None) -> None: # assume path already normalized prefix = _path_to_prefix(path) for key in list(store.keys()): @@ -147,7 +157,7 @@ def _rmdir_from_keys(store: Store, path: Optional[str] = None) -> None: del store[key] -def _listdir_from_keys(store: Store, path: Optional[str] = None) -> List[str]: +def _listdir_from_keys(store: BaseStore, path: Optional[str] = None) -> List[str]: # assume path already normalized prefix = _path_to_prefix(path) children = set() diff --git a/zarr/convenience.py b/zarr/convenience.py index 1de7d5fd8a..26c58d6018 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -3,7 +3,7 @@ import itertools import os import re -from collections.abc import Mapping +from collections.abc import Mapping, MutableMapping from zarr.core import Array from zarr.creation import array as _create_array @@ -13,12 +13,12 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, Store +from zarr.storage import contains_array, contains_group, BaseStore, Store from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union -StoreLike = Union[Store, str, None] +StoreLike = Union[BaseStore, MutableMapping, str, None] # noinspection PyShadowingBuiltins @@ -80,7 +80,7 @@ def open(store: StoreLike = None, mode: str = "a", **kwargs): clobber = mode == 'w' # we pass storage options explicitly, since normalize_store_arg might construct # a store if the input is a fsspec-compatible URL - _store: Store = normalize_store_arg( + _store: BaseStore = normalize_store_arg( store, clobber=clobber, storage_options=kwargs.pop("storage_options", {}) ) path = normalize_storage_path(path) @@ -142,7 +142,7 @@ def save_array(store: StoreLike, arr, **kwargs): """ may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: _create_array(arr, store=_store, overwrite=True, **kwargs) finally: @@ -213,7 +213,7 @@ def save_group(store: StoreLike, *args, **kwargs): raise ValueError('at least one array must be provided') # handle polymorphic store arg may_need_closing = _might_close(store) - _store: Store = normalize_store_arg(store, clobber=True) + _store: BaseStore = normalize_store_arg(store, clobber=True) try: grp = _create_group(_store, overwrite=True) for i, arr in enumerate(args): @@ -1083,7 +1083,7 @@ def copy_all(source, dest, shallow=False, without_attrs=False, log=None, return n_copied, n_skipped, n_bytes_copied -def consolidate_metadata(store: Store, metadata_key=".zmetadata"): +def consolidate_metadata(store: StoreLike, metadata_key=".zmetadata"): """ Consolidate all metadata for groups and arrays within the given store into a single resource and put it under the given key. @@ -1134,7 +1134,7 @@ def is_zarr_key(key): return open_consolidated(store, metadata_key=metadata_key) -def open_consolidated(store: Store, metadata_key=".zmetadata", mode="r+", **kwargs): +def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", **kwargs): """Open group using metadata previously consolidated into a single key. This is an optimised method for opening a Zarr group, where instead of diff --git a/zarr/core.py b/zarr/core.py index 8f21a6ac02..becf4e790a 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -32,7 +32,7 @@ pop_fields, ) from zarr.meta import decode_array_metadata, encode_array_metadata -from zarr.storage import array_meta_key, attrs_key, getsize, listdir, Store +from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore from zarr.util import ( all_equal, InfoReporter, @@ -143,7 +143,7 @@ class Array: def __init__( self, - store: Store, + store: BaseStore, path=None, read_only=False, chunk_store=None, @@ -156,8 +156,8 @@ def __init__( # N.B., expect at this point store is fully initialized with all # configuration metadata fully specified and normalized - store = Store._ensure_store(store) - chunk_store = Store._ensure_store(chunk_store) + store = BaseStore._ensure_store(store) + chunk_store = BaseStore._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store diff --git a/zarr/creation.py b/zarr/creation.py index f0f7ea17d7..d50f67c31a 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -12,9 +12,10 @@ ContainsGroupError, ) from zarr.n5 import N5Store -from zarr.storage import (DirectoryStore, ZipStore, contains_array, - contains_group, default_compressor, init_array, - normalize_storage_path, FSStore, Store) +from zarr.storage import (BaseStore, DirectoryStore, FSStore, ZipStore, + contains_array, contains_group, default_compressor, + init_array, normalize_storage_path, + normalize_store_arg) from zarr.util import normalize_dimension_separator @@ -158,29 +159,6 @@ def create(shape, chunks=True, dtype=None, compressor='default', return z -def normalize_store_arg(store, clobber=False, storage_options=None, mode="w") -> Store: - if store is None: - return Store._ensure_store(dict()) - elif isinstance(store, os.PathLike): - store = os.fspath(store) - if isinstance(store, str): - mode = mode if clobber else "r" - if "://" in store or "::" in store: - return FSStore(store, mode=mode, **(storage_options or {})) - elif storage_options: - raise ValueError("storage_options passed with non-fsspec path") - if store.endswith('.zip'): - return ZipStore(store, mode=mode) - elif store.endswith('.n5'): - return N5Store(store) - else: - return DirectoryStore(store) - else: - if not isinstance(store, Store) and isinstance(store, MutableMapping): - store = Store._ensure_store(store) - return store - - def _kwargs_compat(compressor, fill_value, kwargs): # to be compatible with h5py, as well as backwards-compatible with Zarr diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index c94079117c..402b8dd976 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -16,6 +16,7 @@ ) from zarr.meta import decode_group_metadata from zarr.storage import ( + BaseStore, MemoryStore, attrs_key, contains_array, @@ -25,7 +26,6 @@ listdir, rename, rmdir, - Store, ) from zarr.util import ( InfoReporter, @@ -111,8 +111,8 @@ class Group(MutableMapping): def __init__(self, store, path=None, read_only=False, chunk_store=None, cache_attrs=True, synchronizer=None): - store = Store._ensure_store(store) - chunk_store = Store._ensure_store(chunk_store) + store: BaseStore = BaseStore._ensure_store(store) + chunk_store: BaseStore = BaseStore._ensure_store(chunk_store) self._store = store self._chunk_store = chunk_store self._path = normalize_storage_path(path) diff --git a/zarr/storage.py b/zarr/storage.py index 4427577668..72b20cd797 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -64,6 +64,7 @@ array_meta_key, group_meta_key, attrs_key, + BaseStore, Store) __doctest_requires__ = { @@ -83,9 +84,11 @@ Path = Union[str, bytes, None] +# allow MutableMapping for backwards compatibility +StoreLike = Union[BaseStore, MutableMapping] -def contains_array(store: Store, path: Path = None) -> bool: +def contains_array(store: StoreLike, path: Path = None) -> bool: """Return True if the store contains an array at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -93,7 +96,7 @@ def contains_array(store: Store, path: Path = None) -> bool: return key in store -def contains_group(store: Store, path: Path = None) -> bool: +def contains_group(store: StoreLike, path: Path = None) -> bool: """Return True if the store contains a group at the given logical path.""" path = normalize_storage_path(path) prefix = _path_to_prefix(path) @@ -101,12 +104,36 @@ def contains_group(store: Store, path: Path = None) -> bool: return key in store -def rmdir(store: Store, path: Path = None): +def normalize_store_arg(store: Any, clobber=False, storage_options=None, mode="w") -> BaseStore: + if store is None: + return BaseStore._ensure_store(dict()) + elif isinstance(store, os.PathLike): + store = os.fspath(store) + if isinstance(store, str): + mode = mode if clobber else "r" + if "://" in store or "::" in store: + return FSStore(store, mode=mode, **(storage_options or {})) + elif storage_options: + raise ValueError("storage_options passed with non-fsspec path") + if store.endswith('.zip'): + return ZipStore(store, mode=mode) + elif store.endswith('.n5'): + from zarr.n5 import N5Store + return N5Store(store) + else: + return DirectoryStore(store) + else: + if not isinstance(store, BaseStore) and isinstance(store, MutableMapping): + store = BaseStore._ensure_store(store) + return store + + +def rmdir(store: StoreLike, path: Path = None): """Remove all items under the given path. If `store` provides a `rmdir` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, "rmdir") and store.is_erasable(): + if hasattr(store, "rmdir"): # pass through store.rmdir(path) # type: ignore else: @@ -114,7 +141,7 @@ def rmdir(store: Store, path: Path = None): _rmdir_from_keys(store, path) -def rename(store: Store, src_path: Path, dst_path: Path): +def rename(store: BaseStore, src_path: Path, dst_path: Path): """Rename all items under the given path. If `store` provides a `rename` method, this will be called, otherwise will fall back to implementation via the `Store` interface.""" @@ -128,7 +155,7 @@ def rename(store: Store, src_path: Path, dst_path: Path): _rename_from_keys(store, src_path, dst_path) -def listdir(store: Store, path: Path = None): +def listdir(store: BaseStore, path: Path = None): """Obtain a directory listing for the given path. If `store` provides a `listdir` method, this will be called, otherwise will fall back to implementation via the `MutableMapping` interface.""" @@ -146,7 +173,7 @@ def listdir(store: Store, path: Path = None): return _listdir_from_keys(store, path) -def getsize(store: Store, path: Path = None) -> int: +def getsize(store: BaseStore, path: Path = None) -> int: """Compute size of stored items for a given path. If `store` provides a `getsize` method, this will be called, otherwise will return -1.""" path = normalize_storage_path(path) @@ -179,8 +206,8 @@ def getsize(store: Store, path: Path = None) -> int: def _require_parent_group( path: Optional[str], - store: Store, - chunk_store: Optional[Store], + store: StoreLike, + chunk_store: Optional[StoreLike], overwrite: bool, ): # assume path is normalized @@ -196,7 +223,7 @@ def _require_parent_group( def init_array( - store: Store, + store: StoreLike, shape: Tuple[int, ...], chunks: Union[bool, int, Tuple[int, ...]] = True, dtype=None, @@ -205,7 +232,7 @@ def init_array( order: str = "C", overwrite: bool = False, path: Path = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, filters=None, object_codec=None, dimension_separator=None, @@ -248,8 +275,8 @@ def init_array( -------- Initialize an array store:: - >>> from zarr.storage import init_array - >>> store = dict() + >>> from zarr.storage import init_array, KVStore + >>> store = KVStore(dict()) >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000)) >>> sorted(store.keys()) ['.zarray'] @@ -282,7 +309,7 @@ def init_array( Initialize an array using a storage path:: - >>> store = dict() + >>> store = KVStore(dict()) >>> init_array(store, shape=100000000, chunks=1000000, dtype='i1', path='foo') >>> sorted(store.keys()) ['.zgroup', 'foo/.zarray'] @@ -427,10 +454,10 @@ def _init_array_metadata( def init_group( - store: Store, + store: StoreLike, overwrite: bool = False, path: Path = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, ): """Initialize a group store. Note that this is a low-level function and there should be no need to call this directly from user code. @@ -462,10 +489,10 @@ def init_group( def _init_group_metadata( - store: Store, + store: StoreLike, overwrite: Optional[bool] = False, path: Optional[str] = None, - chunk_store: Store = None, + chunk_store: StoreLike = None, ): # guard conditions @@ -2608,8 +2635,8 @@ class ConsolidatedMetadataStore(Store): """ - def __init__(self, store: Store, metadata_key=".zmetadata"): - self.store = store + def __init__(self, store: StoreLike, metadata_key=".zmetadata"): + self.store = Store._ensure_store(store) # retrieve consolidated metadata meta = json_loads(store[metadata_key]) From 3481ea43bbde5ee097691ecb6ab521e94739e2a8 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 12:36:52 -0400 Subject: [PATCH 29/42] Convert functions in zarr.meta to class methods This is done to ease transition to Zarr v3 support. When adding v3 support, we can override encode and decode methods to account for changes in the metadata format. The classmethods are also exported under the old function names for backwards compatibility. Co-authored-by: Matthias Bussonier --- zarr/meta.py | 396 ++++++++++++++++++++++++++------------------------- 1 file changed, 204 insertions(+), 192 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 8a7d760c0f..731317337e 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -10,221 +10,233 @@ ZARR_FORMAT = 2 +FLOAT_FILLS = { + 'NaN': np.nan, + 'Infinity': np.PINF, + '-Infinity': np.NINF +} -def parse_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: - - # Here we allow that a store may return an already-parsed metadata object, - # or a string of JSON that we will parse here. We allow for an already-parsed - # object to accommodate a consolidated metadata store, where all the metadata for - # all groups and arrays will already have been parsed from JSON. - if isinstance(s, Mapping): - # assume metadata has already been parsed into a mapping object - meta = s +class Metadata2: + ZARR_FORMAT = ZARR_FORMAT - else: - # assume metadata needs to be parsed as JSON - meta = json_loads(s) + @classmethod + def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + # Here we allow that a store may return an already-parsed metadata object, + # or a string of JSON that we will parse here. We allow for an already-parsed + # object to accommodate a consolidated metadata store, where all the metadata for + # all groups and arrays will already have been parsed from JSON. - return meta + if isinstance(s, Mapping): + # assume metadata has already been parsed into a mapping object + meta = s + else: + # assume metadata needs to be parsed as JSON + meta = json_loads(s) + return meta + @classmethod + def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + meta = cls.parse_metadata(s) -def decode_array_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: - meta = parse_metadata(s) + # check metadata format + zarr_format = meta.get("zarr_format", None) + if zarr_format != cls.ZARR_FORMAT: + raise MetadataError("unsupported zarr format: %s" % zarr_format) - # check metadata format - zarr_format = meta.get('zarr_format', None) - if zarr_format != ZARR_FORMAT: - raise MetadataError('unsupported zarr format: %s' % zarr_format) + # extract array metadata fields + try: + # dimension_separator = meta.get("dimension_separator", None) + dtype = cls.decode_dtype(meta["dtype"]) + if dtype.hasobject: + import numcodecs + object_codec = numcodecs.get_codec(meta['filters'][0]) + else: + object_codec = None + fill_value = cls.decode_fill_value(meta['fill_value'], dtype, object_codec) + meta = dict( + zarr_format=meta["zarr_format"], + shape=tuple(meta["shape"]), + chunks=tuple(meta["chunks"]), + dtype=dtype, + compressor=meta["compressor"], + fill_value=fill_value, + order=meta["order"], + filters=meta["filters"], + dimension_separator=meta.get("dimension_separator", "."), + ) + except Exception as e: + raise MetadataError("error decoding metadata") from e + else: + return meta - # extract array metadata fields - try: - dtype = decode_dtype(meta['dtype']) + @classmethod + def encode_array_metadata(cls, meta: MappingType[str, Any]) -> bytes: + dtype = meta["dtype"] + sdshape = () + if dtype.subdtype is not None: + dtype, sdshape = dtype.subdtype + dimension_separator = meta.get("dimension_separator") if dtype.hasobject: import numcodecs object_codec = numcodecs.get_codec(meta['filters'][0]) else: object_codec = None - dimension_separator = meta.get('dimension_separator', None) - fill_value = decode_fill_value(meta['fill_value'], dtype, object_codec) meta = dict( - zarr_format=meta['zarr_format'], - shape=tuple(meta['shape']), - chunks=tuple(meta['chunks']), - dtype=dtype, - compressor=meta['compressor'], - fill_value=fill_value, - order=meta['order'], - filters=meta['filters'], + zarr_format=cls.ZARR_FORMAT, + shape=meta["shape"] + sdshape, + chunks=meta["chunks"], + dtype=cls.encode_dtype(dtype), + compressor=meta["compressor"], + fill_value=cls.encode_fill_value(meta["fill_value"], dtype, object_codec), + order=meta["order"], + filters=meta["filters"], ) if dimension_separator: meta['dimension_separator'] = dimension_separator - except Exception as e: - raise MetadataError('error decoding metadata: %s' % e) - else: - return meta - - -def encode_array_metadata(meta: MappingType[str, Any]) -> bytes: - dtype = meta['dtype'] - sdshape = () - if dtype.subdtype is not None: - dtype, sdshape = dtype.subdtype - - dimension_separator = meta.get('dimension_separator') - if dtype.hasobject: - import numcodecs - object_codec = numcodecs.get_codec(meta['filters'][0]) - else: - object_codec = None - meta = dict( - zarr_format=ZARR_FORMAT, - shape=meta['shape'] + sdshape, - chunks=meta['chunks'], - dtype=encode_dtype(dtype), - compressor=meta['compressor'], - fill_value=encode_fill_value(meta['fill_value'], dtype, object_codec), - order=meta['order'], - filters=meta['filters'], - ) - - if dimension_separator: - meta['dimension_separator'] = dimension_separator - - return json_dumps(meta) - - -def encode_dtype(d: np.dtype): - if d.fields is None: - return d.str - else: - return d.descr - - -def _decode_dtype_descr(d) -> List[Any]: - # need to convert list of lists to list of tuples - if isinstance(d, list): - # recurse to handle nested structures - d = [(k[0], _decode_dtype_descr(k[1])) + tuple(k[2:]) for k in d] - return d - - -def decode_dtype(d) -> np.dtype: - d = _decode_dtype_descr(d) - return np.dtype(d) - - -def decode_group_metadata(s: Union[MappingType, str]) -> MappingType[str, Any]: - meta = parse_metadata(s) - - # check metadata format version - zarr_format = meta.get('zarr_format', None) - if zarr_format != ZARR_FORMAT: - raise MetadataError('unsupported zarr format: %s' % zarr_format) - - meta = dict(zarr_format=zarr_format) - return meta - - -# N.B., keep `meta` parameter as a placeholder for future -# noinspection PyUnusedLocal -def encode_group_metadata(meta=None) -> bytes: - meta = dict( - zarr_format=ZARR_FORMAT, - ) - return json_dumps(meta) - - -FLOAT_FILLS = { - 'NaN': np.nan, - 'Infinity': np.PINF, - '-Infinity': np.NINF -} + if dimension_separator: + meta["dimension_separator"] = dimension_separator + return json_dumps(meta) -def decode_fill_value(v, dtype, object_codec=None): - # early out - if v is None: - return v - if dtype.kind == 'V' and dtype.hasobject: - if object_codec is None: - raise ValueError('missing object_codec for object array') - v = base64.standard_b64decode(v) - v = object_codec.decode(v) - v = np.array(v, dtype=dtype)[()] - return v - if dtype.kind == 'f': - if v == 'NaN': - return np.nan - elif v == 'Infinity': - return np.PINF - elif v == '-Infinity': - return np.NINF + @classmethod + def encode_dtype(cls, d: np.dtype): + if d.fields is None: + return d.str else: + return d.descr + + @classmethod + def _decode_dtype_descr(cls, d) -> List[Any]: + # need to convert list of lists to list of tuples + if isinstance(d, list): + # recurse to handle nested structures + d = [(k[0], cls._decode_dtype_descr(k[1])) + tuple(k[2:]) for k in d] + return d + + @classmethod + def decode_dtype(cls, d) -> np.dtype: + d = cls._decode_dtype_descr(d) + return np.dtype(d) + + @classmethod + def decode_group_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + meta = cls.parse_metadata(s) + + # check metadata format version + zarr_format = meta.get("zarr_format", None) + if zarr_format != cls.ZARR_FORMAT: + raise MetadataError("unsupported zarr format: %s" % zarr_format) + + meta = dict(zarr_format=zarr_format) + return meta + + # N.B., keep `meta` parameter as a placeholder for future + # noinspection PyUnusedLocal + @classmethod + def encode_group_metadata(cls, meta=None) -> bytes: + meta = dict(zarr_format=cls.ZARR_FORMAT) + return json_dumps(meta) + + @classmethod + def decode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: + # early out + if v is None: + return v + if dtype.kind == 'V' and dtype.hasobject: + if object_codec is None: + raise ValueError('missing object_codec for object array') + v = base64.standard_b64decode(v) + v = object_codec.decode(v) + v = np.array(v, dtype=dtype)[()] + return v + if dtype.kind == "f": + if v == "NaN": + return np.nan + elif v == "Infinity": + return np.PINF + elif v == "-Infinity": + return np.NINF + else: + return np.array(v, dtype=dtype)[()] + elif dtype.kind in "c": + v = ( + cls.decode_fill_value(v[0], dtype.type().real.dtype), + cls.decode_fill_value(v[1], dtype.type().imag.dtype), + ) + v = v[0] + 1j * v[1] return np.array(v, dtype=dtype)[()] - elif dtype.kind in 'c': - v = (decode_fill_value(v[0], dtype.type().real.dtype), - decode_fill_value(v[1], dtype.type().imag.dtype)) - v = v[0] + 1j * v[1] - return np.array(v, dtype=dtype)[()] - elif dtype.kind == 'S': - # noinspection PyBroadException - try: + elif dtype.kind == "S": + # noinspection PyBroadException + try: + v = base64.standard_b64decode(v) + except Exception: + # be lenient, allow for other values that may have been used before base64 + # encoding and may work as fill values, e.g., the number 0 + pass + v = np.array(v, dtype=dtype)[()] + return v + elif dtype.kind == "V": v = base64.standard_b64decode(v) - except Exception: - # be lenient, allow for other values that may have been used before base64 - # encoding and may work as fill values, e.g., the number 0 - pass - v = np.array(v, dtype=dtype)[()] - return v - elif dtype.kind == 'V': - v = base64.standard_b64decode(v) - v = np.array(v, dtype=dtype.str).view(dtype)[()] - return v - elif dtype.kind == 'U': - # leave as-is - return v - else: - return np.array(v, dtype=dtype)[()] - - -def encode_fill_value(v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: - # early out - if v is None: - return v - if dtype.kind == 'V' and dtype.hasobject: - if object_codec is None: - raise ValueError('missing object_codec for object array') - v = object_codec.encode(v) - v = str(base64.standard_b64encode(v), 'ascii') - return v - if dtype.kind == 'f': - if np.isnan(v): - return 'NaN' - elif np.isposinf(v): - return 'Infinity' - elif np.isneginf(v): - return '-Infinity' + v = np.array(v, dtype=dtype.str).view(dtype)[()] + return v + elif dtype.kind == "U": + # leave as-is + return v + else: + return np.array(v, dtype=dtype)[()] + + @classmethod + def encode_fill_value(cls, v: Any, dtype: np.dtype, object_codec: Any = None) -> Any: + # early out + if v is None: + return v + if dtype.kind == 'V' and dtype.hasobject: + if object_codec is None: + raise ValueError('missing object_codec for object array') + v = object_codec.encode(v) + v = str(base64.standard_b64encode(v), 'ascii') + return v + if dtype.kind == "f": + if np.isnan(v): + return "NaN" + elif np.isposinf(v): + return "Infinity" + elif np.isneginf(v): + return "-Infinity" + else: + return float(v) + elif dtype.kind in "ui": + return int(v) + elif dtype.kind == "b": + return bool(v) + elif dtype.kind in "c": + c = cast(np.complex128, np.dtype(complex).type()) + v = (cls.encode_fill_value(v.real, c.real.dtype, object_codec), + cls.encode_fill_value(v.imag, c.imag.dtype, object_codec)) + return v + elif dtype.kind in "SV": + v = str(base64.standard_b64encode(v), "ascii") + return v + elif dtype.kind == "U": + return v + elif dtype.kind in "mM": + return int(v.view("i8")) else: - return float(v) - elif dtype.kind in 'ui': - return int(v) - elif dtype.kind == 'b': - return bool(v) - elif dtype.kind in 'c': - c = cast(np.complex128, np.dtype(complex).type()) - v = (encode_fill_value(v.real, c.real.dtype, object_codec), - encode_fill_value(v.imag, c.imag.dtype, object_codec)) - return v - elif dtype.kind in 'SV': - v = str(base64.standard_b64encode(v), 'ascii') - return v - elif dtype.kind == 'U': - return v - elif dtype.kind in 'mM': - return int(v.view('i8')) - else: - return v + return v + + +# expose class methods for backwards compatibility +parse_metadata = Metadata2.parse_metadata +decode_array_metadata = Metadata2.decode_array_metadata +encode_array_metadata = Metadata2.encode_array_metadata +encode_dtype = Metadata2.encode_dtype +_decode_dtype_descr = Metadata2._decode_dtype_descr +decode_dtype = Metadata2.decode_dtype +decode_group_metadata = Metadata2.decode_group_metadata +encode_group_metadata = Metadata2.encode_group_metadata +decode_fill_value = Metadata2.decode_fill_value +encode_fill_value = Metadata2.encode_fill_value From 9ad4ef870e4ab351bfb74dd1aa2b534bda37064f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 12:41:30 -0400 Subject: [PATCH 30/42] Add a _metadata_class attribute to the Store class Because existing functions allow coerce of dict to store, there are a lot of hasattr calls here. We can remove these checks if we start enforcing that the input MUST be a Store. Use of this _metadata_class will ease the transition to v3 --- zarr/_storage/store.py | 3 +++ zarr/attrs.py | 6 +++++- zarr/core.py | 10 ++++++++-- zarr/hierarchy.py | 5 ++++- zarr/storage.py | 15 ++++++++++++--- 5 files changed, 32 insertions(+), 7 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index d68da68304..6d4285dbd4 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -2,6 +2,7 @@ from collections.abc import MutableMapping from typing import Any, List, Optional, Union +from zarr.meta import Metadata2 from zarr.util import normalize_storage_path # v2 store keys @@ -33,6 +34,8 @@ class BaseStore(MutableMapping): _writeable = True _erasable = True _listable = True + _store_version = 2 + _metadata_class = Metadata2 def is_readable(self): return self._readable diff --git a/zarr/attrs.py b/zarr/attrs.py index ec01dbe04f..789e56fab9 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -40,7 +40,11 @@ def _get_nosync(self): except KeyError: d = dict() else: - d = parse_metadata(data) + if hasattr(self.store, '_metadata_class'): + d = self.store._metadata_class.parse_metadata(data) + else: + # can remove this if we require self.store to be a Store + d = parse_metadata(data) return d def asdict(self): diff --git a/zarr/core.py b/zarr/core.py index becf4e790a..d06dc5b256 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -206,7 +206,10 @@ def _load_metadata_nosync(self): else: # decode and store metadata as instance members - meta = decode_array_metadata(meta_bytes) + if hasattr(self._store, '_metadata_class'): + meta = self._store._metadata_class.decode_array_metadata(meta_bytes) + else: + meta = decode_array_metadata(meta_bytes) self._meta = meta self._shape = meta['shape'] self._chunks = meta['chunks'] @@ -263,7 +266,10 @@ def _flush_metadata_nosync(self): compressor=compressor_config, fill_value=self._fill_value, order=self._order, filters=filters_config) mkey = self._key_prefix + array_meta_key - self._store[mkey] = encode_array_metadata(meta) + if hasattr(self._store, '_metadata_class'): + self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) + else: + self._store[mkey] = encode_array_metadata(meta) @property def store(self): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index 402b8dd976..c374321b4a 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -134,7 +134,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, except KeyError: raise GroupNotFoundError(path) else: - meta = decode_group_metadata(meta_bytes) + if hasattr(self._store, '_metadata_class'): + meta = self._store._metadata_class.decode_group_metadata(meta_bytes) + else: + meta = decode_group_metadata(meta_bytes) self._meta = meta # setup attributes diff --git a/zarr/storage.py b/zarr/storage.py index 72b20cd797..90531bc754 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -446,7 +446,10 @@ def _init_array_metadata( order=order, filters=filters_config, dimension_separator=dimension_separator) key = _path_to_prefix(path) + array_meta_key - store[key] = encode_array_metadata(meta) + if hasattr(store, '_metadata_class'): + store[key] = store._metadata_class.encode_array_metadata(meta) + else: + store[key] = encode_array_metadata(meta) # backwards compatibility @@ -511,7 +514,10 @@ def _init_group_metadata( # be in future meta = dict() # type: ignore key = _path_to_prefix(path) + group_meta_key - store[key] = encode_group_metadata(meta) + if hasattr(store, '_metadata_class'): + store[key] = store._metadata_class.encode_group_metadata(meta) + else: + store[key] = encode_group_metadata(meta) def _dict_store_keys(d: Dict, prefix="", cls=dict): @@ -1685,7 +1691,10 @@ def migrate_1to2(store): del meta['compression_opts'] # store migrated metadata - store[array_meta_key] = encode_array_metadata(meta) + if hasattr(store, '_metadata_class'): + store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) + else: + store[array_meta_key] = encode_array_metadata(meta) # migrate user attributes store[attrs_key] = store['attrs'] From 4cd041637716b6cf3ff6348454752c157ceef447 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 12:43:43 -0400 Subject: [PATCH 31/42] Use _metadata_class attribute in test_storage.py This will make it easier to reuse existing testing code when adding v3 support --- zarr/tests/test_storage.py | 58 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 889fa80043..4ce081859b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -427,7 +427,7 @@ def test_init_array(self, dimension_separator_fixture): # check metadata assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -460,7 +460,7 @@ def test_init_group_overwrite_chunk_store(self): def _test_init_array_overwrite(self, order): # setup store = self.create_store() - store[array_meta_key] = encode_array_metadata( + store[array_meta_key] = store._metadata_class.encode_array_metadata( dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'), @@ -482,7 +482,7 @@ def _test_init_array_overwrite(self, order): pass else: assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -498,7 +498,7 @@ def test_init_array_path(self): # check metadata key = path + '/' + array_meta_key assert key in store - meta = decode_array_metadata(store[key]) + meta = store._metadata_class.decode_array_metadata(store[key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -519,8 +519,8 @@ def _test_init_array_overwrite_path(self, order): fill_value=0, order=order, filters=None) - store[array_meta_key] = encode_array_metadata(meta) - store[path + '/' + array_meta_key] = encode_array_metadata(meta) + store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) + store[path + '/' + array_meta_key] = store._metadata_class.encode_array_metadata(meta) # don't overwrite with pytest.raises(ValueError): @@ -537,7 +537,7 @@ def _test_init_array_overwrite_path(self, order): assert array_meta_key not in store assert (path + '/' + array_meta_key) in store # should have been overwritten - meta = decode_array_metadata(store[path + '/' + array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[path + '/' + array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -549,7 +549,7 @@ def test_init_array_overwrite_group(self): # setup path = 'foo/bar' store = self.create_store() - store[path + '/' + group_meta_key] = encode_group_metadata() + store[path + '/' + group_meta_key] = store._metadata_class.encode_group_metadata() # don't overwrite with pytest.raises(ValueError): @@ -564,7 +564,7 @@ def test_init_array_overwrite_group(self): else: assert (path + '/' + group_meta_key) not in store assert (path + '/' + array_meta_key) in store - meta = decode_array_metadata(store[path + '/' + array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[path + '/' + array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -576,7 +576,7 @@ def _test_init_array_overwrite_chunk_store(self, order): # setup store = self.create_store() chunk_store = self.create_store() - store[array_meta_key] = encode_array_metadata( + store[array_meta_key] = store._metadata_class.encode_array_metadata( dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'), @@ -600,7 +600,7 @@ def _test_init_array_overwrite_chunk_store(self, order): pass else: assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -614,7 +614,7 @@ def _test_init_array_overwrite_chunk_store(self, order): def test_init_array_compat(self): store = self.create_store() init_array(store, shape=1000, chunks=100, compressor='none') - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert meta['compressor'] is None store.close() @@ -625,7 +625,7 @@ def test_init_group(self): # check metadata assert group_meta_key in store - meta = decode_group_metadata(store[group_meta_key]) + meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] store.close() @@ -633,7 +633,7 @@ def test_init_group(self): def _test_init_group_overwrite(self, order): # setup store = self.create_store() - store[array_meta_key] = encode_array_metadata( + store[array_meta_key] = store._metadata_class.encode_array_metadata( dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'), @@ -655,7 +655,7 @@ def _test_init_group_overwrite(self, order): else: assert array_meta_key not in store assert group_meta_key in store - meta = decode_group_metadata(store[group_meta_key]) + meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] # don't overwrite group @@ -675,8 +675,8 @@ def _test_init_group_overwrite_path(self, order): fill_value=0, order=order, filters=None) - store[array_meta_key] = encode_array_metadata(meta) - store[path + '/' + array_meta_key] = encode_array_metadata(meta) + store[array_meta_key] = store._metadata_class.encode_array_metadata(meta) + store[path + '/' + array_meta_key] = store._metadata_class.encode_array_metadata(meta) # don't overwrite with pytest.raises(ValueError): @@ -693,7 +693,7 @@ def _test_init_group_overwrite_path(self, order): assert (path + '/' + array_meta_key) not in store assert (path + '/' + group_meta_key) in store # should have been overwritten - meta = decode_group_metadata(store[path + '/' + group_meta_key]) + meta = store._metadata_class.decode_group_metadata(store[path + '/' + group_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] store.close() @@ -702,7 +702,7 @@ def _test_init_group_overwrite_chunk_store(self, order): # setup store = self.create_store() chunk_store = self.create_store() - store[array_meta_key] = encode_array_metadata( + store[array_meta_key] = store._metadata_class.encode_array_metadata( dict(shape=(2000,), chunks=(200,), dtype=np.dtype('u1'), @@ -726,7 +726,7 @@ def _test_init_group_overwrite_chunk_store(self, order): else: assert array_meta_key not in store assert group_meta_key in store - meta = decode_group_metadata(store[group_meta_key]) + meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert 'foo' not in chunk_store assert 'baz' not in chunk_store @@ -941,7 +941,7 @@ def test_init_array(self): # check metadata assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1191,7 +1191,7 @@ def test_init_array(self): # check metadata assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1267,7 +1267,7 @@ def test_init_array(self): # check metadata assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1287,7 +1287,7 @@ def test_init_array_path(self): # check metadata key = path + '/' + array_meta_key assert key in store - meta = decode_array_metadata(store[key]) + meta = store._metadata_class.decode_array_metadata(store[key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1301,7 +1301,7 @@ def test_init_array_path(self): def test_init_array_compat(self): store = self.create_store() init_array(store, shape=1000, chunks=100, compressor='none') - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor compressor_config = meta['compressor']['compressor_config'] assert compressor_config is None @@ -1332,7 +1332,7 @@ def test_init_group(self): assert group_meta_key in store assert group_meta_key in store.listdir() assert group_meta_key in store.listdir('') - meta = decode_group_metadata(store[group_meta_key]) + meta = store._metadata_class.decode_group_metadata(store[group_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] def test_filters(self): @@ -1958,7 +1958,7 @@ def test_migrate_1to2(): assert array_meta_key in store assert 'attrs' not in store assert attrs_key in store - meta_migrated = decode_array_metadata(store[array_meta_key]) + meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert 2 == meta_migrated['zarr_format'] # preserved fields @@ -1981,7 +1981,7 @@ def test_migrate_1to2(): store['meta'] = meta_json store['attrs'] = json.dumps(dict()).encode('ascii') migrate_1to2(store) - meta_migrated = decode_array_metadata(store[array_meta_key]) + meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert 'compression' not in meta_migrated assert 'compression_opts' not in meta_migrated assert (meta_migrated['compressor'] == @@ -1994,7 +1994,7 @@ def test_migrate_1to2(): store['meta'] = meta_json store['attrs'] = json.dumps(dict()).encode('ascii') migrate_1to2(store) - meta_migrated = decode_array_metadata(store[array_meta_key]) + meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert 'compression' not in meta_migrated assert 'compression_opts' not in meta_migrated assert meta_migrated['compressor'] is None From 8c385f6315c257c0e86ed23152522b7c96cad69d Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 14:07:44 -0400 Subject: [PATCH 32/42] remove unused imports use _metadata_class methods --- zarr/tests/test_storage.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 4ce081859b..33d1eec305 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -20,9 +20,7 @@ from zarr.codecs import BZ2, AsType, Blosc, Zlib from zarr.errors import MetadataError from zarr.hierarchy import group -from zarr.meta import (ZARR_FORMAT, decode_array_metadata, - decode_group_metadata, encode_array_metadata, - encode_group_metadata) +from zarr.meta import ZARR_FORMAT from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, KVStore, LMDBStore, @@ -1387,7 +1385,7 @@ def test_init_array(self): # check metadata assert array_meta_key in store - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1407,7 +1405,7 @@ def test_init_array_path(self): # check metadata key = path + '/' + array_meta_key assert key in store - meta = decode_array_metadata(store[key]) + meta = store._metadata_class.decode_array_metadata(store[key]) assert ZARR_FORMAT == meta['zarr_format'] assert (1000,) == meta['shape'] assert (100,) == meta['chunks'] @@ -1421,7 +1419,7 @@ def test_init_array_path(self): def test_init_array_compat(self): store = self.create_store() init_array(store, shape=1000, chunks=100, compressor='none') - meta = decode_array_metadata(store[array_meta_key]) + meta = store._metadata_class.decode_array_metadata(store[array_meta_key]) # N5Store wraps the actual compressor compressor_config = meta['compressor']['compressor_config'] assert compressor_config is None From fce042c4692be90e0f7da37a0b67a1e71ebebd05 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 16:35:53 -0400 Subject: [PATCH 33/42] remove unnecessary hasattr checks In these cases self._store was created using _ensure_store, so it will always have the attribute --- zarr/attrs.py | 6 +----- zarr/core.py | 10 ++-------- zarr/hierarchy.py | 6 +----- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 789e56fab9..454803e95b 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -40,11 +40,7 @@ def _get_nosync(self): except KeyError: d = dict() else: - if hasattr(self.store, '_metadata_class'): - d = self.store._metadata_class.parse_metadata(data) - else: - # can remove this if we require self.store to be a Store - d = parse_metadata(data) + d = self.store._metadata_class.parse_metadata(data) return d def asdict(self): diff --git a/zarr/core.py b/zarr/core.py index d06dc5b256..11a249d6b3 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -206,10 +206,7 @@ def _load_metadata_nosync(self): else: # decode and store metadata as instance members - if hasattr(self._store, '_metadata_class'): - meta = self._store._metadata_class.decode_array_metadata(meta_bytes) - else: - meta = decode_array_metadata(meta_bytes) + meta = self._store._metadata_class.decode_array_metadata(meta_bytes) self._meta = meta self._shape = meta['shape'] self._chunks = meta['chunks'] @@ -266,10 +263,7 @@ def _flush_metadata_nosync(self): compressor=compressor_config, fill_value=self._fill_value, order=self._order, filters=filters_config) mkey = self._key_prefix + array_meta_key - if hasattr(self._store, '_metadata_class'): - self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) - else: - self._store[mkey] = encode_array_metadata(meta) + self._store[mkey] = self._store._metadata_class.encode_array_metadata(meta) @property def store(self): diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index c374321b4a..df7fa07750 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -134,11 +134,7 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None, except KeyError: raise GroupNotFoundError(path) else: - if hasattr(self._store, '_metadata_class'): - meta = self._store._metadata_class.decode_group_metadata(meta_bytes) - else: - meta = decode_group_metadata(meta_bytes) - self._meta = meta + self._meta = self._store._metadata_class.decode_group_metadata(meta_bytes) # setup attributes akey = self._key_prefix + attrs_key From 9ed426f2170e8c97f7a92261c79b9d213eb204f6 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 16:40:07 -0400 Subject: [PATCH 34/42] test migrate1to2 with store=dict() and store=KVStore(dict()) --- zarr/tests/test_storage.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 33d1eec305..2dcacd30ab 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -20,7 +20,7 @@ from zarr.codecs import BZ2, AsType, Blosc, Zlib from zarr.errors import MetadataError from zarr.hierarchy import group -from zarr.meta import ZARR_FORMAT +from zarr.meta import ZARR_FORMAT, decode_array_metadata from zarr.n5 import N5Store, N5FSStore from zarr.storage import (ABSStore, ConsolidatedMetadataStore, DBMStore, DictStore, DirectoryStore, KVStore, LMDBStore, @@ -1927,14 +1927,16 @@ def test_getsize(): assert -1 == getsize(store) -def test_migrate_1to2(): +@pytest.mark.parametrize('dict_store', [False, True]) +def test_migrate_1to2(dict_store): from zarr import meta_v1 # N.B., version 1 did not support hierarchies, so we only have to be # concerned about migrating a single array at the root of the store # setup - store = KVStore(dict()) + store = dict() if dict_store else KVStore(dict()) + meta = dict( shape=(100,), chunks=(10,), @@ -1956,7 +1958,7 @@ def test_migrate_1to2(): assert array_meta_key in store assert 'attrs' not in store assert attrs_key in store - meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) + meta_migrated = decode_array_metadata(store[array_meta_key]) assert 2 == meta_migrated['zarr_format'] # preserved fields @@ -1972,27 +1974,27 @@ def test_migrate_1to2(): assert meta_migrated['compressor'] == Zlib(1).get_config() # check dict compression_opts - store = KVStore(dict()) + store = dict() if dict_store else KVStore(dict()) meta['compression'] = 'blosc' meta['compression_opts'] = dict(cname='lz4', clevel=5, shuffle=1) meta_json = meta_v1.encode_metadata(meta) store['meta'] = meta_json store['attrs'] = json.dumps(dict()).encode('ascii') migrate_1to2(store) - meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) + meta_migrated = decode_array_metadata(store[array_meta_key]) assert 'compression' not in meta_migrated assert 'compression_opts' not in meta_migrated assert (meta_migrated['compressor'] == Blosc(cname='lz4', clevel=5, shuffle=1).get_config()) # check 'none' compression is migrated to None (null in JSON) - store = KVStore(dict()) + store = dict() if dict_store else KVStore(dict()) meta['compression'] = 'none' meta_json = meta_v1.encode_metadata(meta) store['meta'] = meta_json store['attrs'] = json.dumps(dict()).encode('ascii') migrate_1to2(store) - meta_migrated = store._metadata_class.decode_array_metadata(store[array_meta_key]) + meta_migrated = decode_array_metadata(store[array_meta_key]) assert 'compression' not in meta_migrated assert 'compression_opts' not in meta_migrated assert meta_migrated['compressor'] is None From c6b5d52a74af830ac239acf838dfff64872ce7dc Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Fri, 24 Sep 2021 17:01:50 -0400 Subject: [PATCH 35/42] pep8: remove unused imports --- zarr/attrs.py | 1 - zarr/core.py | 1 - zarr/hierarchy.py | 1 - 3 files changed, 3 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 454803e95b..eff1237db1 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -1,6 +1,5 @@ from collections.abc import MutableMapping -from zarr.meta import parse_metadata from zarr._storage.store import Store from zarr.util import json_dumps diff --git a/zarr/core.py b/zarr/core.py index 11a249d6b3..76d7d31950 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -31,7 +31,6 @@ is_scalar, pop_fields, ) -from zarr.meta import decode_array_metadata, encode_array_metadata from zarr.storage import array_meta_key, attrs_key, getsize, listdir, BaseStore from zarr.util import ( all_equal, diff --git a/zarr/hierarchy.py b/zarr/hierarchy.py index df7fa07750..763a5f1631 100644 --- a/zarr/hierarchy.py +++ b/zarr/hierarchy.py @@ -14,7 +14,6 @@ GroupNotFoundError, ReadOnlyError, ) -from zarr.meta import decode_group_metadata from zarr.storage import ( BaseStore, MemoryStore, From c171c2ec64f3251d87ae45ecc0eecff8f738bdf3 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 17:22:24 -0400 Subject: [PATCH 36/42] misc --- zarr/storage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 90531bc754..a276203c71 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -59,6 +59,7 @@ from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_listdir_from_keys, _path_to_prefix, + _prefix_to_attrs_key, # noqa _rename_from_keys, _rmdir_from_keys, array_meta_key, @@ -574,7 +575,7 @@ def __eq__(self, other): class MemoryStore(Store): - """Store class that uses a hierarchy of :class:`dict` objects, thus all data + """Store class that uses a hierarchy of :class:`KVStore` objects, thus all data will be held in main memory. Examples @@ -587,7 +588,7 @@ class MemoryStore(Store): Note that the default class when creating an array is the built-in - :class:`dict` class, i.e.:: + :class:`KVStore` class, i.e.:: >>> z = zarr.zeros(100) >>> type(z.store) @@ -2108,7 +2109,7 @@ class LRUStoreCache(Store): """ - def __init__(self, store: Store, max_size: int): + def __init__(self, store: [Store, Any], max_size: int): self._store = Store._ensure_store(store) self._max_size = max_size self._current_size = 0 From 26a9cefa5c9aba3fbece2743202b9ef925524e7f Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:03:14 -0400 Subject: [PATCH 37/42] s --- zarr/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index a276203c71..9a9f13633f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -59,7 +59,6 @@ from zarr._storage.absstore import ABSStore # noqa: F401 from zarr._storage.store import (_listdir_from_keys, _path_to_prefix, - _prefix_to_attrs_key, # noqa _rename_from_keys, _rmdir_from_keys, array_meta_key, From 87dc56746bcea5efccb177b63914cc92f11fcf72 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:05:09 -0400 Subject: [PATCH 38/42] remove unused FLOAT_FILLS --- zarr/meta.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 731317337e..32e8239452 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -10,12 +10,6 @@ ZARR_FORMAT = 2 -FLOAT_FILLS = { - 'NaN': np.nan, - 'Infinity': np.PINF, - '-Infinity': np.NINF -} - class Metadata2: ZARR_FORMAT = ZARR_FORMAT From ca4db802e4f74cc204f68abfc74aa8fb52341d14 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:13:56 -0400 Subject: [PATCH 39/42] mypy fixes --- zarr/storage.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9a9f13633f..e75d0c73aa 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -231,8 +231,8 @@ def init_array( fill_value=None, order: str = "C", overwrite: bool = False, - path: Path = None, - chunk_store: StoreLike = None, + path: Optional[Path] = None, + chunk_store: Optional[StoreLike] = None, filters=None, object_codec=None, dimension_separator=None, @@ -357,7 +357,7 @@ def init_array( def _init_array_metadata( - store, + store: StoreLike, shape, chunks=None, dtype=None, @@ -366,7 +366,7 @@ def _init_array_metadata( order="C", overwrite=False, path: Optional[str] = None, - chunk_store=None, + chunk_store: Optional[StoreLike] = None, filters=None, object_codec=None, dimension_separator=None, @@ -447,7 +447,7 @@ def _init_array_metadata( dimension_separator=dimension_separator) key = _path_to_prefix(path) + array_meta_key if hasattr(store, '_metadata_class'): - store[key] = store._metadata_class.encode_array_metadata(meta) + store[key] = store._metadata_class.encode_array_metadata(meta) # type: ignore else: store[key] = encode_array_metadata(meta) @@ -515,7 +515,7 @@ def _init_group_metadata( meta = dict() # type: ignore key = _path_to_prefix(path) + group_meta_key if hasattr(store, '_metadata_class'): - store[key] = store._metadata_class.encode_group_metadata(meta) + store[key] = store._metadata_class.encode_group_metadata(meta) # type: ignore else: store[key] = encode_group_metadata(meta) @@ -2108,8 +2108,8 @@ class LRUStoreCache(Store): """ - def __init__(self, store: [Store, Any], max_size: int): - self._store = Store._ensure_store(store) + def __init__(self, store: StoreLike, max_size: int): + self._store: BaseStore = Store._ensure_store(store) self._max_size = max_size self._current_size = 0 self._keys_cache = None From 15d4d78bf6b60790ffcc8108bec51f3578ae0cb9 Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:19:05 -0400 Subject: [PATCH 40/42] sync metadata methods with current master branch --- zarr/meta.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 32e8239452..c292b09a14 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -16,6 +16,7 @@ class Metadata2: @classmethod def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: + # Here we allow that a store may return an already-parsed metadata object, # or a string of JSON that we will parse here. We allow for an already-parsed # object to accommodate a consolidated metadata store, where all the metadata for @@ -24,9 +25,11 @@ def parse_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, Any]: if isinstance(s, Mapping): # assume metadata has already been parsed into a mapping object meta = s + else: # assume metadata needs to be parsed as JSON meta = json_loads(s) + return meta @classmethod @@ -40,13 +43,14 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A # extract array metadata fields try: - # dimension_separator = meta.get("dimension_separator", None) dtype = cls.decode_dtype(meta["dtype"]) if dtype.hasobject: import numcodecs object_codec = numcodecs.get_codec(meta['filters'][0]) else: object_codec = None + + dimension_separator = meta.get("dimension_separator", None) fill_value = cls.decode_fill_value(meta['fill_value'], dtype, object_codec) meta = dict( zarr_format=meta["zarr_format"], @@ -57,8 +61,9 @@ def decode_array_metadata(cls, s: Union[MappingType, str]) -> MappingType[str, A fill_value=fill_value, order=meta["order"], filters=meta["filters"], - dimension_separator=meta.get("dimension_separator", "."), ) + if dimension_separator: + meta['dimension_separator'] = dimension_separator except Exception as e: raise MetadataError("error decoding metadata") from e else: From c7c2624838df06f9d68918e12f83d8e04a5049fe Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Wed, 20 Oct 2021 20:42:22 -0400 Subject: [PATCH 41/42] flake8 --- zarr/_storage/store.py | 3 --- zarr/convenience.py | 2 +- zarr/creation.py | 6 +----- 3 files changed, 2 insertions(+), 9 deletions(-) diff --git a/zarr/_storage/store.py b/zarr/_storage/store.py index 6d4285dbd4..ad9f4a959e 100644 --- a/zarr/_storage/store.py +++ b/zarr/_storage/store.py @@ -1,4 +1,3 @@ -from abc import abstractmethod from collections.abc import MutableMapping from typing import Any, List, Optional, Union @@ -122,7 +121,6 @@ def listdir(self, path: str = "") -> List[str]: path = normalize_storage_path(path) return _listdir_from_keys(self, path) - def rmdir(self, path: str = "") -> None: if not self.is_erasable(): raise NotImplementedError( @@ -132,7 +130,6 @@ def rmdir(self, path: str = "") -> None: _rmdir_from_keys(self, path) - def _path_to_prefix(path: Optional[str]) -> str: # assume path already normalized if path: diff --git a/zarr/convenience.py b/zarr/convenience.py index 26c58d6018..18b59a77b2 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -13,7 +13,7 @@ from zarr.hierarchy import group as _create_group from zarr.hierarchy import open_group from zarr.meta import json_dumps, json_loads -from zarr.storage import contains_array, contains_group, BaseStore, Store +from zarr.storage import contains_array, contains_group, BaseStore from zarr.util import TreeViewer, buffer_size, normalize_storage_path from typing import Union diff --git a/zarr/creation.py b/zarr/creation.py index d50f67c31a..244a9b080c 100644 --- a/zarr/creation.py +++ b/zarr/creation.py @@ -1,9 +1,7 @@ -import os from warnings import warn import numpy as np from numcodecs.registry import codec_registry -from collections.abc import MutableMapping from zarr.core import Array from zarr.errors import ( @@ -11,9 +9,7 @@ ContainsArrayError, ContainsGroupError, ) -from zarr.n5 import N5Store -from zarr.storage import (BaseStore, DirectoryStore, FSStore, ZipStore, - contains_array, contains_group, default_compressor, +from zarr.storage import (contains_array, contains_group, default_compressor, init_array, normalize_storage_path, normalize_store_arg) from zarr.util import normalize_dimension_separator From 4588c0180f8c83461c8b2be16296423085380b7a Mon Sep 17 00:00:00 2001 From: Gregory Lee Date: Thu, 21 Oct 2021 00:50:23 -0400 Subject: [PATCH 42/42] restore is_erasable check to rmdir function Otherwise the save_array doc example fails to write to a ZipStore --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index e75d0c73aa..d4329117a6 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -133,7 +133,7 @@ def rmdir(store: StoreLike, path: Path = None): this will be called, otherwise will fall back to implementation via the `Store` interface.""" path = normalize_storage_path(path) - if hasattr(store, "rmdir"): + if hasattr(store, "rmdir") and store.is_erasable(): # type: ignore # pass through store.rmdir(path) # type: ignore else: