diff --git a/.travis.yml b/.travis.yml index 8a5e1fe521..5ecf462419 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,10 @@ addons: packages: - libdb-dev +services: + - redis-server + - mongodb + matrix: include: - python: 2.7 @@ -20,6 +24,9 @@ matrix: dist: xenial sudo: true +before_script: + - mongo mydb_test --eval 'db.createUser({user:"travis",pwd:"test",roles:["readWrite"]});' + install: - pip install -U pip setuptools wheel tox-travis coveralls diff --git a/docs/api/storage.rst b/docs/api/storage.rst index 24498b0d79..9abe240379 100644 --- a/docs/api/storage.rst +++ b/docs/api/storage.rst @@ -25,6 +25,8 @@ Storage (``zarr.storage``) .. automethod:: close +.. autoclass:: MongoDBStore +.. autoclass:: RedisStore .. autoclass:: LRUStoreCache .. automethod:: invalidate diff --git a/docs/release.rst b/docs/release.rst index 9493f273f8..65bd94c45f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -26,6 +26,14 @@ Enhancements * Efficient iteration over arrays by decompressing chunkwise. By :user:`Jerome Kelleher <jeromekelleher>`, :issue:`398`, :issue:`399`. +* Adds the Redis-backed :class:`zarr.storage.RedisStore` class enabling a + Redis database to be used as the backing store for an array or group. + By :user:`Joe Hamman <jhamman>`, :issue:`299`, :issue:`372`. + +* Adds the MongoDB-backed :class:`zarr.storage.MongoDBStore` class enabling a + MongoDB database to be used as the backing store for an array or group. + By :user:`Joe Hamman <jhamman>`, :issue:`299`, :issue:`372`. + Bug fixes ~~~~~~~~~ diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0fbefc3e2e..3e8e9bac66 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -739,6 +739,13 @@ Python is built with SQLite support):: >>> z[:] = 42 >>> store.close() +Also added in Zarr version 2.3 are two storage classes for interfacing with client-server +databases.
The :class:`zarr.storage.RedisStore` class interfaces with `Redis <https://redis.io/>`_
+(an in memory data structure store), and the :class:`zarr.storage.MongoDBStore` class interfaces
+with `MongoDB <https://www.mongodb.com/>`_ (a document-oriented NoSQL database). These stores
+respectively require the `redis <https://redis-py.readthedocs.io/>`_ and
+`pymongo <https://pymongo.readthedocs.io/>`_ packages to be installed.
+
 Distributed/cloud storage
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/requirements_dev_optional.txt b/requirements_dev_optional.txt
index 1ea71451d9..2e4feddce0 100644
--- a/requirements_dev_optional.txt
+++ b/requirements_dev_optional.txt
@@ -1,3 +1,5 @@
 # These packages are currently not available on Windows.
 bsddb3==6.2.6
 lmdb==0.94
+redis==3.0.1
+pymongo==3.7.1
diff --git a/zarr/__init__.py b/zarr/__init__.py
index c9046f6bff..e208b8ae82 100644
--- a/zarr/__init__.py
+++ b/zarr/__init__.py
@@ -8,7 +8,7 @@
                            ones_like, full_like, open_array, open_like, create)
 from zarr.storage import (DictStore, DirectoryStore, ZipStore, TempStore,
                           NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore,
-                          LRUStoreCache)
+                          LRUStoreCache, RedisStore, MongoDBStore)
 from zarr.hierarchy import group, open_group, Group
 from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
 from zarr.codecs import *
diff --git a/zarr/storage.py b/zarr/storage.py
index 75e4d7d04d..d71ee3a18a 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -37,7 +37,7 @@
                        normalize_storage_path, buffer_size,
                        normalize_fill_value, nolock, normalize_dtype)
 from zarr.meta import encode_array_metadata, encode_group_metadata
-from zarr.compat import PY2, OrderedDict_move_to_end
+from zarr.compat import PY2, OrderedDict_move_to_end, binary_type
 from numcodecs.registry import codec_registry
 from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
 from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
@@ -2084,6 +2084,195 @@ def clear(self):
             )
 
 
+class MongoDBStore(MutableMapping):
+    """Storage class using MongoDB.
+
+    .. note:: This is an experimental feature.
+
+    Requires the `pymongo <https://pymongo.readthedocs.io/>`_
+    package to be installed.
+
+    Parameters
+    ----------
+    database : string
+        Name of database
+    collection : string
+        Name of collection
+    **kwargs
+        Keyword arguments passed through to the `pymongo.MongoClient` function.
+
+    Examples
+    --------
+    Store a single array::
+
+        >>> import zarr
+        >>> store = zarr.MongoDBStore(host='localhost')
+        >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
+        >>> z[...] = 42
+        >>> store.close()
+
+    Store a group::
+
+        >>> store = zarr.MongoDBStore(host='localhost')
+        >>> root = zarr.group(store=store, overwrite=True)
+        >>> foo = root.create_group('foo')
+        >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5))
+        >>> bar[...] = 42
+        >>> store.close()
+
+    Notes
+    -----
+    The maximum chunksize in MongoDB documents is 16 MB.
+
+    """
+
+    _key = 'key'
+    _value = 'value'
+
+    def __init__(self, database='mongodb_zarr', collection='zarr_collection',
+                 **kwargs):
+        import pymongo
+
+        self._database = database
+        self._collection = collection
+        self._kwargs = kwargs
+
+        self.client = pymongo.MongoClient(**self._kwargs)
+        self.db = self.client.get_database(self._database)
+        self.collection = self.db.get_collection(self._collection)
+
+    def __getitem__(self, key):
+        doc = self.collection.find_one({self._key: key})
+        if doc is None:
+            raise KeyError(key)
+        return binary_type(doc[self._value])
+
+    def __setitem__(self, key, value):
+        value = ensure_bytes(value)
+        self.collection.replace_one({self._key: key},
+                                    {self._key: key, self._value: value},
+                                    upsert=True)
+
+    def __delitem__(self, key):
+        result = self.collection.delete_many({self._key: key})
+        # delete_many may remove several documents if the key was ever
+        # duplicated; only a count of zero means the key was missing.
+        if result.deleted_count == 0:
+            raise KeyError(key)
+
+    def __iter__(self):
+        for f in self.collection.find({}):
+            yield f[self._key]
+
+    def __len__(self):
+        return self.collection.count_documents({})
+
+    def __getstate__(self):
+        return self._database, self._collection, self._kwargs
+
+    def __setstate__(self, state):
+        database, collection, kwargs = state
+        self.__init__(database=database, collection=collection, **kwargs)
+
+    def close(self):
+        """Cleanup client resources and disconnect from MongoDB."""
+        self.client.close()
+
+    def clear(self):
+        """Remove all items from store."""
+        self.collection.delete_many({})
+
+
+class RedisStore(MutableMapping):
+    """Storage class using Redis.
+
+    .. note:: This is an experimental feature.
+
+    Requires the `redis <https://redis-py.readthedocs.io/>`_
+    package to be installed.
+
+    Parameters
+    ----------
+    prefix : string
+        Name of prefix for Redis keys
+    **kwargs
+        Keyword arguments passed through to the `redis.Redis` function.
+
+    Examples
+    --------
+    Store a single array::
+
+        >>> import zarr
+        >>> store = zarr.RedisStore(port=6379)
+        >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
+        >>> z[...] = 42
+
+    Store a group::
+
+        >>> store = zarr.RedisStore(port=6379)
+        >>> root = zarr.group(store=store, overwrite=True)
+        >>> foo = root.create_group('foo')
+        >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5))
+        >>> bar[...] = 42
+
+    """
+    def __init__(self, prefix='zarr', **kwargs):
+        import redis
+        self._prefix = prefix
+        self._kwargs = kwargs
+
+        self.client = redis.Redis(**kwargs)
+
+    def _key(self, key):
+        return '{prefix}:{key}'.format(prefix=self._prefix, key=key)
+
+    def __getitem__(self, key):
+        value = self.client.get(self._key(key))
+        if value is None:
+            # report the caller's key, not the prefixed Redis key
+            raise KeyError(key)
+        return value
+
+    def __setitem__(self, key, value):
+        value = ensure_bytes(value)
+        self.client[self._key(key)] = value
+
+    def __delitem__(self, key):
+        count = self.client.delete(self._key(key))
+        if not count:
+            raise KeyError(key)
+
+    def keylist(self):
+        offset = len(self._key(''))  # length of prefix
+        return [key[offset:].decode('utf-8')
+                for key in self.client.keys(self._key('*'))]
+
+    def keys(self):
+        for key in self.keylist():
+            yield key
+
+    def __iter__(self):
+        for key in self.keys():
+            yield key
+
+    def __len__(self):
+        return len(self.keylist())
+
+    def __getstate__(self):
+        return self._prefix, self._kwargs
+
+    def __setstate__(self, state):
+        prefix, kwargs = state
+        self.__init__(prefix=prefix, **kwargs)
+
+    def clear(self):
+        """Remove all items from store."""
+        keys = self.client.keys(self._key('*'))
+        if keys:
+            # one DEL round trip; keys removed concurrently are ignored
+            self.client.delete(*keys)
+
+
 class ConsolidatedMetadataStore(MutableMapping):
     """A layer over other storage, where the metadata has been consolidated into
     a single key.
diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py
index ab3ea5c26d..87273d140c 100644
--- a/zarr/tests/test_storage.py
+++ b/zarr/tests/test_storage.py
@@ -20,8 +20,8 @@
                           DirectoryStore, ZipStore, init_group, group_meta_key,
                           getsize, migrate_1to2, TempStore, atexit_rmtree,
                           NestedDirectoryStore, default_compressor, DBMStore,
-                          LMDBStore, SQLiteStore, atexit_rmglob, LRUStoreCache,
-                          ConsolidatedMetadataStore)
+                          LMDBStore, SQLiteStore, MongoDBStore, RedisStore,
+                          atexit_rmglob, LRUStoreCache, ConsolidatedMetadataStore)
 from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT,
                        decode_group_metadata, encode_group_metadata)
 from zarr.compat import PY2
@@ -900,6 +900,29 @@ def test_context_manager(self):
 except ImportError:  # pragma: no cover
     sqlite3 = None
 
+try:
+    import pymongo
+    from pymongo.errors import ConnectionFailure, ServerSelectionTimeoutError
+    try:
+        client = pymongo.MongoClient(host='127.0.0.1',
+                                     serverSelectionTimeoutMS=1e3)
+        client.server_info()
+    except (ConnectionFailure, ServerSelectionTimeoutError):  # pragma: no cover
+        pymongo = None
+except ImportError:  # pragma: no cover
+    pymongo = None
+
+try:
+    import redis
+    from redis.exceptions import ConnectionError as RedisConnectionError
+    try:
+        rs = redis.Redis("localhost", port=6379)
+        rs.ping()
+    except RedisConnectionError:  # pragma: no cover
+        redis = None
+except ImportError:  # pragma: no cover
+    redis = None
+
 
 @unittest.skipIf(sqlite3 is None, 'python built without sqlite')
 class TestSQLiteStore(StoreTests, unittest.TestCase):
@@ -930,6 +953,29 @@ def test_pickle(self):
             pickle.dumps(store)
 
 
+@unittest.skipIf(pymongo is None, 'test requires pymongo and a MongoDB server')
+class TestMongoDBStore(StoreTests, unittest.TestCase):
+
+    def create_store(self):
+        store = MongoDBStore(host='127.0.0.1', database='zarr_tests',
+                             collection='zarr_tests')
+        # start with an empty store
+        store.clear()
+        return store
+
+
+@unittest.skipIf(redis is None, 'test requires redis and a Redis server')
+class TestRedisStore(StoreTests, unittest.TestCase):
+
+    def create_store(self):
+        # TODO: this is the default host for Redis on Travis,
+        # we probably want to generalize this though
+        store = RedisStore(prefix='zarr_tests', host='localhost', port=6379)
+        # start with an empty store
+        store.clear()
+        return store
+
+
 class TestLRUStoreCache(StoreTests, unittest.TestCase):
 
     def create_store(self):