Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
8ef5dc1
Create an SQLite-backed mutable mapping
jakirkham Dec 21, 2018
e3e2c2e
Test SQLiteStore
jakirkham Dec 21, 2018
d60aaab
Export `SQLiteStore` to the top-level namespace
jakirkham Dec 21, 2018
ace251c
Include some SQLiteStore examples
jakirkham Dec 21, 2018
ecf18f7
Demonstrate the `SQLiteStore` in the tutorial
jakirkham Dec 21, 2018
92a4d71
Provide API documentation for `SQLiteStore`
jakirkham Dec 21, 2018
efa9ccd
Make a release note for `SQLiteStore`
jakirkham Dec 21, 2018
6f68451
Use unique extension for `SQLiteStore` files
jakirkham Dec 21, 2018
9bbbde6
Only close SQLite database when requested
jakirkham Dec 21, 2018
20ef384
Update docs to show how to close `SQLiteStore`
jakirkham Dec 21, 2018
a8f31cf
Ensure all SQL commands are capitalized
jakirkham Dec 21, 2018
5ddc193
Simplify `SQLiteStore`'s `__delitem__` using `in`
jakirkham Dec 21, 2018
0abcc1a
Drop no longer needed flake8 error suppression
jakirkham Dec 21, 2018
b339c09
Simplify `close` and use `flush`
jakirkham Dec 21, 2018
8b8d289
Flush before pickling `SQLiteStore`
jakirkham Dec 21, 2018
1cac5eb
Special case in-memory SQLite database
jakirkham Dec 21, 2018
b8e2d23
Drop unneeded empty `return` statement
jakirkham Dec 21, 2018
4db7e14
Update docs/release.rst
alimanfoo Dec 21, 2018
31a9af3
Update docs/release.rst
alimanfoo Dec 21, 2018
9f5d02b
Correct default value for `check_same_thread`
jakirkham Dec 21, 2018
ac6827e
Flush after making any mutation to the database
jakirkham Dec 21, 2018
8b35eb8
Skip flushing data when pickling `SQLiteStore`
jakirkham Dec 21, 2018
f8d3f03
Skip using `flush` in `close`
jakirkham Dec 21, 2018
1abeba7
Implement `update` for `SQLiteStore`
jakirkham Dec 21, 2018
4bbbeba
Rewrite `__setitem__` to use `update`
jakirkham Dec 21, 2018
f9481b8
Disable `check_same_thread` by default again
jakirkham Dec 21, 2018
0188a60
Force some parameters to defaults
jakirkham Dec 21, 2018
1af4446
Drop `flush` calls from `SQLiteStore`
jakirkham Dec 21, 2018
ca6b8a4
Drop the `flush` function from `SQLiteStore`
jakirkham Dec 21, 2018
eb4564b
Implement optimized `clear` for `SQLiteStore`
jakirkham Dec 25, 2018
4f59451
Implement optimized `rmdir` for `SQLiteStore`
jakirkham Dec 25, 2018
5ab54c0
Implement optimized `getsize` for `SQLiteStore`
jakirkham Dec 25, 2018
c386c72
Implement optimized `listdir` for `SQLiteStore`
jakirkham Dec 25, 2018
f9dfc06
Implement `rename` for `SQLiteStore`
jakirkham Dec 25, 2018
8d4a8e2
Allow users to specify the SQLite table name
jakirkham Dec 25, 2018
349a885
Randomize temporary table name
jakirkham Dec 25, 2018
152ed0c
Merge `SELECT`s in `rename`
jakirkham Dec 25, 2018
82e6522
Tidy `rename` SQL code a bit
jakirkham Dec 25, 2018
3748982
Fuse away one `SELECT` in `listdir`
jakirkham Dec 25, 2018
168843c
Only use `k` in `SQLiteStore`'s `__contains__`
jakirkham Dec 25, 2018
8defc15
Fuse `SELECT`s in `SQLiteStore`'s `__contains__`
jakirkham Dec 25, 2018
019b3e0
Cast `has` to `bool` in `SQLiteStore.__contains__`
jakirkham Dec 25, 2018
3f74f25
Prefer using single quotes in more places
jakirkham Dec 27, 2018
dcb808f
Wrap SQL table creation text
jakirkham Jan 2, 2019
9369862
Adjust wrapping of `SQLiteStore.clear`'s code
jakirkham Jan 2, 2019
b1c644a
Use parameters for SQL in `listdir`
jakirkham Jan 2, 2019
1c3a4d7
Use parameters for SQL in `getsize`
jakirkham Jan 2, 2019
059ec45
Use parameters for SQL in `rmdir`
jakirkham Jan 2, 2019
4fd3b3d
Adjust formatting of `SQLiteStore.__contains__`
jakirkham Jan 2, 2019
ed9c3b0
Drop `SQLiteStore`'s implementation of `rename`
jakirkham Jan 2, 2019
075eabc
Just name the SQL table "zarr"
jakirkham Jan 2, 2019
06e0fec
Unwrap some lines to compact the code a bit
jakirkham Jan 2, 2019
a07ecba
Simplify `SQLiteStore.__contains__` code wrapping
jakirkham Jan 2, 2019
d3e4f3d
Check SQLite Cursor's rowcount for deletion
jakirkham Jan 2, 2019
7bff163
Parenthesize operations to `?` in SQL
jakirkham Jan 2, 2019
0c5fa9d
Check `rowcount` for values less than `1`
jakirkham Jan 2, 2019
9aa5381
Parenthesize a few other SQL commands with `?`
jakirkham Jan 2, 2019
1edf86a
Use one line for `SQLiteStore.rmdir`'s SQL
jakirkham Jan 2, 2019
0618dbb
Use 1 line for `SQLiteStore.rmdir`'s SQL & params
jakirkham Jan 2, 2019
d55ac16
Update docs/release.rst
alimanfoo Jan 3, 2019
996fd77
`TestSQLiteStore` -> `TestGroupWithSQLiteStore`
jakirkham Jan 3, 2019
d268144
Drop `else` in `for`/`else` for clarity
jakirkham Jan 3, 2019
207565d
Ensure SQLite is new enough to enable threading
jakirkham Jan 3, 2019
7e86d3e
Add spacing around `=`
jakirkham Jan 3, 2019
043eec4
Merge 'zarr-developers/master' into 'jakirkham/add_sqlite_store'
jakirkham Jan 3, 2019
0282fbf
Hold a lock for any DML operations in SQLiteStore
jakirkham Jan 3, 2019
c65f78f
Raise when pickling an in-memory SQLite database
jakirkham Jan 3, 2019
505ac5f
Test in-memory SQLiteStore's separately
jakirkham Jan 3, 2019
0bad6c5
Drop explicit setting of `sqlite3` defaults
jakirkham Jan 4, 2019
0dc34bb
Adjust inheritance of `TestSQLiteStoreInMemory`
jakirkham Jan 4, 2019
f5b8913
Merge 'zarr-developers/master' into 'jakirkham/add_sqlite_store'
jakirkham Jan 16, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/api/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Storage (``zarr.storage``)
.. automethod:: close
.. automethod:: flush

.. autoclass:: SQLiteStore

.. automethod:: close

.. autoclass:: LRUStoreCache

.. automethod:: invalidate
Expand Down
4 changes: 4 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ Enhancements
* Support has been added for structured arrays with sub-array shape and/or nested fields. By
:user:`Tarik Onalan <onalant>`, :issue:`111`, :issue:`296`.

* Adds the SQLite-backed :class:`zarr.storage.SQLiteStore` class enabling an
SQLite database to be used as the backing store for an array or group.
By :user:`John Kirkham <jakirkham>`, :issue:`368`, :issue:`365`.

Bug fixes
~~~~~~~~~

Expand Down
10 changes: 10 additions & 0 deletions docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,16 @@ group (requires `lmdb <http://lmdb.readthedocs.io/>`_ to be installed)::
>>> z[:] = 42
>>> store.close()

New in Zarr version 2.3 is the :class:`zarr.storage.SQLiteStore` class, which
enables an SQLite database to be used for storing an array or group (requires
that Python be built with SQLite support)::

>>> store = zarr.SQLiteStore('data/example.sqldb')
>>> root = zarr.group(store=store, overwrite=True)
>>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4')
>>> z[:] = 42
>>> store.close()

Distributed/cloud storage
~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
3 changes: 2 additions & 1 deletion zarr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from zarr.creation import (empty, zeros, ones, full, array, empty_like, zeros_like,
ones_like, full_like, open_array, open_like, create)
from zarr.storage import (DictStore, DirectoryStore, ZipStore, TempStore,
NestedDirectoryStore, DBMStore, LMDBStore, LRUStoreCache)
NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore,
LRUStoreCache)
from zarr.hierarchy import group, open_group, Group
from zarr.sync import ThreadSynchronizer, ProcessSynchronizer
from zarr.codecs import *
Expand Down
207 changes: 207 additions & 0 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from __future__ import absolute_import, print_function, division
from collections import MutableMapping, OrderedDict
import os
import operator
import tempfile
import zipfile
import shutil
Expand All @@ -26,6 +27,7 @@
import sys
import json
import multiprocessing
from pickle import PicklingError
from threading import Lock, RLock
import glob
import warnings
Expand Down Expand Up @@ -1877,6 +1879,211 @@ def __delitem__(self, key):
self._invalidate_value(key)


class SQLiteStore(MutableMapping):
"""Storage class using SQLite.

Parameters
----------
path : string
Location of database file.
**kwargs
Keyword arguments passed through to the `sqlite3.connect` function.

Examples
--------
Store a single array::

>>> import zarr
>>> store = zarr.SQLiteStore('data/array.sqldb')
>>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True)
>>> z[...] = 42
>>> store.close() # don't forget to call this when you're done

Store a group::

>>> store = zarr.SQLiteStore('data/group.sqldb')
>>> root = zarr.group(store=store, overwrite=True)
>>> foo = root.create_group('foo')
>>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5))
>>> bar[...] = 42
>>> store.close() # don't forget to call this when you're done
"""

def __init__(self, path, **kwargs):
import sqlite3

# normalize path
if path != ':memory:':
path = os.path.abspath(path)

# store properties
self.path = path
self.kwargs = kwargs

# allow threading if SQLite connections are thread-safe
#
# ref: https://www.sqlite.org/releaselog/3_3_1.html
# ref: https://bugs.python.org/issue27190
check_same_thread = True
if sqlite3.sqlite_version_info >= (3, 3, 1):
check_same_thread = False

# keep a lock for serializing mutable operations
self.lock = Lock()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any value in allowing this lock (or the lock type) to be specified by the user? For example, we've found this sort of thing to be useful at times when using dask-distributed for I/O operations.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alimanfoo and I were a little concerned by some of the wording in the docs about sqlite3 and thread safety as discussed here. Having this lock may be (overly) cautious. It's not clear what the right answer is here. Would be happy to hear other thoughts on this if you have any.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is fine for now. We can certainly revisit this down the road if additional functionality is needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @jhamman. SGTM


# open database
self.db = sqlite3.connect(
self.path,
detect_types=0,
isolation_level=None,
check_same_thread=check_same_thread,
**self.kwargs
)

# handle keys as `str`s
self.db.text_factory = str

# get a cursor to read/write to the database
self.cursor = self.db.cursor()

# initialize database with our table if missing
with self.lock:
self.cursor.execute(
'CREATE TABLE IF NOT EXISTS zarr(k TEXT PRIMARY KEY, v BLOB)'
)

def __getstate__(self):
if self.path == ':memory:':
raise PicklingError('Cannot pickle in-memory SQLite databases')
return self.path, self.kwargs

def __setstate__(self, state):
path, kwargs = state
self.__init__(path=path, **kwargs)

def close(self):
"""Closes the underlying database."""

# close cursor and db objects
self.cursor.close()
self.db.close()

def __getitem__(self, key):
value = self.cursor.execute('SELECT v FROM zarr WHERE (k = ?)', (key,))
for v, in value:
return v
raise KeyError(key)

def __setitem__(self, key, value):
    """Store ``value`` under ``key`` by delegating to ``update``."""
    single_entry = {key: value}
    self.update(single_entry)

def __delitem__(self, key):
with self.lock:
self.cursor.execute('DELETE FROM zarr WHERE (k = ?)', (key,))
if self.cursor.rowcount < 1:
raise KeyError(key)

def __contains__(self, key):
cs = self.cursor.execute(
'SELECT COUNT(*) FROM zarr WHERE (k = ?)', (key,)
)
for has, in cs:
has = bool(has)
return has

def items(self):
kvs = self.cursor.execute('SELECT k, v FROM zarr')
for k, v in kvs:
yield k, v

def keys(self):
ks = self.cursor.execute('SELECT k FROM zarr')
for k, in ks:
yield k

def values(self):
vs = self.cursor.execute('SELECT v FROM zarr')
for v, in vs:
yield v

def __iter__(self):
return self.keys()

def __len__(self):
cs = self.cursor.execute('SELECT COUNT(*) FROM zarr')
for c, in cs:
return c

def update(self, *args, **kwargs):
    """Insert or replace many key/value pairs with a single statement.

    Parameters
    ----------
    *args
        Objects exposing ``.items()``; their pairs are stored in order.
    **kwargs
        Additional key/value pairs, stored after those from ``args``.

    All pairs are collected first and then written with one
    ``executemany`` call while holding ``self.lock``.
    """
    sources = args + (kwargs,)

    rows = []
    for source in sources:
        for key, value in source.items():
            # Python 2 cannot store `memoryview`s, but it can store
            # `buffer`s. However Python 2 won't return `bytes` then. So we
            # coerce to `bytes`, which are handled correctly. Python 3
            # doesn't have these issues.
            if PY2:  # pragma: py3 no cover
                value = ensure_bytes(value)
            else:  # pragma: py2 no cover
                value = ensure_contiguous_ndarray(value)

            # queue the pair for the batched write below
            rows.append((key, value))

    with self.lock:
        self.cursor.executemany('REPLACE INTO zarr VALUES (?, ?)', rows)

def listdir(self, path=None):
    """Return the sorted, distinct names of the immediate children of ``path``.

    Parameters
    ----------
    path : string, optional
        Storage path to list; it is passed through
        ``normalize_storage_path`` first. An empty normalized path lists
        the root.

    Returns
    -------
    list
        First path segment of every key below ``path``, de-duplicated and
        in ascending order.

    Notes
    -----
    Keys are selected by an exact, case-sensitive comparison against
    ``path + '/'``. A ``LIKE (path || "_%")`` pattern is not suitable here:
    it also matches sibling prefixes (``foo`` would match ``foobar/x``),
    treats ``_`` and ``%`` inside ``path`` as wildcards, and is
    case-insensitive for ASCII in SQLite.
    """
    path = normalize_storage_path(path)
    # a non-root directory owns exactly the keys starting with "<path>/"
    prefix = path + '/' if path else ''
    rows = self.cursor.execute(
        '''
        SELECT DISTINCT SUBSTR(m, 1, INSTR(m, '/') - 1) AS l FROM (
            SELECT LTRIM(SUBSTR(k, LENGTH(?) + 1), '/') || '/' AS m
            FROM zarr
            WHERE SUBSTR(k, 1, LENGTH(?)) = ? AND LENGTH(k) > LENGTH(?)
        ) ORDER BY l ASC
        ''',
        (prefix, prefix, prefix, prefix)
    )
    return [row[0] for row in rows]

def getsize(self, path=None):
    """Return the total byte length of the values stored directly at ``path``.

    Parameters
    ----------
    path : string, optional
        Storage path; it is passed through ``normalize_storage_path``
        first. An empty normalized path refers to the root.

    Returns
    -------
    int
        Sum of ``LENGTH(v)`` over the key equal to ``path`` (if any) and
        the keys immediately below ``path`` (no further ``/`` after the
        prefix); ``0`` when nothing matches. Values in nested
        sub-directories are not counted.

    Notes
    -----
    Keys are selected by exact, case-sensitive comparison rather than
    ``LIKE (path || "%")``, which would also count sibling keys sharing the
    prefix (``foo`` would count ``foobar``) and would treat ``_`` / ``%``
    inside ``path`` as wildcards.
    """
    path = normalize_storage_path(path)
    # a non-root directory owns exactly the keys starting with "<path>/"
    prefix = path + '/' if path else ''
    row = self.cursor.execute(
        '''
        SELECT COALESCE(SUM(LENGTH(v)), 0) FROM zarr
        WHERE k = ? OR (
            SUBSTR(k, 1, LENGTH(?)) = ? AND
            0 == INSTR(LTRIM(SUBSTR(k, LENGTH(?) + 1), '/'), '/')
        )
        ''',
        (path, prefix, prefix, prefix)
    ).fetchone()
    return row[0]

def rmdir(self, path=None):
    """Delete every key below ``path``; an empty path clears the whole store.

    Parameters
    ----------
    path : string, optional
        Storage path; it is passed through ``normalize_storage_path``
        first. If the normalized path is empty, ``clear()`` is called.

    Notes
    -----
    Only keys that start with exactly ``path + '/'`` are removed. A
    ``LIKE (path || "_%")`` pattern is not suitable here: it would also
    delete sibling prefixes (``rmdir('foo')`` would remove ``foobar/x``),
    treats ``_`` and ``%`` inside ``path`` as wildcards, and is
    case-insensitive for ASCII in SQLite.
    """
    path = normalize_storage_path(path)
    if not path:
        self.clear()
        return

    prefix = path + '/'
    with self.lock:
        self.cursor.execute(
            'DELETE FROM zarr WHERE SUBSTR(k, 1, LENGTH(?)) = ?',
            (prefix, prefix)
        )

def clear(self):
    """Remove every entry by dropping and recreating the ``zarr`` table."""
    # hold the store lock for the whole script, like the other mutations
    with self.lock:
        # DROP + CREATE are wrapped in one explicit transaction
        self.cursor.executescript(
            '''
BEGIN TRANSACTION;
DROP TABLE zarr;
CREATE TABLE zarr(k TEXT PRIMARY KEY, v BLOB);
COMMIT TRANSACTION;
'''
        )


class ConsolidatedMetadataStore(MutableMapping):
"""A layer over other storage, where the metadata has been consolidated into
a single key.
Expand Down
27 changes: 26 additions & 1 deletion zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore,
DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob,
DBMStore, LMDBStore, SQLiteStore, atexit_rmtree, atexit_rmglob,
LRUStoreCache)
from zarr.core import Array
from zarr.errors import PermissionError
Expand Down Expand Up @@ -1390,6 +1390,31 @@ def test_nbytes_stored(self):
pass # not implemented


try:
import sqlite3
except ImportError: # pragma: no cover
sqlite3 = None


@unittest.skipIf(sqlite3 is None, 'python built without sqlite')
class TestArrayWithSQLiteStore(TestArray):
    """Run the ``TestArray`` suite against arrays backed by ``SQLiteStore``."""

    @staticmethod
    def create_array(read_only=False, **kwargs):
        """Build an ``Array`` on a fresh temporary ``SQLiteStore``."""
        # options meant for Array itself must not reach init_array
        cache_metadata = kwargs.pop('cache_metadata', True)
        cache_attrs = kwargs.pop('cache_attrs', True)
        kwargs.setdefault('compressor', Zlib(1))

        db_path = mktemp(suffix='.db')
        atexit.register(atexit_rmtree, db_path)
        store = SQLiteStore(db_path)
        init_array(store, **kwargs)
        return Array(store, read_only=read_only, cache_metadata=cache_metadata,
                     cache_attrs=cache_attrs)

    def test_nbytes_stored(self):
        pass  # not implemented


class TestArrayWithNoCompressor(TestArray):

def create_array(self, read_only=False, **kwargs):
Expand Down
20 changes: 18 additions & 2 deletions zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array,
array_meta_key, group_meta_key, atexit_rmtree,
NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmglob,
LRUStoreCache)
NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore,
atexit_rmglob, LRUStoreCache)
from zarr.core import Array
from zarr.compat import PY2, text_type
from zarr.hierarchy import Group, group, open_group
Expand Down Expand Up @@ -928,6 +928,22 @@ def create_store():
return store, None


try:
import sqlite3
except ImportError: # pragma: no cover
sqlite3 = None


@unittest.skipIf(sqlite3 is None, 'python built without sqlite')
class TestGroupWithSQLiteStore(TestGroup):
    """Run the ``TestGroup`` suite against groups backed by ``SQLiteStore``."""

    def create_store(self):
        """Return ``(store, None)`` for a fresh temporary ``SQLiteStore``."""
        db_path = tempfile.mktemp(suffix='.db')
        atexit.register(atexit_rmtree, db_path)
        return SQLiteStore(db_path), None


class TestGroupWithChunkStore(TestGroup):

@staticmethod
Expand Down
Loading