Skip to content
3 changes: 3 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ Enhancements
* Ensure contiguous data using ``astype``.
By :user:`John Kirkham <jakirkham>`; :issue:`513`.

* Add ``meta_array`` argument to ``Array`` and ``Group`` for determining the type of arrays created and returned to users.
By :user:`John Kirkham <jakirkham>`; :issue:`513`.

* Refactor out ``_tofile``/``_fromfile`` from ``DirectoryStore``.
By :user:`John Kirkham <jakirkham>`; :issue:`503`.

Expand Down
46 changes: 38 additions & 8 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class Array:
read and decompressed when possible.

.. versionadded:: 2.7
meta_array : array, optional
An array instance to use for determining arrays to create and return
to users.

Attributes
----------
Expand Down Expand Up @@ -137,6 +140,7 @@ def __init__(
cache_metadata=True,
cache_attrs=True,
partial_decompress=False,
meta_array=None
):
# N.B., expect at this point store is fully initialized with all
# configuration metadata fully specified and normalized
Expand All @@ -148,6 +152,10 @@ def __init__(
self._key_prefix = self._path + '/'
else:
self._key_prefix = ''
if meta_array is not None:
self._meta_array = np.empty_like(meta_array)
else:
self._meta_array = np.empty(())
self._read_only = bool(read_only)
self._synchronizer = synchronizer
self._cache_metadata = cache_metadata
Expand Down Expand Up @@ -746,7 +754,7 @@ def _get_basic_selection_zd(self, selection, out=None, fields=None):

except KeyError:
# chunk not initialized
chunk = np.zeros((), dtype=self._dtype)
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
if self._fill_value is not None:
chunk.fill(self._fill_value)

Expand Down Expand Up @@ -1050,7 +1058,8 @@ def _get_selection(self, indexer, out=None, fields=None):

# setup output array
if out is None:
out = np.empty(out_shape, dtype=out_dtype, order=self._order)
out = np.empty_like(self._meta_array, shape=out_shape,
dtype=out_dtype, order=self._order)
else:
check_array_shape('out', out, out_shape)

Expand Down Expand Up @@ -1516,7 +1525,7 @@ def _set_basic_selection_zd(self, selection, value, fields=None):

except KeyError:
# chunk not initialized
chunk = np.zeros((), dtype=self._dtype)
chunk = np.zeros_like(self._meta_array, shape=(), dtype=self._dtype)
if self._fill_value is not None:
chunk.fill(self._fill_value)

Expand Down Expand Up @@ -1746,6 +1755,23 @@ def _chunk_getitem(self, chunk_coords, chunk_selection, out, out_selection,
self._process_chunk(out, cdata, chunk_selection, drop_axes,
out_is_ndarray, fields, out_selection)

if (out_is_ndarray and
not fields and
is_contiguous_selection(out_selection) and
is_total_slice(chunk_selection, self._chunks) and
not self._filters and
self._dtype != object):

dest = out[out_selection]
write_direct = (
getattr(getattr(dest, "flags", None), "writeable", True) and (
(self._order == 'C' and dest.flags.c_contiguous) or
(self._order == 'F' and dest.flags.f_contiguous)
)
)

if write_direct:

def _chunk_getitems(self, lchunk_coords, lchunk_selection, out, lout_selection,
drop_axes=None, fields=None):
"""As _chunk_getitem, but for lists of chunks
Expand Down Expand Up @@ -1848,7 +1874,8 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):
if is_scalar(value, self._dtype):

# setup array filled with value
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.empty_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)
chunk.fill(value)

else:
Expand All @@ -1868,20 +1895,22 @@ def _process_for_setitem(self, ckey, chunk_selection, value, fields=None):

# chunk not initialized
if self._fill_value is not None:
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.empty_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)
chunk.fill(self._fill_value)
elif self._dtype == object:
chunk = np.empty(self._chunks, dtype=self._dtype, order=self._order)
else:
# N.B., use zeros here so any region beyond the array has consistent
# and compressible data
chunk = np.zeros(self._chunks, dtype=self._dtype, order=self._order)
chunk = np.zeros_like(self._meta_array, shape=self._chunks,
dtype=self._dtype, order=self._order)

else:

# decode chunk
chunk = self._decode_chunk(cdata)
if not chunk.flags.writeable:
if not getattr(getattr(chunk, "flags", None), "writeable", True):
chunk = chunk.copy(order='K')

# modify
Expand Down Expand Up @@ -2113,7 +2142,8 @@ def hexdigest(self, hashname="sha1"):

def __getstate__(self):
    """Return the constructor arguments needed to re-create this array on
    unpickling; consumed positionally by ``__setstate__``."""
    # NOTE(review): __setstate__ calls ``self.__init__(*state)``, so this
    # tuple binds positionally to __init__'s parameters.  __init__ declares
    # ``partial_decompress`` *before* ``meta_array``, so appending
    # ``_meta_array`` as the 8th item looks like it would bind to the
    # ``partial_decompress`` slot instead — confirm against the full
    # __init__ signature (and consider including ``partial_decompress``
    # in the state tuple).
    return (self._store, self._path, self._read_only, self._chunk_store,
            self._synchronizer, self._cache_metadata, self._attrs.cache,
            self._meta_array)

def __setstate__(self, state):
self.__init__(*state)
Expand Down
19 changes: 14 additions & 5 deletions zarr/hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ class Group(MutableMapping):
to all attribute read operations.
synchronizer : object, optional
Array synchronizer.
meta_array : array, optional
An array instance to use for determining arrays to create and return
to users.

Attributes
----------
Expand Down Expand Up @@ -95,7 +97,7 @@ class Group(MutableMapping):
"""

def __init__(self, store, path=None, read_only=False, chunk_store=None,
cache_attrs=True, synchronizer=None):
cache_attrs=True, synchronizer=None, meta_array=None):
self._store = store
self._chunk_store = chunk_store
self._path = normalize_storage_path(path)
Expand All @@ -105,6 +107,10 @@ def __init__(self, store, path=None, read_only=False, chunk_store=None,
self._key_prefix = ''
self._read_only = read_only
self._synchronizer = synchronizer
if meta_array is not None:
self._meta_array = np.empty_like(meta_array)
else:
self._meta_array = np.empty(())

# guard conditions
if contains_array(store, path=self._path):
Expand Down Expand Up @@ -282,7 +288,7 @@ def typestr(o):

def __getstate__(self):
    """Return the constructor arguments needed to re-create this group on
    unpickling; consumed positionally by ``__setstate__``."""
    # Order mirrors Group.__init__'s positional parameters:
    # (store, path, read_only, chunk_store, cache_attrs, synchronizer,
    #  meta_array).
    return (self._store, self._path, self._read_only, self._chunk_store,
            self.attrs.cache, self._synchronizer, self._meta_array)

def __setstate__(self, state):
self.__init__(*state)
Expand Down Expand Up @@ -340,11 +346,13 @@ def __getitem__(self, item):
if contains_array(self._store, path):
return Array(self._store, read_only=self._read_only, path=path,
chunk_store=self._chunk_store,
synchronizer=self._synchronizer, cache_attrs=self.attrs.cache)
synchronizer=self._synchronizer, cache_attrs=self.attrs.cache,
meta_array=self._meta_array)
elif contains_group(self._store, path):
return Group(self._store, read_only=self._read_only, path=path,
chunk_store=self._chunk_store, cache_attrs=self.attrs.cache,
synchronizer=self._synchronizer)
synchronizer=self._synchronizer,
meta_array=self._meta_array)
else:
raise KeyError(item)

Expand Down Expand Up @@ -863,7 +871,8 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False,
cache_attrs = kwargs.get('cache_attrs', self.attrs.cache)
a = Array(self._store, path=path, read_only=self._read_only,
chunk_store=self._chunk_store, synchronizer=synchronizer,
cache_metadata=cache_metadata, cache_attrs=cache_attrs)
cache_metadata=cache_metadata, cache_attrs=cache_attrs,
meta_array=self._meta_array)
shape = normalize_shape(shape)
if shape != a.shape:
raise TypeError('shape do not match existing array; expected {}, got {}'
Expand Down