From 7eed366397a4c8bf98f8bb4adac34fa022ac4532 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Fri, 30 Nov 2018 12:18:30 -0500
Subject: [PATCH 01/14] Bump Numcodecs requirement to 0.6.1

---
 requirements_dev.txt | 2 +-
 setup.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index 2ad18f372c..d39ba9e9b8 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,3 +1,3 @@
 asciitree==0.3.3
 fasteners==0.14.1
-numcodecs==0.5.5
+numcodecs==0.6.1
diff --git a/setup.py b/setup.py
index a5e8334e43..903af3bc04 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
         'asciitree',
         'numpy>=1.7',
         'fasteners',
-        'numcodecs>=0.5.3',
+        'numcodecs>=0.6.1',
     ],
     package_dir={'': '.'},
     packages=['zarr', 'zarr.tests'],

From 2552f620191cafa72429566f4a8ce4f49b4db4d3 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Fri, 30 Nov 2018 13:06:29 -0500
Subject: [PATCH 02/14] Assert MsgPack round-trips bytes objects correctly

Previously MsgPack was turning bytes objects to unicode objects when
round-tripping them. However this has been fixed in the latest version of
Numcodecs. So correct this test now that MsgPack is working correctly.
---
 zarr/tests/test_core.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py
index 11891f8fe9..544ec95c41 100644
--- a/zarr/tests/test_core.py
+++ b/zarr/tests/test_core.py
@@ -982,7 +982,7 @@ def test_object_arrays(self):
         z[0] = 'foo'
         assert z[0] == 'foo'
         z[1] = b'bar'
-        assert z[1] == 'bar'  # msgpack gets this wrong
+        assert z[1] == b'bar'
         z[2] = 1
         assert z[2] == 1
         z[3] = [2, 4, 6, 'baz']

From aee5aceced5e5a3f2698f2363540f064c200f4a9 Mon Sep 17 00:00:00 2001
From: Alistair Miles
Date: Sat, 1 Dec 2018 14:09:40 +0000
Subject: [PATCH 03/14] properly guard against removal of object codec

---
 zarr/core.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index b4da45cd99..bcae03cb9f 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -8,6 +8,7 @@
 
 import numpy as np
 
 
+from numcodecs.compat import ensure_contiguous_ndarray
 from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1743,18 +1744,25 @@ def _decode_chunk(self, cdata):
             for f in self._filters[::-1]:
                 chunk = f.decode(chunk)
 
-        # view as correct dtype
+        # view as numpy array with correct dtype
         if self._dtype == object:
-            if isinstance(chunk, np.ndarray):
-                chunk = chunk.astype(self._dtype)
+            # special case object dtype, because incorrect handling can lead to
+            # segfaults and other bad things happening
+            if isinstance(chunk, np.ndarray) and chunk.dtype == object:
+                # chunk is already of correct dtype, good to carry on
+                # flatten just to be sure we can reshape later
+                chunk = chunk.reshape(-1, order='A')
             else:
+                # If we end up here, someone must have hacked around with the filters.
+                # We cannot deal with object arrays unless there is an object
+                # codec in the filter chain, i.e., a filter that converts from object
+                # array to something else during encoding, and converts back to object
+                # array during decoding.
                 raise RuntimeError('cannot read object array without object codec')
-        elif isinstance(chunk, np.ndarray):
-            chunk = chunk.view(self._dtype)
         else:
-            chunk = np.frombuffer(chunk, dtype=self._dtype)
+            chunk = ensure_contiguous_ndarray(chunk).view(self._dtype)
 
-        # reshape
+        # ensure correct chunk shape
         chunk = chunk.reshape(self._chunks, order=self._order)
 
         return chunk

From bf4eee8cc763b1917e299fcfde04a5e5d9a0938b Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Sat, 1 Dec 2018 13:21:24 -0500
Subject: [PATCH 04/14] Ensure `chunk` in `_decode_chunk` is an `ndarray`

---
 zarr/core.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index bcae03cb9f..94bd94edde 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 
 
-from numcodecs.compat import ensure_contiguous_ndarray
+from numcodecs.compat import ensure_ndarray, ensure_contiguous_ndarray
 from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1745,10 +1745,11 @@ def _decode_chunk(self, cdata):
                 chunk = f.decode(chunk)
 
         # view as numpy array with correct dtype
+        chunk = ensure_ndarray(chunk)
         if self._dtype == object:
             # special case object dtype, because incorrect handling can lead to
             # segfaults and other bad things happening
-            if isinstance(chunk, np.ndarray) and chunk.dtype == object:
+            if chunk.dtype == object:
                 # chunk is already of correct dtype, good to carry on
                 # flatten just to be sure we can reshape later
                 chunk = chunk.reshape(-1, order='A')

From b741fe12a0099cdcc0697a80b3ace31c82738cce Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Sat, 1 Dec 2018 15:47:44 -0500
Subject: [PATCH 05/14] Reshape `chunk` ourselves since it is an `ndarray`

As we already ensured the `chunk` is an `ndarray` viewing the original data,
there is no need for us to do that here as well. Plus the checks performed by
`ensure_contiguous_ndarray` are not needed for our use case here. Particularly
as we have already handled the unusual type cases above. We also don't need to
constrain the buffer size. As such the only thing we really need is to flatten
the array and make it contiguous, which is what we handle here directly.
---
 zarr/core.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index 94bd94edde..b5d0185faf 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -8,7 +8,7 @@
 
 import numpy as np
 
 
-from numcodecs.compat import ensure_ndarray, ensure_contiguous_ndarray
+from numcodecs.compat import ensure_ndarray
 from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
@@ -1761,7 +1761,7 @@ def _decode_chunk(self, cdata):
                 # array during decoding.
                 raise RuntimeError('cannot read object array without object codec')
         else:
-            chunk = ensure_contiguous_ndarray(chunk).view(self._dtype)
+            chunk = chunk.reshape(-1, order='A').view(self._dtype)
 
         # ensure correct chunk shape
         chunk = chunk.reshape(self._chunks, order=self._order)

From f3144ae6b4fdc929eb1390b1ed87ee5a35e6862f Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Sat, 1 Dec 2018 15:47:50 -0500
Subject: [PATCH 06/14] Refactor `reshape` from `_decode_chunk`

As both the expected `object` case and the non-`object` case perform a
`reshape` to flatten the data, go ahead and refactor that out of both cases
and handle it generally. Simplifies the code a bit.
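To illustrate where patches 4-6 leave the decode path, here is a minimal
sketch (the dtype, shape and values below are invented for the example; it
assumes `numcodecs.compat.ensure_ndarray` from Numcodecs >= 0.6, as used
above):

    import numpy as np
    from numcodecs.compat import ensure_ndarray

    # pretend this is what the last filter/compressor handed back: the raw
    # bytes of four little-endian int32 values
    decoded = np.arange(4, dtype='<i4').tobytes()

    chunk = ensure_ndarray(decoded)           # zero-copy uint8 view of the buffer
    chunk = chunk.view('<i4')                 # reinterpret as the target dtype
    chunk = chunk.reshape(-1, order='A')      # flatten without copying
    chunk = chunk.reshape((2, 2), order='C')  # final chunk shape

    assert chunk.tolist() == [[0, 1], [2, 3]]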
---
 zarr/core.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index b5d0185faf..ab5de14512 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -1749,11 +1749,7 @@ def _decode_chunk(self, cdata):
         if self._dtype == object:
             # special case object dtype, because incorrect handling can lead to
             # segfaults and other bad things happening
-            if chunk.dtype == object:
-                # chunk is already of correct dtype, good to carry on
-                # flatten just to be sure we can reshape later
-                chunk = chunk.reshape(-1, order='A')
-            else:
+            if chunk.dtype != object:
                 # If we end up here, someone must have hacked around with the filters.
                 # We cannot deal with object arrays unless there is an object
                 # codec in the filter chain, i.e., a filter that converts from object
@@ -1761,9 +1757,10 @@ def _decode_chunk(self, cdata):
                 # array during decoding.
                 raise RuntimeError('cannot read object array without object codec')
         else:
-            chunk = chunk.reshape(-1, order='A').view(self._dtype)
+            chunk = chunk.view(self._dtype)
 
         # ensure correct chunk shape
+        chunk = chunk.reshape(-1, order='A')
         chunk = chunk.reshape(self._chunks, order=self._order)
 
         return chunk

From 3e3920af230e059e84f70563c4f215d60f845aed Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Sat, 1 Dec 2018 15:47:53 -0500
Subject: [PATCH 07/14] Consolidate type checks in `_decode_chunk`

As refactoring of the `reshape` step has effectively dropped the expected
`object` type case, the checks for different types are a little more
complicated than needed. To fix this, basically invert and swap the case
ordering. This way we can handle all generally expected types first and simply
cast them. Then we can raise if an `object` type shows up and is unexpected.
---
 zarr/core.py | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/zarr/core.py b/zarr/core.py
index ab5de14512..a2a07a29ba 100644
--- a/zarr/core.py
+++ b/zarr/core.py
@@ -1746,18 +1746,17 @@ def _decode_chunk(self, cdata):
 
         # view as numpy array with correct dtype
         chunk = ensure_ndarray(chunk)
-        if self._dtype == object:
-            # special case object dtype, because incorrect handling can lead to
-            # segfaults and other bad things happening
-            if chunk.dtype != object:
-                # If we end up here, someone must have hacked around with the filters.
-                # We cannot deal with object arrays unless there is an object
-                # codec in the filter chain, i.e., a filter that converts from object
-                # array to something else during encoding, and converts back to object
-                # array during decoding.
-                raise RuntimeError('cannot read object array without object codec')
-        else:
+        # special case object dtype, because incorrect handling can lead to
+        # segfaults and other bad things happening
+        if self._dtype != object:
             chunk = chunk.view(self._dtype)
+        elif chunk.dtype != object:
+            # If we end up here, someone must have hacked around with the filters.
+            # We cannot deal with object arrays unless there is an object
+            # codec in the filter chain, i.e., a filter that converts from object
+            # array to something else during encoding, and converts back to object
+            # array during decoding.
+            raise RuntimeError('cannot read object array without object codec')
 
         # ensure correct chunk shape
         chunk = chunk.reshape(-1, order='A')
         chunk = chunk.reshape(self._chunks, order=self._order)

From 9badf39ff815438244f131197457f815a51d56a6 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 18:56:44 -0500
Subject: [PATCH 08/14] Drop `ensure_bytes` definition from `zarr.storage`

As Numcodecs now includes a very versatile and effective `ensure_bytes`
function, there is no need to define our own in `zarr.storage` as well. So go
ahead and drop it.
---
 zarr/storage.py | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index 6720b42d12..a490f5e8fe 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -38,8 +38,9 @@
                        normalize_storage_path, buffer_size,
                        normalize_fill_value, nolock, normalize_dtype)
 from zarr.meta import encode_array_metadata, encode_group_metadata
-from zarr.compat import PY2, binary_type, OrderedDict_move_to_end
+from zarr.compat import PY2, OrderedDict_move_to_end
 from numcodecs.registry import codec_registry
+from numcodecs.compat import ensure_bytes
 from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
                          err_fspath_exists_notdir, err_read_only, MetadataError)
 
@@ -444,23 +445,6 @@ def _init_group_metadata(store, overwrite=False, path=None, chunk_store=None):
     store[key] = encode_group_metadata(meta)
 
 
-def ensure_bytes(s):
-    if isinstance(s, binary_type):
-        return s
-    if isinstance(s, np.ndarray):
-        if PY2:  # pragma: py3 no cover
-            # noinspection PyArgumentList
-            return s.tostring(order='A')
-        else:  # pragma: py2 no cover
-            # noinspection PyArgumentList
-            return s.tobytes(order='A')
-    if hasattr(s, 'tobytes'):
-        return s.tobytes()
-    if PY2 and hasattr(s, 'tostring'):  # pragma: py3 no cover
-        return s.tostring()
-    return memoryview(s).tobytes()
-
-
 def _dict_store_keys(d, prefix='', cls=dict):
     for k in d.keys():
         v = d[k]

From 7bd8a2a1a3608def7335f1ea1a1115da5b1b1cec Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 18:56:45 -0500
Subject: [PATCH 09/14] Take flattened array views to avoid some copies

Make use of Numcodecs' `ensure_contiguous_ndarray` to take `ndarray` views
onto buffers to be stored in a few cases, so that they can be reshaped without
a copy (thanks to the buffer protocol). This ensures that datetime/timedelta
values are handled by default, catches things like object arrays, and handles
flattening the array if needed. All in all, this gets as close to a `bytes`
object as possible without copying, doing its best to preserve type
information while constructing something that fits the buffer protocol.
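As a minimal sketch of the intent (the array below is invented for
illustration; it assumes `ensure_contiguous_ndarray` from Numcodecs >= 0.6,
the same helper used in the diff below):

    import numpy as np
    from numcodecs.compat import ensure_contiguous_ndarray

    # an F-contiguous chunk that previously forced a bytes copy before storing
    a = np.asfortranarray(np.arange(6, dtype='<i8').reshape(2, 3))

    v = ensure_contiguous_ndarray(a)  # flat view onto the same memory
    assert v.ndim == 1 and np.shares_memory(a, v)

    # anything that accepts the buffer protocol can now consume it copy-free
    buf = memoryview(v)
    assert buf.nbytes == a.nbytes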
---
 zarr/storage.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/zarr/storage.py b/zarr/storage.py
index a490f5e8fe..24bd4506d0 100644
--- a/zarr/storage.py
+++ b/zarr/storage.py
@@ -31,16 +31,13 @@
 import warnings
 
 
-import numpy as np
-
-
 from zarr.util import (normalize_shape, normalize_chunks, normalize_order,
                        normalize_storage_path, buffer_size,
                        normalize_fill_value, nolock, normalize_dtype)
 from zarr.meta import encode_array_metadata, encode_group_metadata
 from zarr.compat import PY2, OrderedDict_move_to_end
 from numcodecs.registry import codec_registry
-from numcodecs.compat import ensure_bytes
+from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
 from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
                          err_fspath_exists_notdir, err_read_only, MetadataError)
 
@@ -725,9 +722,8 @@ def __getitem__(self, key):
 
     def __setitem__(self, key, value):
 
-        # handle F-contiguous numpy arrays
-        if isinstance(value, np.ndarray) and value.flags.f_contiguous:
-            value = ensure_bytes(value)
+        # coerce to flat, contiguous array (ideally without copying)
+        value = ensure_contiguous_ndarray(value)
 
         # destination path for key
         file_path = os.path.join(self.path, key)
@@ -1176,7 +1172,7 @@ def __getitem__(self, key):
 
     def __setitem__(self, key, value):
         if self.mode == 'r':
             err_read_only()
-        value = ensure_bytes(value)
+        value = ensure_contiguous_ndarray(value)
         with self.mutex:
             self.zf.writestr(key, value)

From 2c6ac778d6e07364e49c712f967cd2ab325011a5 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 18:56:45 -0500
Subject: [PATCH 10/14] Simplify `buffer_size` by using `ensure_ndarray`

Rewrite `buffer_size` to just use Numcodecs' `ensure_ndarray` to get an
`ndarray` that views the data. Once the `ndarray` is obtained, all that is
needed is to access its `nbytes` attribute, which gives the number of bytes it
takes up.
---
 zarr/util.py | 16 +++------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/zarr/util.py b/zarr/util.py
index b79865bfe8..ad882c41d5 100644
--- a/zarr/util.py
+++ b/zarr/util.py
@@ -1,6 +1,5 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import, print_function, division
-import operator
 from textwrap import TextWrapper, dedent
 import numbers
 import uuid
@@ -10,10 +9,11 @@
 from asciitree import BoxStyle, LeftAligned
 from asciitree.traversal import Traversal
 import numpy as np
+from numcodecs.compat import ensure_ndarray
 from numcodecs.registry import codec_registry
 
 
-from zarr.compat import PY2, reduce, text_type, binary_type
+from zarr.compat import PY2, text_type, binary_type
 
 
 # codecs to use for object dtype convenience API
@@ -314,17 +314,7 @@ def normalize_storage_path(path):
 
 
 def buffer_size(v):
-    from array import array as _stdlib_array
-    if PY2 and isinstance(v, _stdlib_array):  # pragma: py3 no cover
-        # special case array.array because does not support buffer
-        # interface in PY2
-        return v.buffer_info()[1] * v.itemsize
-    else:  # pragma: py2 no cover
-        v = memoryview(v)
-        if v.shape:
-            return reduce(operator.mul, v.shape) * v.itemsize
-        else:
-            return v.itemsize
+    return ensure_ndarray(v).nbytes
 
 
 def info_text_report(items):

From 398820f9137c9b2e4690f8cf7653436186af6fa9 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 18:56:46 -0500
Subject: [PATCH 11/14] Simplify `ensure_str` in `zarr.meta`

If the data is already a `str` instance, turn `ensure_str` into a no-op.
For all other cases, make use of Numcodecs' `ensure_bytes` to aid `ensure_str`
in coercing data through the buffer protocol. If we are on Python 3, then
decode the `bytes` object to a `str`.
---
 zarr/meta.py | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/zarr/meta.py b/zarr/meta.py
index 9ce580eff2..7984efb701 100644
--- a/zarr/meta.py
+++ b/zarr/meta.py
@@ -5,9 +5,10 @@
 
 
 import numpy as np
+from numcodecs.compat import ensure_bytes
 
 
-from zarr.compat import PY2, binary_type, Mapping
+from zarr.compat import PY2, Mapping
 from zarr.errors import MetadataError
 
@@ -15,14 +16,9 @@
 
 
 def ensure_str(s):
-    if PY2:  # pragma: py3 no cover
-        # noinspection PyUnresolvedReferences
-        if isinstance(s, buffer):  # noqa
-            s = str(s)
-    else:  # pragma: py2 no cover
-        if isinstance(s, memoryview):
-            s = s.tobytes()
-        if isinstance(s, binary_type):
+    if not isinstance(s, str):
+        s = ensure_bytes(s)
+        if not PY2:  # pragma: py2 no cover
             s = s.decode('ascii')
     return s

From bc4d57907a0406cea92f3fa18c4c0daad54e44fc Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 18:59:20 -0500
Subject: [PATCH 12/14] Bump to Numcodecs 0.6.2

---
 requirements_dev.txt | 2 +-
 setup.py             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements_dev.txt b/requirements_dev.txt
index d39ba9e9b8..03eaa8e871 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -1,3 +1,3 @@
 asciitree==0.3.3
 fasteners==0.14.1
-numcodecs==0.6.1
+numcodecs==0.6.2
diff --git a/setup.py b/setup.py
index 903af3bc04..b6d237fe0a 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@
         'asciitree',
         'numpy>=1.7',
         'fasteners',
-        'numcodecs>=0.6.1',
+        'numcodecs>=0.6.2',
     ],
     package_dir={'': '.'},
     packages=['zarr', 'zarr.tests'],

From efacb5234573fae72661d50e15180db414fe6ff8 Mon Sep 17 00:00:00 2001
From: John Kirkham
Date: Mon, 3 Dec 2018 19:11:06 -0500
Subject: [PATCH 13/14] Update tutorial's info content

As Blosc got upgraded and brought an upgraded Zstd along with it, the results
for this example changed slightly. So update them accordingly. Should fix the
doctest failure.
---
 docs/tutorial.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/tutorial.rst b/docs/tutorial.rst
index 606b5acef5..29ce8b0935 100644
--- a/docs/tutorial.rst
+++ b/docs/tutorial.rst
@@ -178,8 +178,8 @@ print some diagnostics, e.g.::
                        : blocksize=0)
     Store type         : builtins.dict
     No. bytes          : 400000000 (381.5M)
-    No. bytes stored   : 3242241 (3.1M)
-    Storage ratio      : 123.4
+    No. bytes stored   : 3379344 (3.2M)
+    Storage ratio      : 118.4
     Chunks initialized : 100/100
 
 If you don't specify a compressor, by default Zarr uses the Blosc

From cc1d7762e64a78cd0e8941430479de4a18d70382 Mon Sep 17 00:00:00 2001
From: Alistair Miles
Date: Tue, 4 Dec 2018 22:24:43 +0000
Subject: [PATCH 14/14] release notes [ci skip]

---
 docs/release.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/release.rst b/docs/release.rst
index 96ac7c8f2f..335ec58fb3 100644
--- a/docs/release.rst
+++ b/docs/release.rst
@@ -22,6 +22,11 @@ Enhancements
 Maintenance
 ~~~~~~~~~~~
 
+* The required version of the `numcodecs `_ package has been upgraded to
+  0.6.2, which has enabled some code simplification and fixes a failing test
+  involving msgpack encoding. By :user:`John Kirkham `, :issue:`352`,
+  :issue:`355`, :issue:`324`.
+
 * CI and test environments have been upgraded to include Python 3.7, drop Python 3.4,
   and upgrade all pinned package requirements. :issue:`308`.