Skip to content
12 changes: 12 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
Release notes
=============

.. _release_2.3.2:

2.3.2
-----

Bug fixes
~~~~~~~~~

* Coerce data to text for JSON parsing.
By :user:`John Kirkham <jakirkham>`; :issue:`429`


.. _release_2.3.1:

2.3.1
Expand Down
6 changes: 2 additions & 4 deletions zarr/convenience.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from zarr.errors import err_path_not_found, CopyError
from zarr.util import normalize_storage_path, TreeViewer, buffer_size
from zarr.compat import PY2, text_type
from zarr.meta import ensure_str, json_dumps
from zarr.meta import json_dumps, json_loads


# noinspection PyShadowingBuiltins
Expand Down Expand Up @@ -1112,8 +1112,6 @@ def consolidate_metadata(store, metadata_key='.zmetadata'):
open_consolidated

"""
import json

store = normalize_store_arg(store)

def is_zarr_key(key):
Expand All @@ -1123,7 +1121,7 @@ def is_zarr_key(key):
out = {
'zarr_consolidated_format': 1,
'metadata': {
key: json.loads(ensure_str(store[key]))
key: json_loads(store[key])
for key in store if is_zarr_key(key)
}
}
Expand Down
22 changes: 13 additions & 9 deletions zarr/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
from __future__ import absolute_import, print_function, division
import json
import base64
import codecs


import numpy as np
from numcodecs.compat import ensure_bytes
from numcodecs.compat import ensure_contiguous_ndarray


from zarr.compat import PY2, Mapping
from zarr.compat import PY2, Mapping, text_type
from zarr.errors import MetadataError


ZARR_FORMAT = 2


def ensure_str(s):
if not isinstance(s, str):
s = ensure_bytes(s)
if not PY2: # pragma: py2 no cover
s = s.decode('ascii')
def ensure_text_type(s):
    """Return ``s`` as text, decoding it as ASCII if it is not text already.

    Values that are already instances of ``text_type`` are returned
    unchanged. Anything else is first passed through
    ``ensure_contiguous_ndarray`` and the result is then decoded with the
    ``'ascii'`` codec via ``codecs.decode``.
    """
    if isinstance(s, text_type):
        # Already text: nothing to decode.
        return s

    buf = ensure_contiguous_ndarray(s)
    return codecs.decode(buf, 'ascii')


Expand All @@ -29,6 +29,11 @@ def json_dumps(o):
separators=(',', ': '))


def json_loads(s):
    """Read JSON in a consistent way.

    The input is coerced to text with ``ensure_text_type`` before being
    handed to ``json.loads``, so callers do not have to decode it
    themselves.
    """
    text = ensure_text_type(s)
    return json.loads(text)


def parse_metadata(s):

# Here we allow that a store may return an already-parsed metadata object,
Expand All @@ -42,8 +47,7 @@ def parse_metadata(s):

else:
# assume metadata needs to be parsed as JSON
s = ensure_str(s)
meta = json.loads(s)
meta = json_loads(s)

return meta

Expand Down
15 changes: 5 additions & 10 deletions zarr/n5.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""This module contains a storage class and codec to support the N5 format.
"""
from __future__ import absolute_import, division
from .meta import ZARR_FORMAT, ensure_str, json_dumps
from .meta import ZARR_FORMAT, json_dumps, json_loads
from .storage import (
NestedDirectoryStore,
group_meta_key as zarr_group_meta_key,
Expand All @@ -12,7 +12,6 @@
from numcodecs.abc import Codec
from numcodecs.compat import ndarray_copy
from numcodecs.registry import register_codec, get_codec
import json
import numpy as np
import struct
import sys
Expand Down Expand Up @@ -103,29 +102,26 @@ def __setitem__(self, key, value):

key = key.replace(zarr_group_meta_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
n5_attrs.update(**group_metadata_to_n5(json.loads(value)))
n5_attrs.update(**group_metadata_to_n5(json_loads(value)))

value = json_dumps(n5_attrs).encode('ascii')

elif key.endswith(zarr_array_meta_key):

key = key.replace(zarr_array_meta_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
n5_attrs.update(**array_metadata_to_n5(json.loads(value)))
n5_attrs.update(**array_metadata_to_n5(json_loads(value)))

value = json_dumps(n5_attrs).encode('ascii')

elif key.endswith(zarr_attrs_key):

key = key.replace(zarr_attrs_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
zarr_attrs = json.loads(value)
zarr_attrs = json_loads(value)

for k in n5_keywords:
if k in zarr_attrs.keys():
Expand Down Expand Up @@ -246,8 +242,7 @@ def listdir(self, path=None):
def _load_n5_attrs(self, path):
try:
s = super(N5Store, self).__getitem__(path)
s = ensure_str(s)
return json.loads(s)
return json_loads(s)
except KeyError:
return {}

Expand Down
12 changes: 2 additions & 10 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
normalize_storage_path, buffer_size,
normalize_fill_value, nolock, normalize_dtype)
from zarr.meta import encode_array_metadata, encode_group_metadata
from zarr.compat import PY2, OrderedDict_move_to_end, binary_type
from zarr.compat import PY2, OrderedDict_move_to_end
from numcodecs.registry import codec_registry
from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
Expand Down Expand Up @@ -2296,15 +2296,7 @@ def __getitem__(self, key):
if doc is None:
raise KeyError(key)
else:
value = doc[self._value]

# Coerce `bson.Binary` to `bytes` type on Python 2.
# PyMongo handles this conversion for us on Python 3.
# ref: http://api.mongodb.com/python/current/python3.html#id3
if PY2: # pragma: py3 no cover
value = binary_type(value)

return value
return doc[self._value]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes me very happy to see

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise. 🙂

FWIW, it turns out this is not Python 2 specific. Previously we only decoded to text before parsing JSON on Python 3 (hence avoiding the issue there). With this change we always decode to text before parsing JSON. Here's a short reproducer.

>>> import json
>>> json.loads(b"{}")
{}
>>> json.loads(b"{\x00}")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/jkirkham/miniconda/lib/python3.7/json/__init__.py", line 348, in loads
    return _default_decoder.decode(s)
  File "/Users/jkirkham/miniconda/lib/python3.7/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/jkirkham/miniconda/lib/python3.7/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)


def __setitem__(self, key, value):
value = ensure_bytes(value)
Expand Down
7 changes: 3 additions & 4 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import unittest
from tempfile import mkdtemp, mktemp
import atexit
import json
import shutil
import pickle
import os
Expand All @@ -26,7 +25,7 @@
from zarr.core import Array
from zarr.errors import PermissionError
from zarr.compat import PY2, text_type, binary_type, zip_longest
from zarr.meta import ensure_str
from zarr.meta import json_loads
from zarr.util import buffer_size
from zarr.n5 import n5_keywords, N5Store
from numcodecs import (Delta, FixedScaleOffset, LZ4, GZip, Zlib, Blosc, BZ2, MsgPack, Pickle,
Expand Down Expand Up @@ -1273,10 +1272,10 @@ def test_endian(self):
def test_attributes(self):
a = self.create_array(shape=10, chunks=10, dtype='i8')
a.attrs['foo'] = 'bar'
attrs = json.loads(ensure_str(a.store[a.attrs.key]))
attrs = json_loads(a.store[a.attrs.key])
assert 'foo' in attrs and attrs['foo'] == 'bar'
a.attrs['bar'] = 'foo'
attrs = json.loads(ensure_str(a.store[a.attrs.key]))
attrs = json_loads(a.store[a.attrs.key])
assert 'foo' in attrs and attrs['foo'] == 'bar'
assert 'bar' in attrs and attrs['bar'] == 'foo'

Expand Down