From eaf9f636d6852f560ebfaffc2e10f3447117cf65 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:23:03 -0400 Subject: [PATCH 1/9] Move `ensure_text_type` to `zarr.util` As `ensure_text_type` is a more general utility function, go ahead and move it over to `zarr.util` and drop it from `zarr.meta`. Though make sure to import it into `zarr.meta` so that it can still be used there. --- zarr/meta.py | 12 ++---------- zarr/util.py | 10 +++++++++- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 29eb987584..380b1f132e 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -2,27 +2,19 @@ from __future__ import absolute_import, print_function, division import json import base64 -import codecs import numpy as np -from numcodecs.compat import ensure_contiguous_ndarray -from zarr.compat import PY2, Mapping, text_type +from zarr.compat import PY2, Mapping from zarr.errors import MetadataError +from zarr.util import ensure_text_type ZARR_FORMAT = 2 -def ensure_text_type(s): - if not isinstance(s, text_type): - s = ensure_contiguous_ndarray(s) - s = codecs.decode(s, 'ascii') - return s - - def json_dumps(o): """Write JSON in a consistent, human-readable way.""" return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, diff --git a/zarr/util.py b/zarr/util.py index ad882c41d5..f368c26523 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division from textwrap import TextWrapper, dedent +import codecs import numbers import uuid import inspect @@ -9,7 +10,7 @@ from asciitree import BoxStyle, LeftAligned from asciitree.traversal import Traversal import numpy as np -from numcodecs.compat import ensure_ndarray +from numcodecs.compat import ensure_ndarray, ensure_contiguous_ndarray from numcodecs.registry import codec_registry @@ -24,6 +25,13 @@ } +def ensure_text_type(s): + if not isinstance(s, text_type): + s = 
ensure_contiguous_ndarray(s) + s = codecs.decode(s, 'ascii') + return s + + def normalize_shape(shape): """Convenience function to normalize the `shape` argument.""" From fce3cece3579153643de1028edeaee48b6655860 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:28:50 -0400 Subject: [PATCH 2/9] Move `json_dumps` to `zarr.util` As `json_dumps` has broader use than just for metadata (e.g. attributes would also benefit from it), this moves `json_dumps` to `zarr.util`. --- zarr/meta.py | 8 +------- zarr/util.py | 7 +++++++ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index 380b1f132e..da521acfae 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -9,18 +9,12 @@ from zarr.compat import PY2, Mapping from zarr.errors import MetadataError -from zarr.util import ensure_text_type +from zarr.util import ensure_text_type, json_dumps ZARR_FORMAT = 2 -def json_dumps(o): - """Write JSON in a consistent, human-readable way.""" - return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, - separators=(',', ': ')) - - def json_loads(s): """Read JSON in a consistent way.""" return json.loads(ensure_text_type(s)) diff --git a/zarr/util.py b/zarr/util.py index f368c26523..fb3f8d227e 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -2,6 +2,7 @@ from __future__ import absolute_import, print_function, division from textwrap import TextWrapper, dedent import codecs +import json import numbers import uuid import inspect @@ -32,6 +33,12 @@ def ensure_text_type(s): return s +def json_dumps(o): + """Write JSON in a consistent, human-readable way.""" + return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, + separators=(',', ': ')) + + def normalize_shape(shape): """Convenience function to normalize the `shape` argument.""" From 3ae25319863d83911575034fbaf0bf9b60b6ac87 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:31:01 -0400 Subject: [PATCH 3/9] Move `json_loads` to `zarr.util` As `json_loads` 
is more generally useful than just parsing metadata, move it over to `zarr.util`. Though ensure it is still imported in `zarr.meta`. --- zarr/meta.py | 8 +------- zarr/util.py | 5 +++++ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/zarr/meta.py b/zarr/meta.py index da521acfae..c1410dd655 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -1,6 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -import json import base64 @@ -9,17 +8,12 @@ from zarr.compat import PY2, Mapping from zarr.errors import MetadataError -from zarr.util import ensure_text_type, json_dumps +from zarr.util import json_dumps, json_loads ZARR_FORMAT = 2 -def json_loads(s): - """Read JSON in a consistent way.""" - return json.loads(ensure_text_type(s)) - - def parse_metadata(s): # Here we allow that a store may return an already-parsed metadata object, diff --git a/zarr/util.py b/zarr/util.py index fb3f8d227e..8611c18962 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -39,6 +39,11 @@ def json_dumps(o): separators=(',', ': ')) +def json_loads(s): + """Read JSON in a consistent way.""" + return json.loads(ensure_text_type(s)) + + def normalize_shape(shape): """Convenience function to normalize the `shape` argument.""" From 3742f96eb845d4a82d0156305fb6de95e93223e2 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:37:47 -0400 Subject: [PATCH 4/9] Use `json_loads` in `ConsolidatedMetadataStore` Should ensure that the `ConsolidatedMetadataStore` is able to handle loading JSON data in a standard way. 
--- zarr/storage.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 4539d8d009..b86b8dc653 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -26,7 +26,6 @@ import errno import re import sys -import json import multiprocessing from pickle import PicklingError from threading import Lock, RLock @@ -34,7 +33,7 @@ import warnings -from zarr.util import (normalize_shape, normalize_chunks, normalize_order, +from zarr.util import (json_loads, normalize_shape, normalize_chunks, normalize_order, normalize_storage_path, buffer_size, normalize_fill_value, nolock, normalize_dtype) from zarr.meta import encode_array_metadata, encode_group_metadata @@ -2458,7 +2457,7 @@ def __init__(self, store, metadata_key='.zmetadata'): d = store[metadata_key].decode() # pragma: no cover else: # pragma: no cover d = store[metadata_key] - meta = json.loads(d) + meta = json_loads(d) # check format of consolidated metadata consolidated_format = meta.get('zarr_consolidated_format', None) From c59a54f8f7339ea954e6f0f0fd35bf62a0439987 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:41:45 -0400 Subject: [PATCH 5/9] Use `json_dumps` to serialize attributes --- zarr/attrs.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 21cb77bc10..6752846ba4 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division -import json from collections import MutableMapping from zarr.errors import PermissionError from zarr.meta import parse_metadata +from zarr.util import json_dumps class Attributes(MutableMapping): @@ -113,8 +113,7 @@ def put(self, d): self._write_op(self._put_nosync, d) def _put_nosync(self, d): - s = json.dumps(d, indent=4, sort_keys=True, ensure_ascii=True, - separators=(',', ': ')) + s = json_dumps(d) self.store[self.key] = s.encode('ascii') if self.cache: 
self._cached_asdict = d From 47cff3e9121cc25b08474fb5cb419ddeea25dcd8 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:48:41 -0400 Subject: [PATCH 6/9] Handle `encode` within `json_dumps` As every use of `json_dumps` is followed by a call to `encode` to convert the string to bytes, go ahead and incorporate this step in `json_dumps`. Should make it a bit easier to use. --- zarr/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/util.py b/zarr/util.py index 8611c18962..daa436d9ef 100644 --- a/zarr/util.py +++ b/zarr/util.py @@ -36,7 +36,7 @@ def ensure_text_type(s): def json_dumps(o): """Write JSON in a consistent, human-readable way.""" return json.dumps(o, indent=4, sort_keys=True, ensure_ascii=True, - separators=(',', ': ')) + separators=(',', ': ')).encode('ascii') def json_loads(s): From 05de729cb154552d4320a53b509fd53743bcc428 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 19:50:26 -0400 Subject: [PATCH 7/9] Drop `encode` from `json_dumps` output As the `encode` step is now part of `json_dumps`, remove that step when using the `json_dumps` function. 
--- zarr/attrs.py | 4 ++-- zarr/convenience.py | 2 +- zarr/meta.py | 6 ++---- zarr/n5.py | 12 ++++++------ 4 files changed, 11 insertions(+), 13 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 6752846ba4..92245b4313 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -113,8 +113,8 @@ def put(self, d): self._write_op(self._put_nosync, d) def _put_nosync(self, d): - s = json_dumps(d) - self.store[self.key] = s.encode('ascii') + b = json_dumps(d) + self.store[self.key] = b if self.cache: self._cached_asdict = d diff --git a/zarr/convenience.py b/zarr/convenience.py index efa0a99c41..204533524f 100644 --- a/zarr/convenience.py +++ b/zarr/convenience.py @@ -1125,7 +1125,7 @@ def is_zarr_key(key): for key in store if is_zarr_key(key) } } - store[metadata_key] = json_dumps(out).encode() + store[metadata_key] = json_dumps(out) return open_consolidated(store, metadata_key=metadata_key) diff --git a/zarr/meta.py b/zarr/meta.py index c1410dd655..d02b57ee09 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -75,8 +75,7 @@ def encode_array_metadata(meta): order=meta['order'], filters=meta['filters'], ) - s = json_dumps(meta) - b = s.encode('ascii') + b = json_dumps(meta) return b @@ -122,8 +121,7 @@ def encode_group_metadata(meta=None): meta = dict( zarr_format=ZARR_FORMAT, ) - s = json_dumps(meta) - b = s.encode('ascii') + b = json_dumps(meta) return b diff --git a/zarr/n5.py b/zarr/n5.py index 48dfe0a5e5..34018098ed 100644 --- a/zarr/n5.py +++ b/zarr/n5.py @@ -71,14 +71,14 @@ def __getitem__(self, key): key = key.replace(zarr_group_meta_key, n5_attrs_key) value = group_metadata_to_zarr(self._load_n5_attrs(key)) - return json_dumps(value).encode('ascii') + return json_dumps(value) elif key.endswith(zarr_array_meta_key): key = key.replace(zarr_array_meta_key, n5_attrs_key) value = array_metadata_to_zarr(self._load_n5_attrs(key)) - return json_dumps(value).encode('ascii') + return json_dumps(value) elif key.endswith(zarr_attrs_key): @@ -88,7 +88,7 @@ def 
__getitem__(self, key): if len(value) == 0: raise KeyError(key) else: - return json_dumps(value).encode('ascii') + return json_dumps(value) elif is_chunk_key(key): @@ -105,7 +105,7 @@ def __setitem__(self, key, value): n5_attrs = self._load_n5_attrs(key) n5_attrs.update(**group_metadata_to_n5(json_loads(value))) - value = json_dumps(n5_attrs).encode('ascii') + value = json_dumps(n5_attrs) elif key.endswith(zarr_array_meta_key): @@ -114,7 +114,7 @@ def __setitem__(self, key, value): n5_attrs = self._load_n5_attrs(key) n5_attrs.update(**array_metadata_to_n5(json_loads(value))) - value = json_dumps(n5_attrs).encode('ascii') + value = json_dumps(n5_attrs) elif key.endswith(zarr_attrs_key): @@ -135,7 +135,7 @@ def __setitem__(self, key, value): # add new user attributes n5_attrs.update(**zarr_attrs) - value = json_dumps(n5_attrs).encode('ascii') + value = json_dumps(n5_attrs) elif is_chunk_key(key): From f3dcbadeed073d45595f9b165cb97bd380695267 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 20:04:12 -0400 Subject: [PATCH 8/9] Drop unneeded intermediates in `json_dumps` usage After moving `encode` within `json_dumps`, there were intermediate variables left over that weren't really needed. This drops them. 
--- zarr/attrs.py | 3 +-- zarr/meta.py | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/zarr/attrs.py b/zarr/attrs.py index 92245b4313..d502968d2e 100644 --- a/zarr/attrs.py +++ b/zarr/attrs.py @@ -113,8 +113,7 @@ def put(self, d): self._write_op(self._put_nosync, d) def _put_nosync(self, d): - b = json_dumps(d) - self.store[self.key] = b + self.store[self.key] = json_dumps(d) if self.cache: self._cached_asdict = d diff --git a/zarr/meta.py b/zarr/meta.py index d02b57ee09..8b5012c612 100644 --- a/zarr/meta.py +++ b/zarr/meta.py @@ -75,8 +75,7 @@ def encode_array_metadata(meta): order=meta['order'], filters=meta['filters'], ) - b = json_dumps(meta) - return b + return json_dumps(meta) def encode_dtype(d): @@ -121,8 +120,7 @@ def encode_group_metadata(meta=None): meta = dict( zarr_format=ZARR_FORMAT, ) - b = json_dumps(meta) - return b + return json_dumps(meta) FLOAT_FILLS = { From ad3f66f75362e4e3d46f18700f8433e10018f6dd Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Fri, 19 Apr 2019 20:18:30 -0400 Subject: [PATCH 9/9] Update release note on JSON reading and writing --- docs/release.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index c9c5bb70fe..c0ed05c930 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -9,8 +9,8 @@ Release notes Bug fixes ~~~~~~~~~ -* Coerce data to text for JSON parsing. - By :user:`John Kirkham <jakirkham>`; :issue:`429` +* Add and use utility functions to simplify reading and writing JSON. + By :user:`John Kirkham <jakirkham>`; :issue:`429`, :issue:`430` .. _release_2.3.1: