Skip to content
12 changes: 12 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,18 @@
Release notes
=============

.. _release_2.3.2:

2.3.2
-----

Bug fixes
~~~~~~~~~

* Coerce data to text for JSON parsing.
By :user:`John Kirkham <jakirkham>`; :issue:`429`


.. _release_2.3.1:

2.3.1
Expand Down
6 changes: 2 additions & 4 deletions zarr/convenience.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from zarr.errors import err_path_not_found, CopyError
from zarr.util import normalize_storage_path, TreeViewer, buffer_size
from zarr.compat import PY2, text_type
from zarr.meta import ensure_str, json_dumps
from zarr.meta import json_dumps, json_loads


# noinspection PyShadowingBuiltins
Expand Down Expand Up @@ -1112,8 +1112,6 @@ def consolidate_metadata(store, metadata_key='.zmetadata'):
open_consolidated

"""
import json

store = normalize_store_arg(store)

def is_zarr_key(key):
Expand All @@ -1123,7 +1121,7 @@ def is_zarr_key(key):
out = {
'zarr_consolidated_format': 1,
'metadata': {
key: json.loads(ensure_str(store[key]))
key: json_loads(store[key])
for key in store if is_zarr_key(key)
}
}
Expand Down
22 changes: 13 additions & 9 deletions zarr/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
from __future__ import absolute_import, print_function, division
import json
import base64
import codecs


import numpy as np
from numcodecs.compat import ensure_bytes
from numcodecs.compat import ensure_contiguous_ndarray


from zarr.compat import PY2, Mapping
from zarr.compat import PY2, Mapping, text_type
from zarr.errors import MetadataError


ZARR_FORMAT = 2


def ensure_str(s):
if not isinstance(s, str):
s = ensure_bytes(s)
if not PY2: # pragma: py2 no cover
s = s.decode('ascii')
def ensure_text_type(s):
    """Return ``s`` as text, decoding it as ASCII when it is not text already.

    Values that are already instances of ``text_type`` are returned
    unchanged.  Anything else is first passed through
    ``ensure_contiguous_ndarray`` and the result is decoded with the
    ``'ascii'`` codec.
    """
    if isinstance(s, text_type):
        return s

    # Not text yet: normalise via numcodecs, then decode to text.
    buf = ensure_contiguous_ndarray(s)
    return codecs.decode(buf, 'ascii')


Expand All @@ -29,6 +29,11 @@ def json_dumps(o):
separators=(',', ': '))


def json_loads(s):
    """Parse a JSON document after coercing the input to text.

    The input is passed through ``ensure_text_type`` first, so that
    ``json.loads`` is always handed text, and the parsed object is
    returned.
    """
    text = ensure_text_type(s)
    return json.loads(text)


def parse_metadata(s):

# Here we allow that a store may return an already-parsed metadata object,
Expand All @@ -42,8 +47,7 @@ def parse_metadata(s):

else:
# assume metadata needs to be parsed as JSON
s = ensure_str(s)
meta = json.loads(s)
meta = json_loads(s)

return meta

Expand Down
15 changes: 5 additions & 10 deletions zarr/n5.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"""This module contains a storage class and codec to support the N5 format.
"""
from __future__ import absolute_import, division
from .meta import ZARR_FORMAT, ensure_str, json_dumps
from .meta import ZARR_FORMAT, json_dumps, json_loads
from .storage import (
NestedDirectoryStore,
group_meta_key as zarr_group_meta_key,
Expand All @@ -12,7 +12,6 @@
from numcodecs.abc import Codec
from numcodecs.compat import ndarray_copy
from numcodecs.registry import register_codec, get_codec
import json
import numpy as np
import struct
import sys
Expand Down Expand Up @@ -103,29 +102,26 @@ def __setitem__(self, key, value):

key = key.replace(zarr_group_meta_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
n5_attrs.update(**group_metadata_to_n5(json.loads(value)))
n5_attrs.update(**group_metadata_to_n5(json_loads(value)))

value = json_dumps(n5_attrs).encode('ascii')

elif key.endswith(zarr_array_meta_key):

key = key.replace(zarr_array_meta_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
n5_attrs.update(**array_metadata_to_n5(json.loads(value)))
n5_attrs.update(**array_metadata_to_n5(json_loads(value)))

value = json_dumps(n5_attrs).encode('ascii')

elif key.endswith(zarr_attrs_key):

key = key.replace(zarr_attrs_key, n5_attrs_key)

value = ensure_str(value)
n5_attrs = self._load_n5_attrs(key)
zarr_attrs = json.loads(value)
zarr_attrs = json_loads(value)

for k in n5_keywords:
if k in zarr_attrs.keys():
Expand Down Expand Up @@ -246,8 +242,7 @@ def listdir(self, path=None):
def _load_n5_attrs(self, path):
try:
s = super(N5Store, self).__getitem__(path)
s = ensure_str(s)
return json.loads(s)
return json_loads(s)
except KeyError:
return {}

Expand Down
12 changes: 2 additions & 10 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
normalize_storage_path, buffer_size,
normalize_fill_value, nolock, normalize_dtype)
from zarr.meta import encode_array_metadata, encode_group_metadata
from zarr.compat import PY2, OrderedDict_move_to_end, binary_type
from zarr.compat import PY2, OrderedDict_move_to_end
from numcodecs.registry import codec_registry
from numcodecs.compat import ensure_bytes, ensure_contiguous_ndarray
from zarr.errors import (err_contains_group, err_contains_array, err_bad_compressor,
Expand Down Expand Up @@ -2296,15 +2296,7 @@ def __getitem__(self, key):
if doc is None:
raise KeyError(key)
else:
value = doc[self._value]

# Coerce `bson.Binary` to `bytes` type on Python 2.
# PyMongo handles this conversion for us on Python 3.
# ref: http://api.mongodb.com/python/current/python3.html#id3
if PY2: # pragma: py3 no cover
value = binary_type(value)

return value
return doc[self._value]
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this makes me very happy to see

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise. 🙂

FWIW it turns out this is not Python 2 specific. Previously we only decoded the data before parsing JSON on Python 3 (which is why the issue never showed up there). With this change we always decode to text before parsing JSON. Here's a short reproducer.

>>> import json
>>> json.loads(b"{}")
{}
>>> json.loads(b"{\x00}")
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/Users/jkirkham/miniconda/lib/python3.7/json/__init__.py", line 348, in loads
    return _default_decoder.decode(s)
  File "/Users/jkirkham/miniconda/lib/python3.7/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/jkirkham/miniconda/lib/python3.7/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)


def __setitem__(self, key, value):
value = ensure_bytes(value)
Expand Down
7 changes: 3 additions & 4 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import unittest
from tempfile import mkdtemp, mktemp
import atexit
import json
import shutil
import pickle
import os
Expand All @@ -26,7 +25,7 @@
from zarr.core import Array
from zarr.errors import PermissionError
from zarr.compat import PY2, text_type, binary_type, zip_longest
from zarr.meta import ensure_str
from zarr.meta import json_loads
from zarr.util import buffer_size
from zarr.n5 import n5_keywords, N5Store
from numcodecs import (Delta, FixedScaleOffset, LZ4, GZip, Zlib, Blosc, BZ2, MsgPack, Pickle,
Expand Down Expand Up @@ -1273,10 +1272,10 @@ def test_endian(self):
def test_attributes(self):
a = self.create_array(shape=10, chunks=10, dtype='i8')
a.attrs['foo'] = 'bar'
attrs = json.loads(ensure_str(a.store[a.attrs.key]))
attrs = json_loads(a.store[a.attrs.key])
assert 'foo' in attrs and attrs['foo'] == 'bar'
a.attrs['bar'] = 'foo'
attrs = json.loads(ensure_str(a.store[a.attrs.key]))
attrs = json_loads(a.store[a.attrs.key])
assert 'foo' in attrs and attrs['foo'] == 'bar'
assert 'bar' in attrs and attrs['bar'] == 'foo'

Expand Down