From ca7e01b2aba7aa4e2513831de5e0592f5e6aa69e Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Sun, 19 Feb 2017 18:33:52 -0500 Subject: [PATCH 1/2] Add AsType codec from Zarr. --- numcodecs/__init__.py | 3 ++ numcodecs/astype.py | 88 ++++++++++++++++++++++++++++++++++ numcodecs/tests/test_astype.py | 60 +++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 numcodecs/astype.py create mode 100644 numcodecs/tests/test_astype.py diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 425d10df..973d6ea1 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -49,6 +49,9 @@ except ImportError: # pragma: no cover pass +from numcodecs.astype import AsType +register_codec(AsType) + from numcodecs.delta import Delta register_codec(Delta) diff --git a/numcodecs/astype.py b/numcodecs/astype.py new file mode 100644 index 00000000..66268b4f --- /dev/null +++ b/numcodecs/astype.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function, division + +import numpy as np + +from numcodecs.abc import Codec +from numcodecs.compat import buffer_copy, ndarray_from_buffer + + + +class AsType(Codec): + """Filter to convert data between different types. + + Parameters + ---------- + encode_dtype : dtype + Data type to use for encoded data. + decode_dtype : dtype, optional + Data type to use for decoded data. + + Notes + ----- + If `encode_dtype` is of lower precision than `decode_dtype`, please be + aware that data loss can occur by writing data to disk using this filter. + No checks are made to ensure the casting will work in that direction and + data corruption will occur. + + Examples + -------- + >>> import numcodecs + >>> import numpy as np + >>> x = np.arange(100, 120, 2, dtype=np.int8) + >>> x + array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8) + >>> f = numcodecs.AsType(encode_dtype=x.dtype, decode_dtype=np.int64) + >>> y = f.decode(x) + >>> y + array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118]) + >>> z = f.encode(y) + >>> z + array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118], dtype=int8) + + """ # flake8: noqa + + codec_id = 'astype' + + def __init__(self, encode_dtype, decode_dtype): + self.encode_dtype = np.dtype(encode_dtype) + self.decode_dtype = np.dtype(decode_dtype) + + def encode(self, buf): + + # view input data as 1D array + arr = ndarray_from_buffer(buf, self.decode_dtype) + + # convert and copy + enc = arr.astype(self.encode_dtype) + + return enc + + def decode(self, buf, out=None): + + # view encoded data as 1D array + enc = ndarray_from_buffer(buf, self.encode_dtype) + + # convert and copy + dec = enc.astype(self.decode_dtype) + + # handle output + out = buffer_copy(dec, out) + + return out + + def get_config(self): + config = dict() + config['id'] = self.codec_id + config['encode_dtype'] = self.encode_dtype.str + config['decode_dtype'] = self.decode_dtype.str + return config + + def __repr__(self): + return ( + '%s(encode_dtype=%s, decode_dtype=%s)' % ( + type(self).__name__, + self.encode_dtype, + self.decode_dtype + ) + ) diff --git a/numcodecs/tests/test_astype.py b/numcodecs/tests/test_astype.py new file mode 100644 index 00000000..3be00fb5 --- /dev/null +++ b/numcodecs/tests/test_astype.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +from __future__ import absolute_import, print_function, division + + +import numpy as np +from numpy.testing import assert_array_equal +from nose.tools import eq_ as eq + + +from numcodecs.astype import AsType +from numcodecs.tests.common import check_encode_decode, check_config, \ + check_repr + + +# mix of dtypes: integer, float +# mix of shapes: 1D, 2D, 3D +# mix of orders: C, F +arrays = [ + np.arange(1000, dtype='i4'), + np.linspace(1000, 1001, 1000, dtype='f8').reshape(100, 10), + np.random.normal(loc=1000, scale=1, size=(10, 10, 10)), + np.random.randint(0, 200, size=1000, dtype='u2').reshape(100, 10, + order='F'), +] + + +def test_encode_decode(): + for arr in arrays: + codec = AsType(encode_dtype=arr.dtype, decode_dtype=arr.dtype) + check_encode_decode(arr, codec) + + +def test_decode(): + encode_dtype, decode_dtype = ' Date: Mon, 20 Feb 2017 22:43:38 -0500 Subject: [PATCH 2/2] Use string representations of AsType's dtypes. --- numcodecs/astype.py | 6 +++--- numcodecs/tests/test_astype.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/numcodecs/astype.py b/numcodecs/astype.py index 66268b4f..6ecc658e 100644 --- a/numcodecs/astype.py +++ b/numcodecs/astype.py @@ -80,9 +80,9 @@ def get_config(self): def __repr__(self): return ( - '%s(encode_dtype=%s, decode_dtype=%s)' % ( + '%s(encode_dtype=%r, decode_dtype=%r)' % ( type(self).__name__, - self.encode_dtype, - self.decode_dtype + self.encode_dtype.str, + self.decode_dtype.str ) ) diff --git a/numcodecs/tests/test_astype.py b/numcodecs/tests/test_astype.py index 3be00fb5..9211b164 100644 --- a/numcodecs/tests/test_astype.py +++ b/numcodecs/tests/test_astype.py @@ -57,4 +57,4 @@ def test_config(): def test_repr(): - check_repr("AsType(encode_dtype=int32, decode_dtype=int64)") + check_repr("AsType(encode_dtype='