Skip to content

Commit 3d692fd

Browse files
pp-mo authored and djkirkham committed
Add cube co-realisation. (#2967)
1 parent 089b14f commit 3d692fd

File tree

4 files changed

+162
-10
lines changed

4 files changed

+162
-10
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
* Added new function :func:`iris.co_realise_cubes` to compute multiple lazy
2+
values in a single operation, avoiding repeated re-loading of data or
3+
re-calculation of expressions.

lib/iris/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ def callback(cube, field, filename):
112112
from iris._deprecation import IrisDeprecation, warn_deprecated
113113
import iris.fileformats
114114
import iris.io
115+
from iris._lazy_data import co_realise_cubes
115116

116117

117118
try:
@@ -127,7 +128,7 @@ def callback(cube, field, filename):
127128
__all__ = ['load', 'load_cube', 'load_cubes', 'load_raw',
128129
'save', 'Constraint', 'AttributeConstraint', 'sample_data_path',
129130
'site_configuration', 'Future', 'FUTURE',
130-
'IrisDeprecation']
131+
'IrisDeprecation', 'co_realise_cubes']
131132

132133

133134
Constraint = iris._constraints.Constraint

lib/iris/_lazy_data.py

Lines changed: 71 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# (C) British Crown Copyright 2017, Met Office
1+
# (C) British Crown Copyright 2017 - 2018, Met Office
22
#
33
# This file is part of Iris.
44
#
@@ -102,6 +102,39 @@ def as_lazy_data(data, chunks=None, asarray=False):
102102
return data
103103

104104

105+
def _co_realise_lazy_arrays(arrays):
    """
    Realise several lazy arrays in one pass, returning a list of real arrays.

    Everything is handed to a single `da.compute` call, so that any graph
    elements the arrays have in common can be shared between them.

    Each result is cast with `np.asanyarray`, and any MaskedConstant that
    appears is replaced by a masked array, to ensure that all return values
    are writeable NumPy array objects.

    Non-lazy arrays may also be included : they are passed through, as they
    are by `da.compute`, and get the same result standardisation.

    Args:

    * arrays (list of arrays):
        The lazy (or real) arrays to compute together.

    Returns:
        A list of real arrays, one for each input, in the same order.

    """
    def _standardise(source, value):
        # Always produce an array.
        # In some cases dask (and numpy) hand back a scalar
        # numpy.int/numpy.float object instead of an ndarray.
        # Recorded in https://github.com/dask/dask/issues/2111.
        value = np.asanyarray(value)
        if not isinstance(value, ma.core.MaskedConstant):
            return value
        # Turn a masked constant into a proper NumPy masked array.
        # NOTE: the dtype is taken from the original input array, because
        # masked constants *always* have dtype float64.
        return ma.masked_array(value.data, mask=value.mask,
                               dtype=source.dtype)

    computed = da.compute(*arrays)
    return [_standardise(source, value)
            for source, value in zip(arrays, computed)]
136+
137+
105138
def as_concrete_data(data):
106139
"""
107140
Return the actual content of a lazy array, as a numpy array.
@@ -120,14 +153,7 @@ def as_concrete_data(data):
120153
121154
"""
122155
if is_lazy_data(data):
123-
# Realise dask array, ensuring the data result is always a NumPy array.
124-
# In some cases dask may return a scalar numpy.int/numpy.float object
125-
# rather than a numpy.ndarray object.
126-
# Recorded in https://github.com/dask/dask/issues/2111.
127-
dtype = data.dtype
128-
data = np.asanyarray(data.compute())
129-
if isinstance(data, ma.core.MaskedConstant):
130-
data = ma.masked_array(data.data, dtype=dtype, mask=data.mask)
156+
data, = _co_realise_lazy_arrays([data])
131157

132158
return data
133159

@@ -158,3 +184,39 @@ def multidim_lazy_stack(stack):
158184
result = da.stack([multidim_lazy_stack(subarray)
159185
for subarray in stack])
160186
return result
187+
188+
189+
def co_realise_cubes(*cubes):
    """
    Realise the data of several cubes together, in one shared calculation.

    The effect on each cube is equivalent to accessing its `cube.data`, so
    any lazy data is computed.  However, as everything is evaluated in a
    single operation, lazy calculations and data fetches can be shared
    between the computations, improving performance.

    Nothing is returned : the real data is assigned back onto each cube.

    Args:

    * cubes (one or more :class:`~iris.cube.Cube`):
        The cubes to be realised, each passed as a separate argument.

    For example::

        # Form stats.
        a_std = cube_a.collapsed(['x', 'y'], iris.analysis.STD_DEV)
        b_std = cube_b.collapsed(['x', 'y'], iris.analysis.STD_DEV)
        ab_mean_diff = (cube_b - cube_a).collapsed(['x', 'y'],
                                                   iris.analysis.MEAN)
        std_err = (a_std * a_std + b_std * b_std) ** 0.5

        # Compute stats together (to avoid multiple data passes).
        iris.co_realise_cubes(a_std, b_std, ab_mean_diff, std_err)

    .. Note::

        Cubes with non-lazy data may also be passed, with no ill effect.

    """
    # Gather the core (possibly lazy) content of every cube, and compute it
    # all in a single operation.
    core_arrays = [cube.core_data() for cube in cubes]
    real_arrays = _co_realise_lazy_arrays(core_arrays)
    # Store each realised result back onto the cube it came from.
    for index, cube in enumerate(cubes):
        cube.data = real_arrays[index]
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# (C) British Crown Copyright 2018, Met Office
2+
#
3+
# This file is part of Iris.
4+
#
5+
# Iris is free software: you can redistribute it and/or modify it under
6+
# the terms of the GNU Lesser General Public License as published by the
7+
# Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# Iris is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU Lesser General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU Lesser General Public License
16+
# along with Iris. If not, see <http://www.gnu.org/licenses/>.
17+
"""Test function :func:`iris._lazy_data.co_realise_cubes`."""
18+
19+
from __future__ import (absolute_import, division, print_function)
20+
from six.moves import (filter, input, map, range, zip) # noqa
21+
22+
# Import iris.tests first so that some things can be initialised before
23+
# importing anything else.
24+
import iris.tests as tests
25+
26+
from mock import MagicMock
27+
import numpy as np
28+
29+
from iris.cube import Cube
30+
from iris._lazy_data import as_lazy_data
31+
32+
from iris._lazy_data import co_realise_cubes
33+
34+
35+
class ArrayAccessCounter(object):
    """
    Wrap an array-like object, counting how many times it gets indexed.

    The wrapper copies the `dtype` and `shape` of the wrapped array onto
    itself, and forwards all indexing operations to it.

    """
    def __init__(self, array):
        # Keep the wrapped array, and start with no recorded accesses.
        self._array = array
        self.access_count = 0
        # Mirror the array properties onto the wrapper.
        self.dtype = array.dtype
        self.shape = array.shape

    def __getitem__(self, keys):
        # Record the access first, then defer to the wrapped array.
        self.access_count = self.access_count + 1
        return self._array[keys]
45+
46+
47+
class Test_co_realise_cubes(tests.IrisTest):
    """Unit tests for :func:`co_realise_cubes`."""

    def test_empty(self):
        # The 'no args' case should not raise an error.
        co_realise_cubes()

    def test_basic(self):
        # A single lazy cube gets realised, with the expected content.
        expected = np.arange(3.)
        cube = Cube(as_lazy_data(expected))
        co_realise_cubes(cube)
        self.assertFalse(cube.has_lazy_data())
        self.assertArrayAllClose(cube.core_data(), expected)

    def test_multi(self):
        # Build two results from a common lazy base, one via an extra step.
        base_cube = Cube(as_lazy_data(np.arange(3.)))
        inner_cube = base_cube + 1
        result_a = base_cube + 1
        result_b = inner_cube + 1
        co_realise_cubes(result_a, result_b)
        # The cubes passed in should now hold real data ...
        for target in (result_a, result_b):
            self.assertFalse(target.has_lazy_data())
        # ... whereas the cubes they were derived from should remain lazy.
        for source in (base_cube, inner_cube):
            self.assertTrue(source.has_lazy_data())

    def test_combined_access(self):
        # Two cubes derived from one source, which counts its accesses.
        counter = ArrayAccessCounter(np.arange(3.))
        lazy_source = as_lazy_data(counter)
        cube_a = Cube(lazy_source + 1)
        cube_b = Cube(lazy_source + 2)
        co_realise_cubes(cube_a, cube_b)
        # Though used twice, the source data should only get fetched once.
        self.assertEqual(counter.access_count, 1)
83+
84+
85+
if __name__ == '__main__':
86+
tests.main()

0 commit comments

Comments
 (0)