From 2508024f57d451a3a70022c6ee0a8ab022d21794 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 28 Nov 2016 14:41:02 +0000 Subject: [PATCH 01/10] Separate linear+nearest codepaths in trajectory-interpolate. --- lib/iris/analysis/trajectory.py | 44 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index f2d36fdd3f..d236828f98 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -249,30 +249,34 @@ def interpolate(cube, sample_points, method=None): method = "nearest" break - # Use a cache with _nearest_neighbour_indices_ndcoords() - cache = {} - - for i in range(trajectory_size): - point = [(coord, values[i]) for coord, values in sample_points] - - if method in ["linear", None]: + if method in ["linear", None]: + for i in range(trajectory_size): + point = [(coord, values[i]) for coord, values in sample_points] column = linear_regrid(cube, point) new_cube.data[..., i] = column.data - elif method == "nearest": - column_index = _nearest_neighbour_indices_ndcoords(cube, point, - cache=cache) + # Fill in the empty squashed (non derived) coords. + for column_coord in column.dim_coords + column.aux_coords: + src_dims = cube.coord_dims(column_coord) + if not squish_my_dims.isdisjoint(src_dims): + if len(column_coord.points) != 1: + raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) + new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] + + elif method == "nearest": + # Use a cache with _nearest_neighbour_indices_ndcoords() + cache = {} + for i in range(trajectory_size): + point = [(coord, values[i]) for coord, values in sample_points] + column_index = _nearest_neighbour_indices_ndcoords(cube, point, cache=cache) column = cube[column_index] new_cube.data[..., i] = column.data - - # Fill in the empty squashed (non derived) coords. - for column_coord in column.dim_coords + column.aux_coords: - src_dims = cube.coord_dims(column_coord) - if not squish_my_dims.isdisjoint(src_dims): - if len(column_coord.points) != 1: - raise Exception("Expected to find exactly one point. " - "Found %d" % len(column_coord.points)) - new_cube.coord(column_coord.name()).points[i] = \ - column_coord.points[0] + # Fill in the empty squashed (non derived) coords. + for column_coord in column.dim_coords + column.aux_coords: + src_dims = cube.coord_dims(column_coord) + if not squish_my_dims.isdisjoint(src_dims): + if len(column_coord.points) != 1: + raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) + new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] return new_cube From 4c8179b09202d3329698b06d741955161c900480 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 28 Nov 2016 14:42:36 +0000 Subject: [PATCH 02/10] Remove redundant test in _nearest_neighbour_indices_ndcoords. --- lib/iris/analysis/_interpolate_private.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index 9207508271..1b157f8828 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -225,10 +225,7 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): point = [] ok_coord_ids = set(map(id, cube.dim_coords + cube.aux_coords)) for coord, value in sample_point: - if isinstance(coord, six.string_types): - coord = cube.coord(coord) - else: - coord = cube.coord(coord) + coord = cube.coord(coord) if id(coord) not in ok_coord_ids: msg = ('Invalid sample coordinate {!r}: derived coordinates are' ' not allowed.'.format(coord.name())) From b2aa120415a21a1aba6288c8e6dc90f7a47b5ce8 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 28 Nov 2016 15:05:24 +0000 Subject: [PATCH 03/10] Simplify routine setup slightly. --- lib/iris/analysis/_interpolate_private.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index 1b157f8828..38bf2a483e 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -222,7 +222,10 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_point) # Convert names to coords in sample_point - point = [] + # Reformat sample point values for use in _cartesian_sample_points(), below. + coord_values = [] + sample_point_coords = [] + sample_point_coord_names = [] ok_coord_ids = set(map(id, cube.dim_coords + cube.aux_coords)) for coord, value in sample_point: coord = cube.coord(coord) @@ -230,12 +233,11 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): msg = ('Invalid sample coordinate {!r}: derived coordinates are' ' not allowed.'.format(coord.name())) raise ValueError(msg) - point.append((coord, value)) + sample_point_coords.append(coord) + sample_point_coord_names.append(coord.name()) + coord_values.append([value]) - # Reformat sample_point for use in _cartesian_sample_points(), below. - sample_point = np.array([[value] for coord, value in point]) - sample_point_coords = [coord for coord, value in point] - sample_point_coord_names = [coord.name() for coord, value in point] + coord_values = np.array(coord_values) # Which dims are we sampling? sample_dims = set() @@ -259,13 +261,13 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): # Order the sample point coords according to the sample space cube coords sample_space_coord_names = [coord.name() for coord in sample_space_cube.coords()] new_order = [sample_space_coord_names.index(name) for name in sample_point_coord_names] - sample_point = np.array([sample_point[i] for i in new_order]) + coord_values = np.array([coord_values[i] for i in new_order]) sample_point_coord_names = [sample_point_coord_names[i] for i in new_order] # Convert the sample point to cartesian coords. # If there is no latlon within the coordinate there will be no change. # Otherwise, geographic latlon is replaced with cartesian xyz. - cartesian_sample_point = _cartesian_sample_points(sample_point, sample_point_coord_names)[0] + cartesian_sample_point = _cartesian_sample_points(coord_values, sample_point_coord_names)[0] sample_space_coords = sample_space_cube.dim_coords + sample_space_cube.aux_coords sample_space_coords_and_dims = [(coord, sample_space_cube.coord_dims(coord)) for coord in sample_space_coords] From 1c26805f89989d081a345f0f0bbed5267afb6016 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 28 Nov 2016 15:40:25 +0000 Subject: [PATCH 04/10] Reform _nearest_neighbour_indices_ndcoords api to do multiple sample points. --- lib/iris/analysis/_interpolate_private.py | 70 ++++++++++++++--------- lib/iris/analysis/trajectory.py | 21 +++---- 2 files changed, 53 insertions(+), 38 deletions(-) diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index 38bf2a483e..c86bda7b34 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -193,10 +193,13 @@ def nearest_neighbour_indices(cube, sample_points): return tuple(indices) -def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): +def _nearest_neighbour_indices_ndcoords(cube, sample_points, cache=None): """ See documentation for :func:`iris.analysis.interpolate.nearest_neighbour_indices`. + 'sample_points' is of the form [[coord-or-coord-name, point-value(s)]*]. + The numbers of all the point-values sequences must be equal. + This function is adapted for points sampling a multi-dimensional coord, and can currently only do nearest neighbour interpolation. @@ -209,17 +212,17 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): # A "sample space cube" is made which only has the coords and dims we are sampling on. # We get the nearest neighbour using this sample space cube. - if isinstance(sample_point, dict): + if isinstance(sample_points, dict): msg = ('Providing a dictionary to specify points is deprecated. ' 'Please provide a list of (coordinate, values) pairs.') warn_deprecated(msg) - sample_point = list(sample_point.items()) + sample_points = list(sample_points.items()) - if sample_point: + if sample_points: try: - coord, value = sample_point[0] + coord, value = sample_points[0] except ValueError: - raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_point) + raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_points) # Convert names to coords in sample_point # Reformat sample point values for use in _cartesian_sample_points(), below. @@ -227,7 +230,7 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): sample_point_coords = [] sample_point_coord_names = [] ok_coord_ids = set(map(id, cube.dim_coords + cube.aux_coords)) - for coord, value in sample_point: + for coord, value in sample_points: coord = cube.coord(coord) if id(coord) not in ok_coord_ids: msg = ('Invalid sample coordinate {!r}: derived coordinates are' @@ -235,7 +238,13 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): raise ValueError(msg) sample_point_coords.append(coord) sample_point_coord_names.append(coord.name()) - coord_values.append([value]) + value = np.array(value, ndmin=1) + coord_values.append(value) + + coord_point_lens = np.array([len(value) for value in coord_values]) + if not np.all(coord_point_lens == coord_point_lens[0]): + msg = 'All coordinates must have the same number of sample points.' + raise ValueError(msg) coord_values = np.array(coord_values) @@ -264,11 +273,6 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): coord_values = np.array([coord_values[i] for i in new_order]) sample_point_coord_names = [sample_point_coord_names[i] for i in new_order] - # Convert the sample point to cartesian coords. - # If there is no latlon within the coordinate there will be no change. - # Otherwise, geographic latlon is replaced with cartesian xyz. - cartesian_sample_point = _cartesian_sample_points(coord_values, sample_point_coord_names)[0] - sample_space_coords = sample_space_cube.dim_coords + sample_space_cube.aux_coords sample_space_coords_and_dims = [(coord, sample_space_cube.coord_dims(coord)) for coord in sample_space_coords] @@ -290,26 +294,36 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None): # Get the nearest datum index to the sample point. This is the goal of the function. kdtree = scipy.spatial.cKDTree(cartesian_space_data_coords) - cartesian_distance, datum_index = kdtree.query(cartesian_sample_point) - sample_space_ndi = np.unravel_index(datum_index, sample_space_cube.data.shape) - - # Turn sample_space_ndi into a main cube slice. - # Map sample cube to main cube dims and leave the rest as a full slice. - main_cube_slice = [slice(None, None)] * cube.ndim - for sample_coord, sample_coord_dims in sample_space_coords_and_dims: - # Find the coord in the main cube - main_coord = cube.coord(sample_coord.name()) - main_coord_dims = cube.coord_dims(main_coord) - # Mark the nearest data index/indices with respect to this coord - for sample_i, main_i in zip(sample_coord_dims, main_coord_dims): - main_cube_slice[main_i] = sample_space_ndi[sample_i] - + n_coords, n_points = coord_values.shape + main_cube_slices = [] + for i_point in range(n_points): + single_point_coord_values = coord_values[..., i_point:i_point + 1] + # Convert the sample point to cartesian coords. + # If there is no latlon within the coordinate there will be no change. + # Otherwise, geographic latlon is replaced with cartesian xyz. + cartesian_sample_point = _cartesian_sample_points( + single_point_coord_values, sample_point_coord_names)[0] + + cartesian_distance, datum_index = kdtree.query(cartesian_sample_point) + sample_space_ndi = np.unravel_index(datum_index, sample_space_cube.data.shape) + + # Turn sample_space_ndi into a main cube slice. + # Map sample cube to main cube dims and leave the rest as a full slice. + main_cube_slice = [slice(None, None)] * cube.ndim + for sample_coord, sample_coord_dims in sample_space_coords_and_dims: + # Find the coord in the main cube + main_coord = cube.coord(sample_coord.name()) + main_coord_dims = cube.coord_dims(main_coord) + # Mark the nearest data index/indices with respect to this coord + for sample_i, main_i in zip(sample_coord_dims, main_coord_dims): + main_cube_slice[main_i] = sample_space_ndi[sample_i] + main_cube_slices.append(tuple(main_cube_slice)) # Update cache if cache is not None: cache[cube] = kdtree - return tuple(main_cube_slice) + return main_cube_slices def extract_nearest_neighbour(cube, sample_points): diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index d236828f98..8324f30273 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -267,16 +267,17 @@ def interpolate(cube, sample_points, method=None): cache = {} for i in range(trajectory_size): point = [(coord, values[i]) for coord, values in sample_points] - column_index = _nearest_neighbour_indices_ndcoords(cube, point, cache=cache) - column = cube[column_index] - new_cube.data[..., i] = column.data - # Fill in the empty squashed (non derived) coords. - for column_coord in column.dim_coords + column.aux_coords: - src_dims = cube.coord_dims(column_coord) - if not squish_my_dims.isdisjoint(src_dims): - if len(column_coord.points) != 1: - raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) - new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] + column_indexes = _nearest_neighbour_indices_ndcoords(cube, point, cache=cache) + for column_index in column_indexes: + column = cube[column_index] + new_cube.data[..., i] = column.data + # Fill in the empty squashed (non derived) coords. + for column_coord in column.dim_coords + column.aux_coords: + src_dims = cube.coord_dims(column_coord) + if not squish_my_dims.isdisjoint(src_dims): + if len(column_coord.points) != 1: + raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) + new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] return new_cube From d7008aa073100024dcc34fad7ad1e159cb328c90 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Tue, 29 Nov 2016 18:55:01 +0000 Subject: [PATCH 05/10] Replace per-point cube indexing with fancy indexing; load a square region of the data. --- lib/iris/analysis/trajectory.py | 92 ++++++++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 13 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 8324f30273..511942a67e 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -265,19 +265,85 @@ def interpolate(cube, sample_points, method=None): elif method == "nearest": # Use a cache with _nearest_neighbour_indices_ndcoords() cache = {} - for i in range(trajectory_size): - point = [(coord, values[i]) for coord, values in sample_points] - column_indexes = _nearest_neighbour_indices_ndcoords(cube, point, cache=cache) - for column_index in column_indexes: - column = cube[column_index] - new_cube.data[..., i] = column.data - # Fill in the empty squashed (non derived) coords. - for column_coord in column.dim_coords + column.aux_coords: - src_dims = cube.coord_dims(column_coord) - if not squish_my_dims.isdisjoint(src_dims): - if len(column_coord.points) != 1: - raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) - new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] + column_indexes = _nearest_neighbour_indices_ndcoords( + cube, sample_points, cache=cache) + + # Construct "fancy" indexes, so we can create the result data array in + # a single numpy indexing operation. + # ALSO: capture the index range in each dimension, so that we can fetch + # only a required (square) sub-region of the source data. + fancy_source_indices = [] + region_slices = [] + n_index_length = len(column_indexes[0]) + for i_ind in range(n_index_length): + contents = [column_index[i_ind] + for column_index in column_indexes] + each_used = [content != slice(None) for content in contents] + if not np.any(each_used): + # This dimension is not addressed by the operation. + # Use a ":" as the index. + fancy_index = slice(None) + # No sub-region selection for this dimension. + region_slice = slice(None) + elif np.all(each_used): + # This dimension is addressed : use a list of indices. + # Select the region by min+max indices. + start_ind = np.min(contents) + stop_ind = 1 + np.max(contents) + region_slice = slice(start_ind, stop_ind) + # Record point indices with start subtracted from all of them. + fancy_index = list(np.array(contents) - start_ind) + else: + # Should really never happen, if _ndcoords is right. + msg = ('Error trajectory interpolation: point selection ' + 'indices do not all have the same form.') + raise ValueError() + + fancy_source_indices.append(fancy_index) + region_slices.append(region_slice) + + # NOTE: fetch the required (square-section) region of the source data. + # This is not quite as good as only fetching the individual points + # which are used, but it avoids creating a sub-cube for each point, + # which is very slow, especially when points are re-used a lot ... + source_area_indices = tuple(region_slices) + source_data = cube[source_area_indices].data + + # Apply fancy indexing to get all the result data points. + new_cube.data[:] = source_data[fancy_source_indices] + # NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data", + # because the existing code produces a default dtype from 'np.empty' + # instead of preserving the input dtype. + # TODO: maybe this should be fixed -- i.e. to preserve input dtype ?? + + # Fill in the empty squashed (non derived) coords. + column_coords = [coord + for coord in cube.dim_coords + cube.aux_coords + if not squish_my_dims.isdisjoint( + cube.coord_dims(coord))] + new_cube_coords = [new_cube.coord(column_coord.name()) + for column_coord in column_coords] + all_point_indices = np.array(column_indexes) + single_point_test_cube = cube[column_indexes[0]] + for new_cube_coord, src_coord in zip(new_cube_coords, column_coords): + # Check structure of the indexed coord (at one selected point). + point_coord = single_point_test_cube.coord(src_coord) + if len(point_coord.points) != 1: + msg = ('Coord {} at one x-y position has the shape {}, ' + 'instead of being a single point. ') + raise Exception(msg.format(src_coord.name(), src_coord.shape)) + + # Work out which indices apply to the input coord. + # NOTE: we know how to index the source cube to get a cube with a + # single point for each coord, but this is very inefficient. + # So here, we translate cube indexes into *coord* indexes. + src_coord_dims = cube.coord_dims(src_coord) + fancy_coord_index_arrays = [list(all_point_indices[:, src_dim]) + for src_dim in src_coord_dims] + + # Fill the new coord with all the correct points from the old one. + new_cube_coord.points = src_coord.points[fancy_coord_index_arrays] + # NOTE: the new coords do *not* have bounds. return new_cube From c50efb8b49f28490db6cab6b2a97fe8b33a7b190 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 30 Nov 2016 10:37:18 +0000 Subject: [PATCH 06/10] pep8 fix. --- lib/iris/analysis/trajectory.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 511942a67e..7138ee0131 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -259,8 +259,10 @@ def interpolate(cube, sample_points, method=None): src_dims = cube.coord_dims(column_coord) if not squish_my_dims.isdisjoint(src_dims): if len(column_coord.points) != 1: - raise Exception("Expected to find exactly one point. Found %d" % len(column_coord.points)) - new_cube.coord(column_coord.name()).points[i] = column_coord.points[0] + msg = "Expected to find exactly one point. Found {}." + raise Exception(msg.format(column_coord.points)) + new_cube.coord(column_coord.name()).points[i] = \ + column_coord.points[0] elif method == "nearest": # Use a cache with _nearest_neighbour_indices_ndcoords() From 68ed7e55cfedb45346acf0cdd9a91bbf70aa9776 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 30 Nov 2016 11:54:54 +0000 Subject: [PATCH 07/10] Vectorise _nearest_neighbour_indices_ndcoords and its kdtree operation. --- lib/iris/analysis/_interpolate_private.py | 68 ++++++++++++++--------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index c86bda7b34..de28cab6a6 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -291,39 +291,53 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_points, cache=None): # Convert to cartesian coordinates. Flatten for kdtree compatibility. cartesian_space_data_coords = _cartesian_sample_points(sample_space_data_positions, sample_point_coord_names) - # Get the nearest datum index to the sample point. This is the goal of the function. + # Create a kdtree for the nearest-distance lookup to these 3d points. kdtree = scipy.spatial.cKDTree(cartesian_space_data_coords) - - n_coords, n_points = coord_values.shape - main_cube_slices = [] - for i_point in range(n_points): - single_point_coord_values = coord_values[..., i_point:i_point + 1] - # Convert the sample point to cartesian coords. - # If there is no latlon within the coordinate there will be no change. - # Otherwise, geographic latlon is replaced with cartesian xyz. - cartesian_sample_point = _cartesian_sample_points( - single_point_coord_values, sample_point_coord_names)[0] - - cartesian_distance, datum_index = kdtree.query(cartesian_sample_point) - sample_space_ndi = np.unravel_index(datum_index, sample_space_cube.data.shape) - - # Turn sample_space_ndi into a main cube slice. - # Map sample cube to main cube dims and leave the rest as a full slice. - main_cube_slice = [slice(None, None)] * cube.ndim - for sample_coord, sample_coord_dims in sample_space_coords_and_dims: - # Find the coord in the main cube - main_coord = cube.coord(sample_coord.name()) - main_coord_dims = cube.coord_dims(main_coord) - # Mark the nearest data index/indices with respect to this coord - for sample_i, main_i in zip(sample_coord_dims, main_coord_dims): - main_cube_slice[main_i] = sample_space_ndi[sample_i] - main_cube_slices.append(tuple(main_cube_slice)) + # This can find the nearest datum point to any given target point, + # which is the goal of this function. # Update cache if cache is not None: cache[cube] = kdtree - return main_cube_slices + # Convert the sample points to cartesian (3d) coords. + # If there is no latlon within the coordinate there will be no change. + # Otherwise, geographic latlon is replaced with cartesian xyz. + cartesian_sample_points = _cartesian_sample_points( + coord_values, sample_point_coord_names) + + # Use kdtree to get the nearest sourcepoint index for each target point. + _, datum_index_lists = kdtree.query(cartesian_sample_points) + + # Convert flat indices back into multidimensional sample-space indices. + sample_space_dimension_indices = np.unravel_index( + datum_index_lists, sample_space_cube.data.shape) + # Convert this from "pointwise list of index arrays for each dimension", + # to "list of cube indices for each point". + sample_space_ndis = np.array(sample_space_dimension_indices).transpose() + + # For the returned result, we must convert these indices into the source + # (sample-space) cube, to equivalent indices into the target 'cube'. + + # Make a result array: (cube.ndim * ), per sample point. + n_points = coord_values.shape[-1] + main_cube_slices = np.empty((n_points, cube.ndim), dtype=object) + # Initialise so all unused indices are ":". + main_cube_slices[:] = slice(None) + + # Move result indices according to the source (sample) and target (cube) + # dimension mappings. + for sample_coord, sample_coord_dims in sample_space_coords_and_dims: + # Find the coord in the main cube + main_coord = cube.coord(sample_coord.name()) + main_coord_dims = cube.coord_dims(main_coord) + # Fill nearest-point data indices for each coord dimension. + for sample_i, main_i in zip(sample_coord_dims, main_coord_dims): + main_cube_slices[:, main_i] = sample_space_ndis[:, sample_i] + + # Return as a list of **tuples** : required for correct indexing usage. + result = [tuple(inds) for inds in main_cube_slices] + return result def extract_nearest_neighbour(cube, sample_points): From d27a9d0a1c4cb28f2bad265e5dd599e685cb7731 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 5 Dec 2016 15:52:52 +0000 Subject: [PATCH 08/10] Small behaviour tweaks + fix tests. --- lib/iris/analysis/_interpolate_private.py | 2 +- lib/iris/analysis/trajectory.py | 15 +++++-- lib/iris/tests/integration/test_trajectory.py | 2 + ...est__nearest_neighbour_indices_ndcoords.py | 44 ++++++++++++++----- .../analysis/trajectory/test_interpolate.py | 5 ++- 5 files changed, 52 insertions(+), 16 deletions(-) diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index de28cab6a6..69b11b8026 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -198,7 +198,7 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_points, cache=None): See documentation for :func:`iris.analysis.interpolate.nearest_neighbour_indices`. 'sample_points' is of the form [[coord-or-coord-name, point-value(s)]*]. - The numbers of all the point-values sequences must be equal. + The lengths of all the point-values sequences must be equal. This function is adapted for points sampling a multi-dimensional coord, and can currently only do nearest neighbour interpolation. diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 7138ee0131..3185d5e966 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -28,9 +28,10 @@ import numpy as np +import iris.analysis import iris.coord_systems import iris.coords -import iris.analysis + from iris.analysis._interpolate_private import \ _nearest_neighbour_indices_ndcoords, linear as linear_regrid @@ -312,7 +313,15 @@ def interpolate(cube, sample_points, method=None): source_data = cube[source_area_indices].data # Apply fancy indexing to get all the result data points. - new_cube.data[:] = source_data[fancy_source_indices] + source_data = source_data[fancy_source_indices] + # "Fix" problems with missing datapoints producing odd values + # when copied from a masked into an unmasked array. + if np.ma.isMaskedArray(source_data): + # This is **not** proper mask handling, because we cannot produce a + # masked result, but it ensures we use a "filled" version of the + # input in this case. + source_data = source_data.filled() + new_cube.data[:] = source_data # NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data", # because the existing code produces a default dtype from 'np.empty' # instead of preserving the input dtype. @@ -333,7 +342,7 @@ def interpolate(cube, sample_points, method=None): if len(point_coord.points) != 1: msg = ('Coord {} at one x-y position has the shape {}, ' 'instead of being a single point. ') - raise Exception(msg.format(src_coord.name(), src_coord.shape)) + raise ValueError(msg.format(src_coord.name(), src_coord.shape)) # Work out which indices apply to the input coord. # NOTE: we know how to index the source cube to get a cube with a diff --git a/lib/iris/tests/integration/test_trajectory.py b/lib/iris/tests/integration/test_trajectory.py index bc38a4d280..6c76bc11a3 100644 --- a/lib/iris/tests/integration/test_trajectory.py +++ b/lib/iris/tests/integration/test_trajectory.py @@ -196,6 +196,8 @@ def test_tri_polar__nearest(self): test_cube = self.cube # Use just one 2d layer, just to be faster. test_cube = test_cube[0][0] + # Fetch all the data, so we can control the fill value. + test_cube.data # Fix the fill value of the data to zero, just so that we get the same # result under numpy < 1.11 as with 1.11. # NOTE: numpy<1.11 *used* to assign missing data points into an diff --git a/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py b/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py index 6996a2bad5..ae9f691a7f 100644 --- a/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py +++ b/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py @@ -46,7 +46,17 @@ def test_nonlatlon_simple_2d(self): cube.add_dim_coord(co_x, 1) sample_point = [('x', 2.8), ('y', 18.5)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (1, 2)) + self.assertEqual(result, [(1, 2)]) + + def test_nonlatlon_multiple_2d(self): + co_y = DimCoord([10.0, 20.0], long_name='y') + co_x = DimCoord([1.0, 2.0, 3.0], long_name='x') + cube = Cube(np.zeros((2, 3))) + cube.add_dim_coord(co_y, 0) + cube.add_dim_coord(co_x, 1) + sample_points = [('x', [2.8, -350.0, 1.7]), ('y', [18.5, 8.7, 12.2])] + result = nn_ndinds(cube, sample_points) + self.assertEqual(result, [(1, 2), (0, 0), (0, 1)]) def test_latlon_simple_2d(self): co_y = DimCoord([10.0, 20.0], @@ -58,7 +68,21 @@ def test_latlon_simple_2d(self): cube.add_dim_coord(co_x, 1) sample_point = [('longitude', 2.8), ('latitude', 18.5)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (1, 2)) + self.assertEqual(result, [(1, 2)]) + + def test_latlon_multiple_2d(self): + co_y = DimCoord([10.0, 20.0], + standard_name='latitude', units='degrees') + co_x = DimCoord([1.0, 2.0, 3.0], + standard_name='longitude', units='degrees') + cube = Cube(np.zeros((2, 3))) + cube.add_dim_coord(co_y, 0) + cube.add_dim_coord(co_x, 1) + sample_points = [('longitude', [2.8, -350.0, 1.7]), + ('latitude', [18.5, 8.7, 12.2])] + result = nn_ndinds(cube, sample_points) + # Note slight difference from non-latlon version. + self.assertEqual(result, [(1, 2), (0, 2), (0, 1)]) class Test1d(tests.IrisTest): @@ -70,7 +94,7 @@ def test_nonlatlon_simple_1d(self): cube.add_aux_coord(co_x, 0) sample_point = [('x', 2.8), ('y', 18.5)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (5,)) + self.assertEqual(result, [(5,)]) def test_latlon_simple_1d(self): cube = Cube([11.0, 12.0, 13.0, 21.0, 22.0, 23.0]) @@ -82,7 +106,7 @@ def test_latlon_simple_1d(self): cube.add_aux_coord(co_x, 0) sample_point = [('longitude', 2.8), ('latitude', 18.5)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (5,)) + self.assertEqual(result, [(5,)]) class TestApiExtras(tests.IrisTest): @@ -96,7 +120,7 @@ def test_no_y_dim(self): cube.add_dim_coord(co_x, 1) sample_point = [('x', 2.8)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (slice(None), 2)) + self.assertEqual(result, [(slice(None), 2)]) def test_no_x_dim(self): # Operate in Y only, returned slice should be [iy, :]. @@ -107,7 +131,7 @@ def test_no_x_dim(self): cube.add_dim_coord(co_x, 1) sample_point = [('y', 18.5)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (1, slice(None))) + self.assertEqual(result, [(1, slice(None))]) def test_sample_dictionary(self): # Pass sample_point arg as a dictionary: this usage mode is deprecated. @@ -120,7 +144,7 @@ def test_sample_dictionary(self): warn_call = self.patch( 'iris.analysis._interpolate_private.warn_deprecated') result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (1, 2)) + self.assertEqual(result, [(1, 2)]) self.assertEqual(warn_call.call_count, 1) self.assertIn('dictionary to specify points is deprecated', warn_call.call_args[0][0]) @@ -139,7 +163,7 @@ def _testcube_latlon_1d(self, lats, lons): def _check_latlon_1d(self, lats, lons, sample_point, expect): cube = self._testcube_latlon_1d(lats, lons) result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (expect,)) + self.assertEqual(result, [(expect,)]) def test_lat_scaling(self): # Check that (88, 25) is closer to (88, 0) than to (87, 25) @@ -158,7 +182,7 @@ def test_alternate_latlon_names_okay(self): cube.coord('longitude').rename('x_longitude_x') sample_point = [('y_latitude_y', 88), ('x_longitude_x', 25)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (0,)) + self.assertEqual(result, [(0,)]) def test_alternate_nonlatlon_names_different(self): # Check that (88, 25) is **NOT** closer to (88, 0) than to (87, 25) @@ -169,7 +193,7 @@ def test_alternate_nonlatlon_names_different(self): cube.coord('longitude').rename('x') sample_point = [('y', 88), ('x', 25)] result = nn_ndinds(cube, sample_point) - self.assertEqual(result, (1,)) + self.assertEqual(result, [(1,)]) def test_lons_wrap_359_0(self): # Check that (0, 359) is closer to (0, 0) than to (0, 350) diff --git a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py index b04d422b1e..1381f2cc4c 100644 --- a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py +++ b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py @@ -161,8 +161,9 @@ def test_aux_coord_fail_mixed_dims(self): [211, 212, 213, 214]], long_name='aux_0x'), (0, 2)) - msg = 'Expected to find exactly one point.*Found 2' - with self.assertRaisesRegexp(Exception, msg): + msg = ('Coord aux_0x at one x-y position has the shape.*' + 'instead of being a single point') + with self.assertRaisesRegexp(ValueError, msg): interpolate(cube, self.single_sample_point, method='nearest') def test_metadata(self): From 7db9a300f7c90e808bebe42a67df374ac20f5939 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 5 Dec 2016 16:16:00 +0000 Subject: [PATCH 09/10] Fix should-never-happen error reporting. --- lib/iris/analysis/trajectory.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 3185d5e966..9ef7d7639f 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -298,9 +298,9 @@ def interpolate(cube, sample_points, method=None): fancy_index = list(np.array(contents) - start_ind) else: # Should really never happen, if _ndcoords is right. - msg = ('Error trajectory interpolation: point selection ' - 'indices do not all have the same form.') - raise ValueError() + msg = ('Internal error in trajectory interpolation : point ' + 'selection indices should all have the same form.') + raise ValueError(msg) fancy_source_indices.append(fancy_index) region_slices.append(region_slice) From e2a9ed063360dc3f893fa06837de95a903b86620 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Mon, 5 Dec 2016 16:19:24 +0000 Subject: [PATCH 10/10] Revert meaningless change in test_trajectory. --- lib/iris/tests/integration/test_trajectory.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/iris/tests/integration/test_trajectory.py b/lib/iris/tests/integration/test_trajectory.py index 6c76bc11a3..bc38a4d280 100644 --- a/lib/iris/tests/integration/test_trajectory.py +++ b/lib/iris/tests/integration/test_trajectory.py @@ -196,8 +196,6 @@ def test_tri_polar__nearest(self): test_cube = self.cube # Use just one 2d layer, just to be faster. test_cube = test_cube[0][0] - # Fetch all the data, so we can control the fill value. - test_cube.data # Fix the fill value of the data to zero, just so that we get the same # result under numpy < 1.11 as with 1.11. # NOTE: numpy<1.11 *used* to assign missing data points into an