101 changes: 64 additions & 37 deletions lib/iris/analysis/_interpolate_private.py
@@ -193,10 +193,13 @@ def nearest_neighbour_indices(cube, sample_points):
    return tuple(indices)


-def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
+def _nearest_neighbour_indices_ndcoords(cube, sample_points, cache=None):
[Review comment, Member]: this function is now quite clever. Would it be plausible to have a unit test which threw some known and soluble problems at this function and asserted that the expected answers were produced?

    """
    See documentation for :func:`iris.analysis.interpolate.nearest_neighbour_indices`.

+    'sample_points' is of the form [[coord-or-coord-name, point-value(s)]*].
+    The lengths of all the point-values sequences must be equal.
+
    This function is adapted for points sampling a multi-dimensional coord,
    and can currently only do nearest neighbour interpolation.

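[Note]: A minimal usage sketch of the point format described by the new docstring; the coordinate names and values are purely illustrative.

    # Each entry pairs a coord (or its name) with a sequence of point values;
    # all the sequences must have the same length (here, 3 sample points).
    sample_points = [
        ('latitude', [0.0, 10.0, 20.0]),
        ('longitude', [45.0, 50.0, 55.0]),
    ]
    # indices = _nearest_neighbour_indices_ndcoords(cube, sample_points)
    # would then return one cube-index tuple per sample point.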
@@ -209,36 +212,41 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
    # A "sample space cube" is made which only has the coords and dims we are sampling on.
    # We get the nearest neighbour using this sample space cube.

-    if isinstance(sample_point, dict):
+    if isinstance(sample_points, dict):
        msg = ('Providing a dictionary to specify points is deprecated. '
               'Please provide a list of (coordinate, values) pairs.')
        warn_deprecated(msg)
-        sample_point = list(sample_point.items())
+        sample_points = list(sample_points.items())

-    if sample_point:
+    if sample_points:
        try:
-            coord, value = sample_point[0]
+            coord, value = sample_points[0]
        except ValueError:
-            raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_point)
+            raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_points)

-    # Convert names to coords in sample_point
-    point = []
+    # Reformat sample point values for use in _cartesian_sample_points(), below.
+    coord_values = []
+    sample_point_coords = []
+    sample_point_coord_names = []
    ok_coord_ids = set(map(id, cube.dim_coords + cube.aux_coords))
-    for coord, value in sample_point:
-        if isinstance(coord, six.string_types):
-            coord = cube.coord(coord)
-        else:
-            coord = cube.coord(coord)
+    for coord, value in sample_points:
+        coord = cube.coord(coord)
        if id(coord) not in ok_coord_ids:
            msg = ('Invalid sample coordinate {!r}: derived coordinates are'
                   ' not allowed.'.format(coord.name()))
            raise ValueError(msg)
-        point.append((coord, value))
+        sample_point_coords.append(coord)
+        sample_point_coord_names.append(coord.name())
+        value = np.array(value, ndmin=1)
+        coord_values.append(value)

-    # Reformat sample_point for use in _cartesian_sample_points(), below.
-    sample_point = np.array([[value] for coord, value in point])
-    sample_point_coords = [coord for coord, value in point]
-    sample_point_coord_names = [coord.name() for coord, value in point]
+    coord_point_lens = np.array([len(value) for value in coord_values])
+    if not np.all(coord_point_lens == coord_point_lens[0]):
+        msg = 'All coordinates must have the same number of sample points.'
+        raise ValueError(msg)
+
+    coord_values = np.array(coord_values)

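[Note]: A standalone sketch of the normalisation and length check above, with made-up values; np.array(value, ndmin=1) lifts scalars to length-1 arrays first.

    import numpy as np

    raw_values = [[15.0, 25.0], [100.0, 110.0]]   # two coords, two points each
    coord_values = [np.array(v, ndmin=1) for v in raw_values]
    lens = np.array([len(v) for v in coord_values])
    assert np.all(lens == lens[0])   # unequal lengths would raise ValueError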
    # Which dims are we sampling?
    sample_dims = set()
@@ -262,14 +270,9 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
    # Order the sample point coords according to the sample space cube coords
    sample_space_coord_names = [coord.name() for coord in sample_space_cube.coords()]
    new_order = [sample_space_coord_names.index(name) for name in sample_point_coord_names]
-    sample_point = np.array([sample_point[i] for i in new_order])
+    coord_values = np.array([coord_values[i] for i in new_order])
    sample_point_coord_names = [sample_point_coord_names[i] for i in new_order]

-    # Convert the sample point to cartesian coords.
-    # If there is no latlon within the coordinate there will be no change.
-    # Otherwise, geographic latlon is replaced with cartesian xyz.
-    cartesian_sample_point = _cartesian_sample_points(sample_point, sample_point_coord_names)[0]
-
    sample_space_coords = sample_space_cube.dim_coords + sample_space_cube.aux_coords
    sample_space_coords_and_dims = [(coord, sample_space_cube.coord_dims(coord)) for coord in sample_space_coords]

@@ -288,29 +291,53 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
    # Convert to cartesian coordinates. Flatten for kdtree compatibility.
    cartesian_space_data_coords = _cartesian_sample_points(sample_space_data_positions, sample_point_coord_names)

-    # Get the nearest datum index to the sample point. This is the goal of the function.
+    # Create a kdtree for the nearest-distance lookup to these 3d points.
    kdtree = scipy.spatial.cKDTree(cartesian_space_data_coords)
+    # This can find the nearest datum point to any given target point,
+    # which is the goal of this function.

-    cartesian_distance, datum_index = kdtree.query(cartesian_sample_point)
-    sample_space_ndi = np.unravel_index(datum_index, sample_space_cube.data.shape)
+    # Update cache
+    if cache is not None:
+        cache[cube] = kdtree

-    # Turn sample_space_ndi into a main cube slice.
-    # Map sample cube to main cube dims and leave the rest as a full slice.
-    main_cube_slice = [slice(None, None)] * cube.ndim
+    # Convert the sample points to cartesian (3d) coords.
+    # If there is no latlon within the coordinate there will be no change.
+    # Otherwise, geographic latlon is replaced with cartesian xyz.
+    cartesian_sample_points = _cartesian_sample_points(
+        coord_values, sample_point_coord_names)
+
+    # Use kdtree to get the nearest sourcepoint index for each target point.
+    _, datum_index_lists = kdtree.query(cartesian_sample_points)

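[Note]: A self-contained sketch of the cKDTree build-and-query pattern used here, on made-up 3d points.

    import numpy as np
    import scipy.spatial

    space_points = np.array([[0., 0., 0.], [1., 0., 0.], [0., 1., 0.]])
    kdtree = scipy.spatial.cKDTree(space_points)
    targets = np.array([[0.9, 0.1, 0.], [0.1, 0.8, 0.]])
    distances, flat_indices = kdtree.query(targets)
    # flat_indices -> array([1, 2]) : nearest source point for each target.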
+    # Convert flat indices back into multidimensional sample-space indices.
+    sample_space_dimension_indices = np.unravel_index(
+        datum_index_lists, sample_space_cube.data.shape)
+    # Convert this from "pointwise list of index arrays for each dimension",
+    # to "list of cube indices for each point".
+    sample_space_ndis = np.array(sample_space_dimension_indices).transpose()
+
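[Note]: How the flat-to-multidimensional conversion behaves for a hypothetical 2x2 sample space.

    import numpy as np

    flat_indices = np.array([1, 2])
    dim_indices = np.unravel_index(flat_indices, (2, 2))  # one array per dim
    per_point = np.array(dim_indices).transpose()
    # per_point -> array([[0, 1], [1, 0]]) : one (dim0, dim1) index per point.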
+    # For the returned result, we must convert these indices into the source
+    # (sample-space) cube, to equivalent indices into the target 'cube'.
+
+    # Make a result array: (cube.ndim * <index>), per sample point.
+    n_points = coord_values.shape[-1]
+    main_cube_slices = np.empty((n_points, cube.ndim), dtype=object)
+    # Initialise so all unused indices are ":".
+    main_cube_slices[:] = slice(None)
+
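[Note]: A sketch of the object-dtype index table built above: one row per sample point, one column per cube dimension, every entry starting as ":".

    import numpy as np

    slices = np.empty((2, 3), dtype=object)   # 2 points, 3 cube dims
    slices[:] = slice(None)                   # all entries start as ":"
    slices[:, 1] = [4, 7]                     # indices for one addressed dim
    # tuple(slices[0]) -> (slice(None, None, None), 4, slice(None, None, None))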
+    # Move result indices according to the source (sample) and target (cube)
+    # dimension mappings.
    for sample_coord, sample_coord_dims in sample_space_coords_and_dims:
        # Find the coord in the main cube
        main_coord = cube.coord(sample_coord.name())
        main_coord_dims = cube.coord_dims(main_coord)
-        # Mark the nearest data index/indices with respect to this coord
+        # Fill nearest-point data indices for each coord dimension.
        for sample_i, main_i in zip(sample_coord_dims, main_coord_dims):
-            main_cube_slice[main_i] = sample_space_ndi[sample_i]
-
-
-    # Update cache
-    if cache is not None:
-        cache[cube] = kdtree
+            main_cube_slices[:, main_i] = sample_space_ndis[:, sample_i]

-    return tuple(main_cube_slice)
+    # Return as a list of **tuples** : required for correct indexing usage.
+    result = [tuple(inds) for inds in main_cube_slices]
+    return result
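[Note]: Using one of the returned tuples to index an array, as a caller would; the shapes here are made up.

    import numpy as np

    data = np.arange(24).reshape(2, 3, 4)
    index = (slice(None), 1, 2)   # like one element of the returned list
    column = data[index]          # -> array([ 6, 18]) : all data at y=1, x=2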


def extract_nearest_neighbour(cube, sample_points):
124 changes: 103 additions & 21 deletions lib/iris/analysis/trajectory.py
@@ -28,9 +28,10 @@

import numpy as np

+import iris.analysis
import iris.coord_systems
import iris.coords
-import iris.analysis

+from iris.analysis._interpolate_private import \
+    _nearest_neighbour_indices_ndcoords, linear as linear_regrid

@@ -249,30 +250,111 @@ def interpolate(cube, sample_points, method=None):
            method = "nearest"
            break

-    # Use a cache with _nearest_neighbour_indices_ndcoords()
-    cache = {}
-
-    for i in range(trajectory_size):
-        point = [(coord, values[i]) for coord, values in sample_points]
-
-        if method in ["linear", None]:
+    if method in ["linear", None]:
+        for i in range(trajectory_size):
+            point = [(coord, values[i]) for coord, values in sample_points]
            column = linear_regrid(cube, point)
            new_cube.data[..., i] = column.data
-        elif method == "nearest":
-            column_index = _nearest_neighbour_indices_ndcoords(cube, point,
-                                                               cache=cache)
-            column = cube[column_index]
-            new_cube.data[..., i] = column.data
+            # Fill in the empty squashed (non derived) coords.
+            for column_coord in column.dim_coords + column.aux_coords:
+                src_dims = cube.coord_dims(column_coord)
+                if not squish_my_dims.isdisjoint(src_dims):
+                    if len(column_coord.points) != 1:
+                        msg = "Expected to find exactly one point. Found {}."
+                        raise Exception(msg.format(column_coord.points))
+                    new_cube.coord(column_coord.name()).points[i] = \
+                        column_coord.points[0]

+    elif method == "nearest":
+        # Use a cache with _nearest_neighbour_indices_ndcoords()
+        cache = {}
+        column_indexes = _nearest_neighbour_indices_ndcoords(
+            cube, sample_points, cache=cache)
+
+        # Construct "fancy" indexes, so we can create the result data array in
+        # a single numpy indexing operation.
+        # ALSO: capture the index range in each dimension, so that we can fetch
+        # only a required (square) sub-region of the source data.
+        fancy_source_indices = []
+        region_slices = []
+        n_index_length = len(column_indexes[0])
+        for i_ind in range(n_index_length):
+            contents = [column_index[i_ind]
+                        for column_index in column_indexes]
+            each_used = [content != slice(None) for content in contents]
+            if not np.any(each_used):
+                # This dimension is not addressed by the operation.
+                # Use a ":" as the index.
+                fancy_index = slice(None)
+                # No sub-region selection for this dimension.
+                region_slice = slice(None)
+            elif np.all(each_used):
+                # This dimension is addressed : use a list of indices.
+                # Select the region by min+max indices.
+                start_ind = np.min(contents)
+                stop_ind = 1 + np.max(contents)
+                region_slice = slice(start_ind, stop_ind)
+                # Record point indices with start subtracted from all of them.
+                fancy_index = list(np.array(contents) - start_ind)
+            else:
+                # Should really never happen, if _ndcoords is right.
+                msg = ('Internal error in trajectory interpolation : point '
+                       'selection indices should all have the same form.')
+                raise ValueError(msg)
+
+            fancy_source_indices.append(fancy_index)
+            region_slices.append(region_slice)

+        # NOTE: fetch the required (square-section) region of the source data.
+        # This is not quite as good as only fetching the individual points
+        # which are used, but it avoids creating a sub-cube for each point,
+        # which is very slow, especially when points are re-used a lot ...
+        source_area_indices = tuple(region_slices)
+        source_data = cube[source_area_indices].data
+
+        # Apply fancy indexing to get all the result data points.
+        source_data = source_data[fancy_source_indices]
+        # "Fix" problems with missing datapoints producing odd values
+        # when copied from a masked into an unmasked array.
+        if np.ma.isMaskedArray(source_data):
+            # This is **not** proper mask handling, because we cannot produce a
+            # masked result, but it ensures we use a "filled" version of the
+            # input in this case.
+            source_data = source_data.filled()
+        new_cube.data[:] = source_data
+        # NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data",
+        # because the existing code produces a default dtype from 'np.empty'
+        # instead of preserving the input dtype.
+        # TODO: maybe this should be fixed -- i.e. to preserve input dtype ??
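[Note]: The region-slice plus offset fancy-index trick above, demonstrated on a plain numpy array with made-up point indices.

    import numpy as np

    data = np.arange(100).reshape(10, 10)
    rows, cols = [2, 5, 5, 9], [3, 3, 7, 0]
    # Fetch one bounding sub-region, then pick points with offset indices.
    region = data[min(rows):max(rows) + 1, min(cols):max(cols) + 1]
    picked = region[np.array(rows) - min(rows), np.array(cols) - min(cols)]
    # picked -> array([23, 53, 57, 90]), identical to data[rows, cols].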

-        # Fill in the empty squashed (non derived) coords.
-        for column_coord in column.dim_coords + column.aux_coords:
-            src_dims = cube.coord_dims(column_coord)
-            if not squish_my_dims.isdisjoint(src_dims):
-                if len(column_coord.points) != 1:
-                    raise Exception("Expected to find exactly one point. "
-                                    "Found %d" % len(column_coord.points))
-                new_cube.coord(column_coord.name()).points[i] = \
-                    column_coord.points[0]
+        column_coords = [coord
+                         for coord in cube.dim_coords + cube.aux_coords
+                         if not squish_my_dims.isdisjoint(
+                             cube.coord_dims(coord))]
+        new_cube_coords = [new_cube.coord(column_coord.name())
+                           for column_coord in column_coords]
+        all_point_indices = np.array(column_indexes)
+        single_point_test_cube = cube[column_indexes[0]]
+        for new_cube_coord, src_coord in zip(new_cube_coords, column_coords):
+            # Check structure of the indexed coord (at one selected point).
+            point_coord = single_point_test_cube.coord(src_coord)
+            if len(point_coord.points) != 1:
+                msg = ('Coord {} at one x-y position has the shape {}, '
+                       'instead of being a single point. ')
+                raise ValueError(msg.format(src_coord.name(), src_coord.shape))
+
+            # Work out which indices apply to the input coord.
+            # NOTE: we know how to index the source cube to get a cube with a
+            # single point for each coord, but this is very inefficient.
+            # So here, we translate cube indexes into *coord* indexes.
+            src_coord_dims = cube.coord_dims(src_coord)
+            fancy_coord_index_arrays = [list(all_point_indices[:, src_dim])
+                                        for src_dim in src_coord_dims]
+
+            # Fill the new coord with all the correct points from the old one.
+            new_cube_coord.points = src_coord.points[fancy_coord_index_arrays]
+            # NOTE: the new coords do *not* have bounds.
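[Note]: A sketch of this cube-index to coord-index translation, for a hypothetical 1-d coord spanning cube dim 1 (a tuple index is used here, which modern numpy requires).

    import numpy as np

    # One row per sample point, one column per cube dim.
    all_point_indices = np.array([[0, 3], [1, 2], [0, 0]])
    src_coord_dims = (1,)                    # the coord spans cube dim 1 only
    index_arrays = [list(all_point_indices[:, d]) for d in src_coord_dims]
    coord_points = np.array([10., 20., 30., 40.])
    new_points = coord_points[tuple(index_arrays)]  # -> array([40., 30., 10.])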

    return new_cube

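[Note]: A hypothetical call of the rewritten function; the cube and its coord names are assumed here, not taken from the PR.

    from iris.analysis.trajectory import interpolate

    sample_points = [('latitude', [0.0, 1.0, 2.0]),
                     ('longitude', [10.0, 11.0, 12.0])]
    # With 'cube' an existing cube that has latitude/longitude coords:
    # new_cube = interpolate(cube, sample_points, method='nearest')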