diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py
index 9207508271..69b11b8026 100644
--- a/lib/iris/analysis/_interpolate_private.py
+++ b/lib/iris/analysis/_interpolate_private.py
@@ -193,10 +193,13 @@ def nearest_neighbour_indices(cube, sample_points):
     return tuple(indices)
 
 
-def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
+def _nearest_neighbour_indices_ndcoords(cube, sample_points, cache=None):
     """
     See documentation for :func:`iris.analysis.interpolate.nearest_neighbour_indices`.
 
+    'sample_points' is of the form [[coord-or-coord-name, point-value(s)]*].
+    The lengths of all the point-values sequences must be equal.
+
     This function is adapted for points sampling a multi-dimensional coord,
     and can currently only do nearest neighbour interpolation.
 
@@ -209,36 +212,41 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
     # A "sample space cube" is made which only has the coords and dims we are sampling on.
     # We get the nearest neighbour using this sample space cube.
 
-    if isinstance(sample_point, dict):
+    if isinstance(sample_points, dict):
         msg = ('Providing a dictionary to specify points is deprecated. '
                'Please provide a list of (coordinate, values) pairs.')
         warn_deprecated(msg)
-        sample_point = list(sample_point.items())
+        sample_points = list(sample_points.items())
 
-    if sample_point:
+    if sample_points:
         try:
-            coord, value = sample_point[0]
+            coord, value = sample_points[0]
         except ValueError:
-            raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_point)
+            raise ValueError('Sample points must be a list of (coordinate, value) pairs. Got %r.' % sample_points)
 
     # Convert names to coords in sample_point
-    point = []
+    # Reformat sample point values for use in _cartesian_sample_points(), below.
+    coord_values = []
+    sample_point_coords = []
+    sample_point_coord_names = []
     ok_coord_ids = set(map(id, cube.dim_coords + cube.aux_coords))
-    for coord, value in sample_point:
-        if isinstance(coord, six.string_types):
-            coord = cube.coord(coord)
-        else:
-            coord = cube.coord(coord)
+    for coord, value in sample_points:
+        coord = cube.coord(coord)
         if id(coord) not in ok_coord_ids:
             msg = ('Invalid sample coordinate {!r}: derived coordinates are'
                    ' not allowed.'.format(coord.name()))
             raise ValueError(msg)
-        point.append((coord, value))
+        sample_point_coords.append(coord)
+        sample_point_coord_names.append(coord.name())
+        value = np.array(value, ndmin=1)
+        coord_values.append(value)
 
-    # Reformat sample_point for use in _cartesian_sample_points(), below.
-    sample_point = np.array([[value] for coord, value in point])
-    sample_point_coords = [coord for coord, value in point]
-    sample_point_coord_names = [coord.name() for coord, value in point]
+    coord_point_lens = np.array([len(value) for value in coord_values])
+    if not np.all(coord_point_lens == coord_point_lens[0]):
+        msg = 'All coordinates must have the same number of sample points.'
+        raise ValueError(msg)
+
+    coord_values = np.array(coord_values)
 
     # Which dims are we sampling?
     sample_dims = set()
@@ -262,14 +270,9 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
     # Order the sample point coords according to the sample space cube coords
     sample_space_coord_names = [coord.name() for coord in sample_space_cube.coords()]
     new_order = [sample_space_coord_names.index(name) for name in sample_point_coord_names]
-    sample_point = np.array([sample_point[i] for i in new_order])
+    coord_values = np.array([coord_values[i] for i in new_order])
     sample_point_coord_names = [sample_point_coord_names[i] for i in new_order]
 
-    # Convert the sample point to cartesian coords.
-    # If there is no latlon within the coordinate there will be no change.
-    # Otherwise, geographic latlon is replaced with cartesian xyz.
-    cartesian_sample_point = _cartesian_sample_points(sample_point, sample_point_coord_names)[0]
-
     sample_space_coords = sample_space_cube.dim_coords + sample_space_cube.aux_coords
     sample_space_coords_and_dims = [(coord, sample_space_cube.coord_dims(coord)) for coord in sample_space_coords]
 
@@ -288,29 +291,53 @@ def _nearest_neighbour_indices_ndcoords(cube, sample_point, cache=None):
     # Convert to cartesian coordinates. Flatten for kdtree compatibility.
     cartesian_space_data_coords = _cartesian_sample_points(sample_space_data_positions, sample_point_coord_names)
 
-    # Get the nearest datum index to the sample point. This is the goal of the function.
+    # Create a kdtree for the nearest-distance lookup to these 3d points.
     kdtree = scipy.spatial.cKDTree(cartesian_space_data_coords)
+    # This can find the nearest datum point to any given target point,
+    # which is the goal of this function.
 
-    cartesian_distance, datum_index = kdtree.query(cartesian_sample_point)
-    sample_space_ndi = np.unravel_index(datum_index, sample_space_cube.data.shape)
+    # Update cache
+    if cache is not None:
+        cache[cube] = kdtree
 
-    # Turn sample_space_ndi into a main cube slice.
-    # Map sample cube to main cube dims and leave the rest as a full slice.
-    main_cube_slice = [slice(None, None)] * cube.ndim
+    # Convert the sample points to cartesian (3d) coords.
+    # If there is no latlon within the coordinate there will be no change.
+    # Otherwise, geographic latlon is replaced with cartesian xyz.
+    cartesian_sample_points = _cartesian_sample_points(
+        coord_values, sample_point_coord_names)
+
+    # Use kdtree to get the nearest sourcepoint index for each target point.
+    _, datum_index_lists = kdtree.query(cartesian_sample_points)
+
+    # Convert flat indices back into multidimensional sample-space indices.
+    sample_space_dimension_indices = np.unravel_index(
+        datum_index_lists, sample_space_cube.data.shape)
+    # Convert this from "pointwise list of index arrays for each dimension",
+    # to "list of cube indices for each point".
+    sample_space_ndis = np.array(sample_space_dimension_indices).transpose()
+
+    # For the returned result, we must convert these indices into the source
+    # (sample-space) cube, to equivalent indices into the target 'cube'.
+
+    # Make a result array: (cube.ndim * <index>), per sample point.
+    n_points = coord_values.shape[-1]
+    main_cube_slices = np.empty((n_points, cube.ndim), dtype=object)
+    # Initialise so all unused indices are ":".
+    main_cube_slices[:] = slice(None)
+
+    # Move result indices according to the source (sample) and target (cube)
+    # dimension mappings.
     for sample_coord, sample_coord_dims in sample_space_coords_and_dims:
         # Find the coord in the main cube
         main_coord = cube.coord(sample_coord.name())
         main_coord_dims = cube.coord_dims(main_coord)
-        # Mark the nearest data index/indices with respect to this coord
+        # Fill nearest-point data indices for each coord dimension.
         for sample_i, main_i in zip(sample_coord_dims, main_coord_dims):
-            main_cube_slice[main_i] = sample_space_ndi[sample_i]
-
-
-    # Update cache
-    if cache is not None:
-        cache[cube] = kdtree
+            main_cube_slices[:, main_i] = sample_space_ndis[:, sample_i]
 
-    return tuple(main_cube_slice)
+    # Return as a list of **tuples** : required for correct indexing usage.
+    result = [tuple(inds) for inds in main_cube_slices]
+    return result
 
 
 def extract_nearest_neighbour(cube, sample_points):
diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py
index f2d36fdd3f..9ef7d7639f 100644
--- a/lib/iris/analysis/trajectory.py
+++ b/lib/iris/analysis/trajectory.py
@@ -28,9 +28,10 @@
 
 import numpy as np
 
+import iris.analysis
 import iris.coord_systems
 import iris.coords
-import iris.analysis
+
 from iris.analysis._interpolate_private import \
     _nearest_neighbour_indices_ndcoords, linear as linear_regrid
 
@@ -249,30 +250,111 @@ def interpolate(cube, sample_points, method=None):
             method = "nearest"
             break
 
-    # Use a cache with _nearest_neighbour_indices_ndcoords()
-    cache = {}
-
-    for i in range(trajectory_size):
-        point = [(coord, values[i]) for coord, values in sample_points]
-
-        if method in ["linear", None]:
+    if method in ["linear", None]:
+        for i in range(trajectory_size):
+            point = [(coord, values[i]) for coord, values in sample_points]
             column = linear_regrid(cube, point)
             new_cube.data[..., i] = column.data
-        elif method == "nearest":
-            column_index = _nearest_neighbour_indices_ndcoords(cube, point,
-                                                               cache=cache)
-            column = cube[column_index]
-            new_cube.data[..., i] = column.data
+            # Fill in the empty squashed (non derived) coords.
+            for column_coord in column.dim_coords + column.aux_coords:
+                src_dims = cube.coord_dims(column_coord)
+                if not squish_my_dims.isdisjoint(src_dims):
+                    if len(column_coord.points) != 1:
+                        msg = "Expected to find exactly one point. Found {}."
+                        raise Exception(msg.format(len(column_coord.points)))
+                    new_cube.coord(column_coord.name()).points[i] = \
+                        column_coord.points[0]
+
+    elif method == "nearest":
+        # Use a cache with _nearest_neighbour_indices_ndcoords()
+        cache = {}
+        column_indexes = _nearest_neighbour_indices_ndcoords(
+            cube, sample_points, cache=cache)
+
+        # Construct "fancy" indexes, so we can create the result data array in
+        # a single numpy indexing operation.
+        # ALSO: capture the index range in each dimension, so that we can fetch
+        # only a required (square) sub-region of the source data.
+        fancy_source_indices = []
+        region_slices = []
+        n_index_length = len(column_indexes[0])
+        for i_ind in range(n_index_length):
+            contents = [column_index[i_ind]
+                        for column_index in column_indexes]
+            each_used = [content != slice(None) for content in contents]
+            if not np.any(each_used):
+                # This dimension is not addressed by the operation.
+                # Use a ":" as the index.
+                fancy_index = slice(None)
+                # No sub-region selection for this dimension.
+                region_slice = slice(None)
+            elif np.all(each_used):
+                # This dimension is addressed : use a list of indices.
+                # Select the region by min+max indices.
+                start_ind = np.min(contents)
+                stop_ind = 1 + np.max(contents)
+                region_slice = slice(start_ind, stop_ind)
+                # Record point indices with start subtracted from all of them.
+                fancy_index = list(np.array(contents) - start_ind)
+            else:
+                # Should really never happen, if _ndcoords is right.
+                msg = ('Internal error in trajectory interpolation : point '
+                       'selection indices should all have the same form.')
+                raise ValueError(msg)
+
+            fancy_source_indices.append(fancy_index)
+            region_slices.append(region_slice)
+
+        # NOTE: fetch the required (square-section) region of the source data.
+        # This is not quite as good as only fetching the individual points
+        # which are used, but it avoids creating a sub-cube for each point,
+        # which is very slow, especially when points are re-used a lot ...
+        source_area_indices = tuple(region_slices)
+        source_data = cube[source_area_indices].data
+
+        # Apply fancy indexing to get all the result data points.
+        source_data = source_data[fancy_source_indices]
+        # "Fix" problems with missing datapoints producing odd values
+        # when copied from a masked into an unmasked array.
+        if np.ma.isMaskedArray(source_data):
+            # This is **not** proper mask handling, because we cannot produce a
+            # masked result, but it ensures we use a "filled" version of the
+            # input in this case.
+            source_data = source_data.filled()
+        new_cube.data[:] = source_data
+        # NOTE: we assign to "new_cube.data[:]" and *not* just "new_cube.data",
+        # because the existing code produces a default dtype from 'np.empty'
+        # instead of preserving the input dtype.
+        # TODO: maybe this should be fixed -- i.e. to preserve input dtype ??
 
         # Fill in the empty squashed (non derived) coords.
-        for column_coord in column.dim_coords + column.aux_coords:
-            src_dims = cube.coord_dims(column_coord)
-            if not squish_my_dims.isdisjoint(src_dims):
-                if len(column_coord.points) != 1:
-                    raise Exception("Expected to find exactly one point. "
-                                    "Found %d" % len(column_coord.points))
-                new_cube.coord(column_coord.name()).points[i] = \
-                    column_coord.points[0]
+        column_coords = [coord
+                         for coord in cube.dim_coords + cube.aux_coords
+                         if not squish_my_dims.isdisjoint(
+                             cube.coord_dims(coord))]
+        new_cube_coords = [new_cube.coord(column_coord.name())
+                           for column_coord in column_coords]
+        all_point_indices = np.array(column_indexes)
+        single_point_test_cube = cube[column_indexes[0]]
+        for new_cube_coord, src_coord in zip(new_cube_coords, column_coords):
+            # Check structure of the indexed coord (at one selected point).
+            point_coord = single_point_test_cube.coord(src_coord)
+            if len(point_coord.points) != 1:
+                msg = ('Coord {} at one x-y position has the shape {}, '
+                       'instead of being a single point. ')
+                raise ValueError(msg.format(src_coord.name(), src_coord.shape))
+
+            # Work out which indices apply to the input coord.
+            # NOTE: we know how to index the source cube to get a cube with a
+            # single point for each coord, but this is very inefficient.
+            # So here, we translate cube indexes into *coord* indexes.
+            src_coord_dims = cube.coord_dims(src_coord)
+            fancy_coord_index_arrays = [list(all_point_indices[:, src_dim])
+                                        for src_dim in src_coord_dims]
+
+            # Fill the new coord with all the correct points from the old one.
+            new_cube_coord.points = src_coord.points[fancy_coord_index_arrays]
+            # NOTE: the new coords do *not* have bounds.
 
     return new_cube
 
diff --git a/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py b/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py
index 6996a2bad5..ae9f691a7f 100644
--- a/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py
+++ b/lib/iris/tests/unit/analysis/interpolate_private/test__nearest_neighbour_indices_ndcoords.py
@@ -46,7 +46,17 @@ def test_nonlatlon_simple_2d(self):
         cube.add_dim_coord(co_x, 1)
         sample_point = [('x', 2.8), ('y', 18.5)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (1, 2))
+        self.assertEqual(result, [(1, 2)])
+
+    def test_nonlatlon_multiple_2d(self):
+        co_y = DimCoord([10.0, 20.0], long_name='y')
+        co_x = DimCoord([1.0, 2.0, 3.0], long_name='x')
+        cube = Cube(np.zeros((2, 3)))
+        cube.add_dim_coord(co_y, 0)
+        cube.add_dim_coord(co_x, 1)
+        sample_points = [('x', [2.8, -350.0, 1.7]), ('y', [18.5, 8.7, 12.2])]
+        result = nn_ndinds(cube, sample_points)
+        self.assertEqual(result, [(1, 2), (0, 0), (0, 1)])
 
     def test_latlon_simple_2d(self):
         co_y = DimCoord([10.0, 20.0],
@@ -58,7 +68,21 @@ def test_latlon_simple_2d(self):
         cube.add_dim_coord(co_x, 1)
         sample_point = [('longitude', 2.8), ('latitude', 18.5)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (1, 2))
+        self.assertEqual(result, [(1, 2)])
+
+    def test_latlon_multiple_2d(self):
+        co_y = DimCoord([10.0, 20.0],
+                        standard_name='latitude', units='degrees')
+        co_x = DimCoord([1.0, 2.0, 3.0],
+                        standard_name='longitude', units='degrees')
+        cube = Cube(np.zeros((2, 3)))
+        cube.add_dim_coord(co_y, 0)
+        cube.add_dim_coord(co_x, 1)
+        sample_points = [('longitude', [2.8, -350.0, 1.7]),
+                         ('latitude', [18.5, 8.7, 12.2])]
+        result = nn_ndinds(cube, sample_points)
+        # Note slight difference from non-latlon version.
+        self.assertEqual(result, [(1, 2), (0, 2), (0, 1)])
 
 
 class Test1d(tests.IrisTest):
@@ -70,7 +94,7 @@ def test_nonlatlon_simple_1d(self):
         cube.add_aux_coord(co_x, 0)
         sample_point = [('x', 2.8), ('y', 18.5)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (5,))
+        self.assertEqual(result, [(5,)])
 
     def test_latlon_simple_1d(self):
         cube = Cube([11.0, 12.0, 13.0, 21.0, 22.0, 23.0])
@@ -82,7 +106,7 @@ def test_latlon_simple_1d(self):
         cube.add_aux_coord(co_x, 0)
         sample_point = [('longitude', 2.8), ('latitude', 18.5)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (5,))
+        self.assertEqual(result, [(5,)])
 
 
 class TestApiExtras(tests.IrisTest):
@@ -96,7 +120,7 @@ def test_no_y_dim(self):
         cube.add_dim_coord(co_x, 1)
         sample_point = [('x', 2.8)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (slice(None), 2))
+        self.assertEqual(result, [(slice(None), 2)])
 
     def test_no_x_dim(self):
         # Operate in Y only, returned slice should be [iy, :].
@@ -107,7 +131,7 @@ def test_no_x_dim(self):
         cube.add_dim_coord(co_x, 1)
         sample_point = [('y', 18.5)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (1, slice(None)))
+        self.assertEqual(result, [(1, slice(None))])
 
     def test_sample_dictionary(self):
         # Pass sample_point arg as a dictionary: this usage mode is deprecated.
@@ -120,7 +144,7 @@ def test_sample_dictionary(self):
         warn_call = self.patch(
             'iris.analysis._interpolate_private.warn_deprecated')
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (1, 2))
+        self.assertEqual(result, [(1, 2)])
         self.assertEqual(warn_call.call_count, 1)
         self.assertIn('dictionary to specify points is deprecated',
                       warn_call.call_args[0][0])
@@ -139,7 +163,7 @@ def _testcube_latlon_1d(self, lats, lons):
     def _check_latlon_1d(self, lats, lons, sample_point, expect):
         cube = self._testcube_latlon_1d(lats, lons)
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (expect,))
+        self.assertEqual(result, [(expect,)])
 
     def test_lat_scaling(self):
         # Check that (88, 25) is closer to (88, 0) than to (87, 25)
@@ -158,7 +182,7 @@ def test_alternate_latlon_names_okay(self):
         cube.coord('longitude').rename('x_longitude_x')
         sample_point = [('y_latitude_y', 88), ('x_longitude_x', 25)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (0,))
+        self.assertEqual(result, [(0,)])
 
     def test_alternate_nonlatlon_names_different(self):
         # Check that (88, 25) is **NOT** closer to (88, 0) than to (87, 25)
@@ -169,7 +193,7 @@ def test_alternate_nonlatlon_names_different(self):
         cube.coord('longitude').rename('x')
         sample_point = [('y', 88), ('x', 25)]
         result = nn_ndinds(cube, sample_point)
-        self.assertEqual(result, (1,))
+        self.assertEqual(result, [(1,)])
 
     def test_lons_wrap_359_0(self):
         # Check that (0, 359) is closer to (0, 0) than to (0, 350)
diff --git a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py
index b04d422b1e..1381f2cc4c 100644
--- a/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py
+++ b/lib/iris/tests/unit/analysis/trajectory/test_interpolate.py
@@ -161,8 +161,9 @@ def test_aux_coord_fail_mixed_dims(self):
                                      [211, 212, 213, 214]],
                                     long_name='aux_0x'),
                            (0, 2))
-        msg = 'Expected to find exactly one point.*Found 2'
-        with self.assertRaisesRegexp(Exception, msg):
+        msg = ('Coord aux_0x at one x-y position has the shape.*'
+               'instead of being a single point')
+        with self.assertRaisesRegexp(ValueError, msg):
             interpolate(cube, self.single_sample_point, method='nearest')
 
     def test_metadata(self):
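A minimal usage sketch (not part of the patch), based on test_nonlatlon_multiple_2d above, showing the new multi-point call form of _nearest_neighbour_indices_ndcoords; the cube layout and expected indices are taken directly from that test.

    import numpy as np
    from iris.coords import DimCoord
    from iris.cube import Cube
    from iris.analysis._interpolate_private import \
        _nearest_neighbour_indices_ndcoords

    # 2 x 3 cube as in test_nonlatlon_multiple_2d.
    cube = Cube(np.zeros((2, 3)))
    cube.add_dim_coord(DimCoord([10.0, 20.0], long_name='y'), 0)
    cube.add_dim_coord(DimCoord([1.0, 2.0, 3.0], long_name='x'), 1)

    # One call now resolves several sample points at once and returns a list
    # of index tuples, one per point, instead of a single tuple.
    indices = _nearest_neighbour_indices_ndcoords(
        cube, [('x', [2.8, -350.0, 1.7]), ('y', [18.5, 8.7, 12.2])])
    # Per the new unit test, indices == [(1, 2), (0, 0), (0, 1)];
    # each tuple can index the source cube directly, e.g. cube[indices[0]].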