diff --git a/lib/iris/analysis/stats.py b/lib/iris/analysis/stats.py index f3715f6e8b..3d16b2018e 100644 --- a/lib/iris/analysis/stats.py +++ b/lib/iris/analysis/stats.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013 - 2014, Met Office +# (C) British Crown Copyright 2013 - 2015, Met Office # # This file is part of Iris. # @@ -30,7 +30,8 @@ def _get_calc_view(cube_a, cube_b, corr_coords): """ This function takes two cubes and returns cubes which are flattened so that efficient comparisons can be performed - between the two. + between the two. If the arrays are maksed then only values + that are unmasked in both arrays are used. Args: @@ -94,7 +95,29 @@ def _get_calc_view(cube_a, cube_b, corr_coords): reshaped_b = data_b.transpose(slice_ind+res_ind)\ .reshape(dim_i_len, dim_j_len) - return reshaped_a, reshaped_b, res_ind + # Remove data where one or both cubes are masked + # First deal with the case that either cube is unmasked + # Collapse masks to the dimension we are correlating over (0th) + if np.ma.is_masked(reshaped_a): + a_not_masked = np.logical_not(reshaped_a.mask).any(axis=1) + else: + a_not_masked = True + if np.ma.is_masked(reshaped_b): + b_not_masked = np.logical_not(reshaped_b.mask.any(axis=1)) + else: + b_not_masked = True + + both_not_masked = a_not_masked & b_not_masked + try: + # compress to good values using mask array + return_a = reshaped_a.compress(both_not_masked) + return_b = reshaped_b.compress(both_not_masked) + except ValueError: + # expect when masks are just non-array True/False + return_a = reshaped_a + return_b = reshaped_b + + return return_a, return_b, res_ind def pearsonr(cube_a, cube_b, corr_coords=None): diff --git a/lib/iris/tests/analysis/test_stats.py b/lib/iris/tests/analysis/test_stats.py index 992978530c..3c12509fb2 100644 --- a/lib/iris/tests/analysis/test_stats.py +++ b/lib/iris/tests/analysis/test_stats.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2014, Met Office +# (C) British Crown Copyright 2014 - 2015, Met Office # # This file is part of Iris. # @@ -39,6 +39,14 @@ def setUp(self): self.cube_b = iris.load_cube(iris.sample_data_path('GloSea4', 'ensemble_002.pp')) + dummycrd = iris.coords.DimCoord(range(100), long_name="dummy") + mask_a = [True]*20 + [False]*80 + self.masked_a = iris.cube.Cube(np.ma.masked_array(range(100), mask_a)) + self.masked_a.add_dim_coord(dummycrd, 0) + mask_b = [False]*10 + [True]*20 + [False]*70 + self.masked_b = iris.cube.Cube(np.ma.masked_array(range(100), mask_b)) + self.masked_b.add_dim_coord(dummycrd, 0) + def test_perfect_corr(self): r = stats.pearsonr(self.cube_a, self.cube_a, ['latitude', 'longitude']) @@ -101,6 +109,24 @@ def test_non_existent_coord(self): with self.assertRaises(ValueError): stats.pearsonr(self.cube_a, self.cube_b, 'bad_coord') + def test_differing_masks(self): + """ + Test that we only consider points + where both cubes are unmasked + + """ + r = stats.pearsonr(self.masked_a, self.masked_b) + self.assertArrayEqual(r.data, [1.0]) + + self.masked_a.data.mask = True + r = stats.pearsonr(self.masked_a, self.masked_b) + self.assertArrayEqual(r.data.mask, [True]) + + self.masked_a.data.mask = True + self.masked_b.data.mask = True + r = stats.pearsonr(self.masked_a, self.masked_b) + self.assertArrayEqual(r.data.mask, [True]) + if __name__ == '__main__': tests.main()