Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions lib/iris/analysis/stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2013 - 2014, Met Office
# (C) British Crown Copyright 2013 - 2015, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -30,7 +30,8 @@ def _get_calc_view(cube_a, cube_b, corr_coords):
"""
This function takes two cubes and returns cubes which are
flattened so that efficient comparisons can be performed
between the two.
between the two. If the arrays are maksed then only values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just a typo: "maksed" should be "masked".

that are unmasked in both arrays are used.

Args:

Expand Down Expand Up @@ -94,7 +95,29 @@ def _get_calc_view(cube_a, cube_b, corr_coords):
reshaped_b = data_b.transpose(slice_ind+res_ind)\
.reshape(dim_i_len, dim_j_len)

return reshaped_a, reshaped_b, res_ind
# Remove data where one or both cubes are masked
# First deal with the case that either cube is unmasked
# Collapse masks to the dimension we are correlating over (0th)
if np.ma.is_masked(reshaped_a):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be just `ma.is_masked(reshaped_a)`, with an additional import at the top of the file: `import numpy.ma as ma`. That will match the style used elsewhere.

a_not_masked = np.logical_not(reshaped_a.mask).any(axis=1)
else:
a_not_masked = True
if np.ma.is_masked(reshaped_b):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As above.

b_not_masked = np.logical_not(reshaped_b.mask.any(axis=1))
else:
b_not_masked = True

both_not_masked = a_not_masked & b_not_masked
try:
# compress to good values using mask array
return_a = reshaped_a.compress(both_not_masked)
return_b = reshaped_b.compress(both_not_masked)
except ValueError:
# expect when masks are just non-array True/False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please could you expand the comment a little to explain why it is acceptable to catch any ValueError here? I feel I am missing a bit of the logic.

return_a = reshaped_a
return_b = reshaped_b

return return_a, return_b, res_ind


def pearsonr(cube_a, cube_b, corr_coords=None):
Expand Down
28 changes: 27 additions & 1 deletion lib/iris/tests/analysis/test_stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2014, Met Office
# (C) British Crown Copyright 2014 - 2015, Met Office
#
# This file is part of Iris.
#
Expand Down Expand Up @@ -39,6 +39,14 @@ def setUp(self):
self.cube_b = iris.load_cube(iris.sample_data_path('GloSea4',
'ensemble_002.pp'))

dummycrd = iris.coords.DimCoord(range(100), long_name="dummy")
mask_a = [True]*20 + [False]*80
self.masked_a = iris.cube.Cube(np.ma.masked_array(range(100), mask_a))
self.masked_a.add_dim_coord(dummycrd, 0)
mask_b = [False]*10 + [True]*20 + [False]*70
self.masked_b = iris.cube.Cube(np.ma.masked_array(range(100), mask_b))
self.masked_b.add_dim_coord(dummycrd, 0)

def test_perfect_corr(self):
r = stats.pearsonr(self.cube_a, self.cube_a,
['latitude', 'longitude'])
Expand Down Expand Up @@ -101,6 +109,24 @@ def test_non_existent_coord(self):
with self.assertRaises(ValueError):
stats.pearsonr(self.cube_a, self.cube_b, 'bad_coord')

def test_differing_masks(self):
    """
    Check that the correlation only takes into account the
    points which are unmasked in both cubes.

    """
    # Masks that only partly overlap: both cubes wrap the same
    # underlying values, so a correlation of one is expected.
    corr = stats.pearsonr(self.masked_a, self.masked_b)
    self.assertArrayEqual(corr.data, [1.0])

    # Mask the first cube entirely: a masked result is expected.
    self.masked_a.data.mask = True
    corr = stats.pearsonr(self.masked_a, self.masked_b)
    self.assertArrayEqual(corr.data.mask, [True])

    # Mask both cubes entirely: a masked result is expected again.
    for cube in (self.masked_a, self.masked_b):
        cube.data.mask = True
    corr = stats.pearsonr(self.masked_a, self.masked_b)
    self.assertArrayEqual(corr.data.mask, [True])


if __name__ == '__main__':
tests.main()