Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
f40305e
Allow masked data to be passed into a Cell, and by extensions a Coord.
ukmo-ccbunney May 15, 2025
c3fa75c
Handled case where integer array with masked values could not be crea…
ukmo-ccbunney May 15, 2025
dff7cb8
Handle case where reading of scalar masked value from netCDF file
ukmo-ccbunney May 16, 2025
d7c5827
Removed some comments
ukmo-ccbunney May 21, 2025
1eb2798
Removed fill_value keyword to masked_array creation - wasn't fixing the
ukmo-ccbunney May 22, 2025
5c9e553
Single method for handling points arrays with masked data
ukmo-ccbunney May 22, 2025
2df6d05
Refactored to add small inline helper function to add AuxCoord
ukmo-ccbunney May 22, 2025
9d599db
Refactored the masked array handling and now filling masked arrays wi…
ukmo-ccbunney May 22, 2025
9ac20de
Added test for Cell hashing
ukmo-ccbunney May 22, 2025
f7ece4d
test_Cell.py: Converted unittest to pytest
ukmo-ccbunney May 22, 2025
da13470
Merge branch 'main' into masked_coords
ukmo-ccbunney May 22, 2025
dd6cba2
Added tests for merging of scalar aux coords with mising data.
ukmo-ccbunney May 23, 2025
ec157e2
Removed unnecessary __main__ entry point from test_Cell.py
ukmo-ccbunney May 23, 2025
cd1a7a8
Missing hash test for bounded points
ukmo-ccbunney May 23, 2025
e955116
More rigorous test on masked data of merged AuxCoord
ukmo-ccbunney May 23, 2025
b54e284
Migrated test_merge.py to pytest.
ukmo-ccbunney May 23, 2025
1ea061d
Moved tests/test_merge.py to tests/unit/merge/test_merge.py
ukmo-ccbunney May 23, 2025
43fcb15
What's new entry.
ukmo-ccbunney May 23, 2025
77b51fe
Fixed some typos in comment
ukmo-ccbunney May 23, 2025
802eeca
Split out integration test from unit/merge/test_merge.py to
ukmo-ccbunney May 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 67 additions & 26 deletions lib/iris/_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,10 +1406,37 @@ def axis_and_name(name):
# TODO: Consider appropriate sort order (ascending,
# descending) i.e. use CF positive attribute.
cells = sorted(indexes[name])
points = np.array(
[cell.point for cell in cells],
dtype=metadata[name].points_dtype,
)
points = [cell.point for cell in cells]

# If any points are masked then create a masked array type,
# otherwise create a standard ndarray.
if np.ma.masked in points:
dtype = metadata[name].points_dtype

# Create a pre-filled array with all elements set to `fill_value` for dtype
# This avoids the following problems when trying to do `np.ma.masked_array(points, dtype...)`:
# - Underlying data of masked elements is arbitrary
# - Can't convert a np.ma.masked to an integer type
# - For floating point arrays, numpy raises a warning about "converting masked elements to NaN"
fill_value = np.trunc(
np.ma.default_fill_value(dtype), dtype=dtype
) # truncation needed to deal with silly default fill values in Numpy

# create array of fill values; ensures we have consistent data under mask
arr_points = np.ma.repeat(dtype.type(fill_value), len(points))

# get mask index and filtered data then store in new array:
mask = np.array([p is np.ma.masked for p in points])
arr_points.mask = mask

# Need another list comprehension to avoid numpy warning "converting masked elements to NaN":
arr_points[~mask] = np.array(
[p for p in points if p is not np.ma.masked]
)
points = arr_points
else:
points = np.array(points, dtype=metadata[name].points_dtype)

if cells[0].bound is not None:
bounds = np.array(
[cell.bound for cell in cells],
Expand Down Expand Up @@ -1594,13 +1621,19 @@ def _build_coordinates(self):
# the bounds are not monotonic, so try building the coordinate,
# and if it fails make the coordinate into an auxiliary coordinate.
# This will ultimately make an anonymous dimension.
try:
coord = iris.coords.DimCoord(
template.points, bounds=template.bounds, **template.kwargs
)
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
except ValueError:

# If the points contain masked values, it definitely cannot be built
# as a dim coord, so add it to the _aux_templates immediately.
if np.ma.is_masked(template.points):
self._aux_templates.append(template)
else:
try:
coord = iris.coords.DimCoord(
template.points, bounds=template.bounds, **template.kwargs
)
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
except ValueError:
self._aux_templates.append(template)

# There is the potential that there are still anonymous dimensions.
# Get a list of the dimensions which are not anonymous at this stage.
Expand All @@ -1609,26 +1642,34 @@ def _build_coordinates(self):
]

# Build the auxiliary coordinates.
def _build_aux_coord_from_template(template):
    """Create an AuxCoord from ``template`` and record it with its dims.

    The new coordinate is appended to ``aux_coords_and_dims`` as a
    ``_CoordAndDims`` pair; nothing is returned.
    """
    # AuxCoord does not accept the "circular" keyword, so discard it
    # from the template's kwargs if it is present.
    template.kwargs.pop("circular", None)
    aux_coord = iris.coords.AuxCoord(
        template.points,
        bounds=template.bounds,
        **template.kwargs,
    )
    entry = _CoordAndDims(aux_coord, template.dims)
    aux_coords_and_dims.append(entry)

for template in self._aux_templates:
# Attempt to build a DimCoord and add it to the cube. If this
# fails, e.g. it's non-monotonic, multi-dimensional or non-numeric,
# then build an AuxCoord.
try:
coord = iris.coords.DimCoord(
template.points, bounds=template.bounds, **template.kwargs
)
if len(template.dims) == 1 and template.dims[0] not in covered_dims:
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
covered_dims.append(template.dims[0])
else:
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
except ValueError:
# kwarg not applicable to AuxCoord.
template.kwargs.pop("circular", None)
coord = iris.coords.AuxCoord(
template.points, bounds=template.bounds, **template.kwargs
)
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))

# Check here whether points are masked? If so then it has to be an AuxCoord
if np.ma.is_masked(template.points):
_build_aux_coord_from_template(template)
else:
try:
coord = iris.coords.DimCoord(
template.points, bounds=template.bounds, **template.kwargs
)
if len(template.dims) == 1 and template.dims[0] not in covered_dims:
dim_coords_and_dims.append(_CoordAndDims(coord, template.dims))
covered_dims.append(template.dims[0])
else:
aux_coords_and_dims.append(_CoordAndDims(coord, template.dims))
except ValueError:
_build_aux_coord_from_template(template)

# Mix in the vector coordinates.
for item, dims in zip(
Expand Down
17 changes: 14 additions & 3 deletions lib/iris/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,9 @@ class Cell(namedtuple("Cell", ["point", "bound"])):
# Make this class's comparison operators override those of numpy
__array_priority__ = 100

# pre-computed hash for un-hashable `np.ma.masked` value
_MASKED_VALUE_HASH = hash("<<##MASKED_VALUE##>>")

def __new__(cls, point=None, bound=None):
"""Construct a Cell from point or point-and-bound information."""
if point is None:
Expand Down Expand Up @@ -1277,13 +1280,17 @@ def __add__(self, mod):

def __hash__(self):
    """Return a hash for this cell, tolerating a masked point.

    The equality rules this hash has to agree with are defined in __eq__.
    """
    # `np.ma.masked` is unhashable, so a pre-computed stand-in value is
    # hashed in its place.
    hashable_point = (
        Cell._MASKED_VALUE_HASH if np.ma.is_masked(self.point) else self.point
    )

    bounds = self.bound
    if bounds is None:
        return hash(hashable_point)

    # Hash whichever of the bounds and the reversed bounds compares lower,
    # so the direction in which the bounds are stored does not affect
    # the result.
    flipped = bounds[::-1]
    ordered = flipped if flipped < bounds else bounds
    return hash((hashable_point, ordered))

def __eq__(self, other):
"""Compare Cell equality depending on the type of the object to be compared."""
Expand Down Expand Up @@ -2086,7 +2093,8 @@ def cell(self, index):
"""
index = iris.util._build_full_slice_given_keys(index, self.ndim)

point = tuple(np.array(self.core_points()[index], ndmin=1).flatten())
# Use `np.asanyarray` to preserve any masked values:
point = tuple(np.asanyarray(self.core_points()[index]).flatten())
if len(point) != 1:
raise IndexError(
"The index %s did not uniquely identify a single "
Expand Down Expand Up @@ -2809,6 +2817,9 @@ def _values(self, points):
# Check validity requirements for dimension-coordinate points.
self._new_points_requirements(points)
# Cast to a numpy array for masked arrays with no mask.

# NOTE: This is the point where any mask is lost on a coordinate if none of the
# values are actually masked. What if we wanted this to be an AuxCoord with a mask?
points = np.array(points)

super(DimCoord, self.__class__)._values.fset(self, points)
Expand Down
8 changes: 8 additions & 0 deletions lib/iris/fileformats/netcdf/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,14 @@ def _get_cf_var_data(cf_var, filename):
if total_bytes < _LAZYVAR_MIN_BYTES:
# Don't make a lazy array, as it will cost more memory AND more time to access.
result = cf_var[:]

# Special handling of masked scalar value; this will be returned as
# an `np.ma.masked` instance which will lose the original dtype.
# The workaround for this is to return a 1-element masked array of the
# correct dtype. Note - this is not an issue for masked arrays,
# only masked scalar values.
if result is np.ma.masked:
result = np.ma.masked_all(1, dtype=cf_var.datatype)
else:
# Get lazy chunked data out of a cf variable.
# Creates Dask wrappers around data arrays for any cube components which
Expand Down
44 changes: 44 additions & 0 deletions lib/iris/tests/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import numpy as np
import numpy.ma as ma
import pytest

import iris
from iris._lazy_data import as_lazy_data
Expand Down Expand Up @@ -1098,5 +1099,48 @@ def test_splitattrs_keys_local_match_masks_global_mismatch(self):
)


@pytest.mark.parametrize(
    "dtype", [np.int16, np.int32, np.int64, np.float32, np.float64]
)
class TestCubeMerge_masked_scalar:
    """Merging of cubes whose scalar coordinate contains masked data."""

    def _build_cube(self, scalar_data):
        """Return a 5-point cube with ``scalar_data`` as a scalar AuxCoord."""
        realization = AuxCoord(points=scalar_data, standard_name="realization")
        return iris.cube.Cube(
            np.arange(5),
            standard_name="air_pressure",
            aux_coords_and_dims=[(realization, None)],
        )

    def test_merge_scalar_coords_all_masked(self, dtype):
        """Merge cubes whose scalar aux coords are all masked."""
        n = 5
        source_cubes = []
        for _ in range(n):
            fully_masked = np.ma.masked_all(1, dtype=dtype)
            source_cubes.append(self._build_cube(fully_masked))

        merged = iris.cube.CubeList(source_cubes).merge_cube()
        points = merged.coord("realization").points

        # The merged coordinate must stay masked everywhere and keep its dtype.
        assert np.ma.isMaskedArray(points)
        assert np.all(points.mask)
        assert points.dtype.type is dtype

    def test_merge_scalar_coords_some_masked(self, dtype):
        """Merge cubes whose scalar aux coords mix masked and unmasked data."""
        n = 5
        source_cubes = []
        for value in range(n):
            # Odd values are masked; even values are left unmasked.
            partly_masked = np.ma.masked_array(value, dtype=dtype, mask=value % 2)
            source_cubes.append(self._build_cube(partly_masked))

        merged = iris.cube.CubeList(source_cubes).merge_cube()
        points = merged.coord("realization").points

        # The merged mask must be mixed: some points masked, but not all.
        assert np.ma.isMaskedArray(points)
        assert np.any(points.mask)
        assert not np.all(points.mask)
        assert points.dtype.type is dtype


# When this module is executed directly as a script, hand control to tests.main().
if __name__ == "__main__":
tests.main()
Loading
Loading