diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index ae79764343..17510b4da1 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -72,6 +72,15 @@ This document explains the changes made to Iris for this release #. `@larsbarring`_ updated :func:`~iris.util.equalise_attributes` to return a list of dictionaries containing the attributes removed from each :class:`~iris.cube.Cube`. (:pull:`4357`) +#. `@trexfeathers`_ enabled streaming of **all** lazy arrays when saving to + NetCDF files (was previously just :class:`~iris.cube.Cube` + :attr:`~iris.cube.Cube.data`). This is + important given the much greater size of + :class:`~iris.coords.AuxCoord` :attr:`~iris.coords.AuxCoord.points` and + :class:`~iris.experimental.ugrid.mesh.Connectivity` + :attr:`~iris.experimental.ugrid.mesh.Connectivity.indices` under the + `UGRID`_ model. (:pull:`4375`) + 🐛 Bugs Fixed ============= diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 867d49291b..88ad4bcc7f 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -27,7 +27,7 @@ import numpy as np import numpy.ma as ma -from iris._lazy_data import as_lazy_data +from iris._lazy_data import _co_realise_lazy_arrays, as_lazy_data, is_lazy_data from iris.aux_factory import ( AtmosphereSigmaFactory, HybridHeightFactory, @@ -1475,12 +1475,17 @@ def _add_mesh(self, cube_or_mesh): if conn.src_dim == 1: # Has the 'other' dimension order, =reversed conn_dims = conn_dims[::-1] + if iris.util.is_masked(conn.core_indices()): + # Flexible mesh. + fill_value = -1 + else: + fill_value = None cf_conn_name = self._create_generic_cf_array_var( cube_or_mesh, [], conn, element_dims=conn_dims, - fill_value=-1, + fill_value=fill_value, ) # Add essential attributes to the Connectivity variable. cf_conn_var = self._dataset.variables[cf_conn_name] @@ -1994,10 +1999,14 @@ def _ensure_valid_dtype(self, values, src_name, src_object): "NETCDF3_64BIT", "NETCDF4_CLASSIC", ): + val_min, val_max = (values.min(), values.max()) + if is_lazy_data(values): + val_min, val_max = _co_realise_lazy_arrays([val_min, val_max]) # Cast to an integer type supported by netCDF3. - if not np.can_cast(values.max(), np.int32) or not np.can_cast( - values.min(), np.int32 - ): + can_cast = all( + [np.can_cast(m, np.int32) for m in (val_min, val_max)] + ) + if not can_cast: msg = ( "The data type of {} {!r} is not supported by {} and" " its values cannot be safely cast to a supported" @@ -2030,7 +2039,7 @@ def _create_cf_bounds(self, coord, cf_var, cf_name): if hasattr(coord, "has_bounds") and coord.has_bounds(): # Get the values in a form which is valid for the file format. bounds = self._ensure_valid_dtype( - coord.bounds, "the bounds of coordinate", coord + coord.core_bounds(), "the bounds of coordinate", coord ) n_bounds = bounds.shape[-1] @@ -2057,7 +2066,12 @@ def _create_cf_bounds(self, coord, cf_var, cf_name): bounds.dtype.newbyteorder("="), cf_var.dimensions + (bounds_dimension_name,), ) - cf_var_bounds[:] = bounds + self._lazy_stream_data( + data=bounds, + fill_value=None, + fill_warn=True, + cf_var=cf_var_bounds, + ) def _get_cube_variable_name(self, cube): """ @@ -2311,7 +2325,7 @@ def _create_generic_cf_array_var( # Get the data values, in a way which works for any element type, as # all are subclasses of _DimensionalMetadata. # (e.g. =points if a coord, =data if an ancillary, etc) - data = element._values + data = element._core_values() if np.issubdtype(data.dtype, np.str_): # Deal with string-type variables. @@ -2336,7 +2350,10 @@ def _create_generic_cf_array_var( # Convert data from an array of strings into a character array # with an extra string-length dimension. if len(element_dims) == 1: - data = list("%- *s" % (string_dimension_depth, data[0])) + data_first = data[0] + if is_lazy_data(data_first): + data_first = data_first.compute() + data = list("%- *s" % (string_dimension_depth, data_first)) else: orig_shape = data.shape new_shape = orig_shape + (string_dimension_depth,) @@ -2353,14 +2370,6 @@ def _create_generic_cf_array_var( element_type = type(element).__name__ data = self._ensure_valid_dtype(data, element_type, element) - if fill_value is not None: - if np.ma.is_masked(data): - # Use a specific fill-value in place of the netcdf default. - data = np.ma.filled(data, fill_value) - else: - # Create variable without a (non-standard) fill_value - fill_value = None - # Check if this is a dim-coord. is_dimcoord = cube is not None and element in cube.dim_coords @@ -2368,7 +2377,7 @@ def _create_generic_cf_array_var( # Disallow saving of *masked* cell measures. # NOTE: currently, this is the only functional difference in # variable creation between an ancillary and a cell measure. - if ma.is_masked(data): + if iris.util.is_masked(data): # We can't save masked points properly, as we don't maintain # a fill_value. (Load will not record one, either). msg = "Cell measures with missing data are not supported." @@ -2398,7 +2407,9 @@ def _create_generic_cf_array_var( self._create_cf_bounds(element, cf_var, cf_name) # Add the data to the CF-netCDF variable. - cf_var[:] = data # TODO: support dask streaming + self._lazy_stream_data( + data=data, fill_value=fill_value, fill_warn=True, cf_var=cf_var + ) # Add names + units self._set_cf_var_attributes(cf_var, element) @@ -2696,6 +2707,8 @@ def _create_cf_data_variable( The newly created CF-netCDF data variable. """ + # Get the values in a form which is valid for the file format. + data = self._ensure_valid_dtype(cube.core_data(), "cube", cube) if packing: if isinstance(packing, dict): @@ -2717,11 +2730,12 @@ def _create_cf_data_variable( raise ValueError(msg) else: # We compute the scale_factor and add_offset based on the - # min/max of the data. This requires the data to be loaded. - masked = ma.isMaskedArray(cube.data) + # min/max of the data. + masked = iris.util.is_masked(data) dtype = np.dtype(packing) - cmax = cube.data.max() - cmin = cube.data.min() + cmin, cmax = (data.min(), data.max()) + if is_lazy_data(data): + cmin, cmax = _co_realise_lazy_arrays([cmin, cmax]) n = dtype.itemsize * 8 if masked: scale_factor = (cmax - cmin) / (2 ** n - 2) @@ -2734,6 +2748,8 @@ def _create_cf_data_variable( add_offset = (cmax + cmin) / 2 else: add_offset = cmin + 2 ** (n - 1) * scale_factor + else: + dtype = data.dtype.newbyteorder("=") def set_packing_ncattrs(cfvar): """Set netCDF packing attributes.""" @@ -2747,74 +2763,19 @@ def set_packing_ncattrs(cfvar): while cf_name in self._dataset.variables: cf_name = self._increment_name(cf_name) - # if netcdf3 avoid streaming due to dtype handling - if not cube.has_lazy_data() or self._dataset.file_format in ( - "NETCDF3_CLASSIC", - "NETCDF3_64BIT", - ): - - # Get the values in a form which is valid for the file format. - data = self._ensure_valid_dtype(cube.data, "cube", cube) - - def store(data, cf_var, fill_value): - cf_var[:] = data - is_masked = ma.is_masked(data) - contains_value = fill_value is not None and fill_value in data - return is_masked, contains_value - - else: - data = cube.lazy_data() - - def store(data, cf_var, fill_value): - # Store lazy data and check whether it is masked and contains - # the fill value - target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) - da.store([data], [target]) - return target.is_masked, target.contains_value - - if not packing: - dtype = data.dtype.newbyteorder("=") - # Create the cube CF-netCDF data variable with data payload. cf_var = self._dataset.createVariable( cf_name, dtype, dimension_names, fill_value=fill_value, **kwargs ) - set_packing_ncattrs(cf_var) - - # If packing attributes are specified, don't bother checking whether - # the fill value is in the data. - if packing: - fill_value_to_check = None - elif fill_value is not None: - fill_value_to_check = fill_value - else: - fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] - # Store the data and check if it is masked and contains the fill value - is_masked, contains_fill_value = store( - data, cf_var, fill_value_to_check + set_packing_ncattrs(cf_var) + self._lazy_stream_data( + data=data, + fill_value=fill_value, + fill_warn=(not packing), + cf_var=cf_var, ) - if dtype.itemsize == 1 and fill_value is None: - if is_masked: - msg = ( - "Cube '{}' contains byte data with masked points, but " - "no fill_value keyword was given. As saved, these " - "points will read back as valid values. To save as " - "masked byte data, please explicitly specify the " - "'fill_value' keyword." - ) - warnings.warn(msg.format(cube.name())) - elif contains_fill_value: - msg = ( - "Cube '{}' contains unmasked data points equal to the " - "fill-value, {}. As saved, these points will read back " - "as missing data. To save these as normal values, please " - "specify a 'fill_value' keyword not equal to any valid " - "data points." - ) - warnings.warn(msg.format(cube.name(), fill_value)) - if cube.standard_name: _setncattr(cf_var, "standard_name", cube.standard_name) @@ -2902,6 +2863,65 @@ def _increment_name(self, varname): return "{}_{}".format(varname, num) + @staticmethod + def _lazy_stream_data(data, fill_value, fill_warn, cf_var): + if is_lazy_data(data): + + def store(data, cf_var, fill_value): + # Store lazy data and check whether it is masked and contains + # the fill value + target = _FillValueMaskCheckAndStoreTarget(cf_var, fill_value) + da.store([data], [target]) + return target.is_masked, target.contains_value + + else: + + def store(data, cf_var, fill_value): + cf_var[:] = data + is_masked = np.ma.is_masked(data) + contains_value = fill_value is not None and fill_value in data + return is_masked, contains_value + + dtype = cf_var.dtype + + # fill_warn allows us to skip warning if packing attributes have been + # specified. It would require much more complex operations to work out + # what the values and fill_value _would_ be in such a case. + if fill_warn: + if fill_value is not None: + fill_value_to_check = fill_value + else: + fill_value_to_check = netCDF4.default_fillvals[dtype.str[1:]] + else: + fill_value_to_check = None + + # Store the data and check if it is masked and contains the fill value. + is_masked, contains_fill_value = store( + data, cf_var, fill_value_to_check + ) + + if dtype.itemsize == 1 and fill_value is None: + if is_masked: + msg = ( + "CF var '{}' contains byte data with masked points, but " + "no fill_value keyword was given. As saved, these " + "points will read back as valid values. To save as " + "masked byte data, `_FillValue` needs to be explicitly " + "set. For Cube data this can be done via the 'fill_value' " + "keyword during saving, otherwise use ncedit/equivalent." + ) + warnings.warn(msg.format(cf_var.name)) + elif contains_fill_value: + msg = ( + "CF var '{}' contains unmasked data points equal to the " + "fill-value, {}. As saved, these points will read back " + "as missing data. To save these as normal values, " + "`_FillValue` needs to be set to not equal any valid data " + "points. For Cube data this can be done via the 'fill_value' " + "keyword during saving, otherwise use ncedit/equivalent." + ) + warnings.warn(msg.format(cf_var.name, fill_value)) + def save( cube, diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py index cb5cd2954a..d30aae5c0d 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver.py @@ -18,7 +18,6 @@ from numpy import ma import iris -from iris._lazy_data import as_lazy_data from iris.coord_systems import ( AlbersEqualArea, GeogCS, @@ -42,8 +41,12 @@ class Test_write(tests.IrisTest): # It is not considered necessary to have integration tests for saving # EVERY coordinate system. A subset are tested below. # ------------------------------------------------------------------------- + + # Attribute is substituted in test_Saver__lazy. + array_lib = np + def _transverse_mercator_cube(self, ellipsoid=None): - data = np.arange(12).reshape(3, 4) + data = self.array_lib.arange(12).reshape(3, 4) cube = Cube(data, "air_pressure_anomaly") trans_merc = TransverseMercator( 49.0, -2.0, -400000.0, 100000.0, 0.9996012717, ellipsoid @@ -65,7 +68,7 @@ def _transverse_mercator_cube(self, ellipsoid=None): return cube def _mercator_cube(self, ellipsoid=None): - data = np.arange(12).reshape(3, 4) + data = self.array_lib.arange(12).reshape(3, 4) cube = Cube(data, "air_pressure_anomaly") merc = Mercator(49.0, ellipsoid) coord = DimCoord( @@ -85,7 +88,7 @@ def _mercator_cube(self, ellipsoid=None): return cube def _stereo_cube(self, ellipsoid=None): - data = np.arange(12).reshape(3, 4) + data = self.array_lib.arange(12).reshape(3, 4) cube = Cube(data, "air_pressure_anomaly") stereo = Stereographic( -10.0, 20.0, 500000.0, -200000.0, None, ellipsoid @@ -158,7 +161,7 @@ def test_stereographic_no_ellipsoid(self): self.assertCDL(nc_path) def _simple_cube(self, dtype): - data = np.arange(12, dtype=dtype).reshape(3, 4) + data = self.array_lib.arange(12, dtype=dtype).reshape(3, 4) points = np.arange(3, dtype=dtype) bounds = np.arange(6, dtype=dtype).reshape(3, 2) cube = Cube(data, "air_pressure_anomaly") @@ -210,7 +213,7 @@ def test_zlib(self): def test_least_significant_digit(self): cube = Cube( - np.array([1.23, 4.56, 7.89]), + self.array_lib.array([1.23, 4.56, 7.89]), standard_name="surface_temperature", long_name=None, var_name="temp", @@ -298,9 +301,14 @@ def test_with_climatology(self): class Test__create_cf_bounds(tests.IrisTest): + # Method is substituted in test_Saver__lazy. + @staticmethod + def climatology_3d(): + return stock.climatology_3d() + def _check_bounds_setting(self, climatological=False): # Generic test that can run with or without a climatological coord. - cube = stock.climatology_3d() + cube = self.climatology_3d() coord = cube.coord("time").copy() # Over-write original value from stock.climatology_3d with test value. coord.climatological = climatological @@ -354,11 +362,14 @@ def test_set_bounds_climatology(self): class Test_write__valid_x_cube_attributes(tests.IrisTest): """Testing valid_range, valid_min and valid_max attributes.""" + # Attribute is substituted in test_Saver__lazy. + array_lib = np + def test_valid_range_saved(self): cube = tests.stock.lat_lon_cube() cube.data = cube.data.astype("int32") - vrange = np.array([1, 2], dtype="int32") + vrange = self.array_lib.array([1, 2], dtype="int32") cube.attributes["valid_range"] = vrange with self.temp_filename(".nc") as nc_path: with Saver(nc_path, "NETCDF4") as saver: @@ -395,11 +406,14 @@ def test_valid_max_saved(self): class Test_write__valid_x_coord_attributes(tests.IrisTest): """Testing valid_range, valid_min and valid_max attributes.""" + # Attribute is substituted in test_Saver__lazy. + array_lib = np + def test_valid_range_saved(self): cube = tests.stock.lat_lon_cube() cube.data = cube.data.astype("int32") - vrange = np.array([1, 2], dtype="int32") + vrange = self.array_lib.array([1, 2], dtype="int32") cube.coord(axis="x").attributes["valid_range"] = vrange with self.temp_filename(".nc") as nc_path: with Saver(nc_path, "NETCDF4") as saver: @@ -436,17 +450,16 @@ def test_valid_max_saved(self): class Test_write_fill_value(tests.IrisTest): - def _make_cube( - self, dtype, lazy=False, masked_value=None, masked_index=None - ): - data = np.arange(12, dtype=dtype).reshape(3, 4) + # Attribute is substituted in test_Saver__lazy. + array_lib = np + + def _make_cube(self, dtype, masked_value=None, masked_index=None): + data = self.array_lib.arange(12, dtype=dtype).reshape(3, 4) if masked_value is not None: data = ma.masked_equal(data, masked_value) if masked_index is not None: - data = np.ma.masked_array(data) + data = self.array_lib.ma.masked_array(data) data[masked_index] = ma.masked - if lazy: - data = as_lazy_data(data) lat = DimCoord(np.arange(3), "latitude", units="degrees") lon = DimCoord(np.arange(4), "longitude", units="degrees") return Cube( @@ -502,24 +515,6 @@ def test_mask_default_fill_value(self): self.assertNotIn("_FillValue", var.ncattrs()) self.assertTrue(var[index].mask) - def test_mask_lazy_fill_value(self): - # Test that masked lazy data saves correctly when given a fill value. - index = (1, 1) - fill_value = 12345.0 - cube = self._make_cube(">f4", masked_index=index, lazy=True) - with self._netCDF_var(cube, fill_value=fill_value) as var: - self.assertEqual(var._FillValue, fill_value) - self.assertTrue(var[index].mask) - - def test_mask_lazy_default_fill_value(self): - # Test that masked lazy data saves correctly using the default fill - # value. - index = (1, 1) - cube = self._make_cube(">f4", masked_index=index, lazy=True) - with self._netCDF_var(cube) as var: - self.assertNotIn("_FillValue", var.ncattrs()) - self.assertTrue(var[index].mask) - def test_contains_fill_value_passed(self): # Test that a warning is raised if the data contains the fill value. cube = self._make_cube(">f4") @@ -612,9 +607,12 @@ def test_no_hyphen(self): class _Common__check_attribute_compliance: + # Attribute is substituted in test_Saver__lazy. + array_lib = np + def setUp(self): self.container = mock.Mock(name="container", attributes={}) - self.data = np.array(1, dtype="int32") + self.data = self.array_lib.array(1, dtype="int32") patch = mock.patch("netCDF4.Dataset") _ = patch.start() @@ -642,18 +640,18 @@ def attribute(self): return "valid_range" def test_valid_range_type_coerce(self): - value = np.array([1, 2], dtype="float") + value = self.array_lib.array([1, 2], dtype="float") self.check_attribute_compliance_call(value) self.assertAttribute(self.data.dtype) def test_valid_range_unsigned_int8_data_signed_range(self): self.data = self.data.astype("uint8") - value = np.array([1, 2], dtype="int8") + value = self.array_lib.array([1, 2], dtype="int8") self.check_attribute_compliance_call(value) self.assertAttribute(value.dtype) def test_valid_range_cannot_coerce(self): - value = np.array([1.5, 2.5], dtype="float64") + value = self.array_lib.array([1.5, 2.5], dtype="float64") msg = '"valid_range" is not of a suitable value' with self.assertRaisesRegex(ValueError, msg): self.check_attribute_compliance_call(value) @@ -674,18 +672,18 @@ def attribute(self): return "valid_min" def test_valid_range_type_coerce(self): - value = np.array(1, dtype="float") + value = self.array_lib.array(1, dtype="float") self.check_attribute_compliance_call(value) self.assertAttribute(self.data.dtype) def test_valid_range_unsigned_int8_data_signed_range(self): self.data = self.data.astype("uint8") - value = np.array(1, dtype="int8") + value = self.array_lib.array(1, dtype="int8") self.check_attribute_compliance_call(value) self.assertAttribute(value.dtype) def test_valid_range_cannot_coerce(self): - value = np.array(1.5, dtype="float64") + value = self.array_lib.array(1.5, dtype="float64") msg = '"valid_min" is not of a suitable value' with self.assertRaisesRegex(ValueError, msg): self.check_attribute_compliance_call(value) @@ -706,18 +704,18 @@ def attribute(self): return "valid_max" def test_valid_range_type_coerce(self): - value = np.array(2, dtype="float") + value = self.array_lib.array(2, dtype="float") self.check_attribute_compliance_call(value) self.assertAttribute(self.data.dtype) def test_valid_range_unsigned_int8_data_signed_range(self): self.data = self.data.astype("uint8") - value = np.array(2, dtype="int8") + value = self.array_lib.array(2, dtype="int8") self.check_attribute_compliance_call(value) self.assertAttribute(value.dtype) def test_valid_range_cannot_coerce(self): - value = np.array(2.5, dtype="float64") + value = self.array_lib.array(2.5, dtype="float64") msg = '"valid_max" is not of a suitable value' with self.assertRaisesRegex(ValueError, msg): self.check_attribute_compliance_call(value) @@ -730,7 +728,7 @@ def test_valid_range_not_numpy_array(self): self.assertAttribute(np.int64) -class Test_check_attribute_compliance__exception_handlng( +class Test_check_attribute_compliance__exception_handling( _Common__check_attribute_compliance, tests.IrisTest ): def test_valid_range_and_valid_min_valid_max_provided(self): @@ -1019,10 +1017,16 @@ def test_geo_cs(self): class Test__create_cf_cell_measure_variable(tests.IrisTest): # Saving of masked data is disallowed. + + # Attribute is substituted in test_Saver__lazy. + array_lib = np + def setUp(self): self.cube = stock.lat_lon_cube() self.names_map = ["latitude", "longitude"] - masked_array = np.ma.masked_array([0, 1, 2], mask=[True, False, True]) + masked_array = self.array_lib.ma.masked_array( + [0, 1, 2], mask=[True, False, True] + ) self.cm = iris.coords.CellMeasure(masked_array, var_name="cell_area") self.cube.add_cell_measure(self.cm, data_dims=0) self.exp_emsg = "Cell measures with missing data are not supported." diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver__lazy.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver__lazy.py new file mode 100644 index 0000000000..ca5a814b8c --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver__lazy.py @@ -0,0 +1,124 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the LGPL license. +# See COPYING and COPYING.LESSER in the root of the repository for full +# licensing details. +"""Mirror of :mod:`iris.tests.unit.fileformats.netcdf.test_Saver`, but with lazy arrays.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests # isort:skip + +from dask import array as da + +from iris.coords import AuxCoord +from iris.fileformats.netcdf import Saver +from iris.tests import stock +from iris.tests.unit.fileformats.netcdf import test_Saver + + +class LazyMixin(tests.IrisTest): + array_lib = da + + def result_path(self, basename=None, ext=""): + # Precisely mirroring the tests in test_Saver, so use those CDL's. + original = super().result_path(basename, ext) + return original.replace("Saver__lazy", "Saver") + + +class Test_write(LazyMixin, test_Saver.Test_write): + pass + + +class Test__create_cf_bounds(test_Saver.Test__create_cf_bounds): + @staticmethod + def climatology_3d(): + cube = stock.climatology_3d() + aux_coord = AuxCoord.from_coord(cube.coord("time")) + lazy_coord = aux_coord.copy( + aux_coord.lazy_points(), aux_coord.lazy_bounds() + ) + cube.replace_coord(lazy_coord) + return cube + + +class Test_write__valid_x_cube_attributes( + LazyMixin, test_Saver.Test_write__valid_x_cube_attributes +): + pass + + +class Test_write__valid_x_coord_attributes( + LazyMixin, test_Saver.Test_write__valid_x_coord_attributes +): + pass + + +class Test_write_fill_value(LazyMixin, test_Saver.Test_write_fill_value): + pass + + +class Test_check_attribute_compliance__valid_range( + LazyMixin, test_Saver.Test_check_attribute_compliance__valid_range +): + pass + + +class Test_check_attribute_compliance__valid_min( + LazyMixin, test_Saver.Test_check_attribute_compliance__valid_min +): + pass + + +class Test_check_attribute_compliance__valid_max( + LazyMixin, test_Saver.Test_check_attribute_compliance__valid_max +): + pass + + +class Test_check_attribute_compliance__exception_handling( + LazyMixin, test_Saver.Test_check_attribute_compliance__exception_handling +): + pass + + +class Test__create_cf_cell_measure_variable( + LazyMixin, test_Saver.Test__create_cf_cell_measure_variable +): + pass + + +class TestStreamed(tests.IrisTest): + def setUp(self): + self.cube = stock.simple_2d() + self.store_watch = self.patch("dask.array.store") + + def save_common(self, cube_to_save): + with self.temp_filename(".nc") as nc_path: + with Saver(nc_path, "NETCDF4") as saver: + saver.write(cube_to_save) + + def test_realised_not_streamed(self): + self.save_common(self.cube) + self.assertFalse(self.store_watch.called) + + def test_lazy_streamed_data(self): + self.cube.data = self.cube.lazy_data() + self.save_common(self.cube) + self.assertTrue(self.store_watch.called) + + def test_lazy_streamed_coord(self): + aux_coord = AuxCoord.from_coord(self.cube.coords()[0]) + lazy_coord = aux_coord.copy( + aux_coord.lazy_points(), aux_coord.lazy_bounds() + ) + self.cube.replace_coord(lazy_coord) + self.save_common(self.cube) + self.assertTrue(self.store_watch.called) + + def test_lazy_streamed_bounds(self): + aux_coord = AuxCoord.from_coord(self.cube.coords()[0]) + lazy_coord = aux_coord.copy(aux_coord.points, aux_coord.lazy_bounds()) + self.cube.replace_coord(lazy_coord) + self.save_common(self.cube) + self.assertTrue(self.store_watch.called) diff --git a/lib/iris/tests/unit/fileformats/netcdf/test_Saver__ugrid.py b/lib/iris/tests/unit/fileformats/netcdf/test_Saver__ugrid.py index 3c8838dbea..07af82e6ee 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/test_Saver__ugrid.py +++ b/lib/iris/tests/unit/fileformats/netcdf/test_Saver__ugrid.py @@ -3,8 +3,13 @@ # This file is part of Iris and is released under the LGPL license. # See COPYING and COPYING.LESSER in the root of the repository for full # licensing details. -"""Unit tests for the `iris.fileformats.netcdf.Saver` class.""" +""" +Unit tests for the :class:`iris.fileformats.netcdf.Saver` class. +WHEN MODIFYING THIS MODULE, CHECK IF ANY CORRESPONDING CHANGES ARE NEEDED IN +:mod:`iris.tests.unit.fileformats.netcdf.test_Saver__lazy.` + +""" # Import iris.tests first so that some things can be initialised before # importing anything else. import iris.tests as tests # isort:skip @@ -1284,5 +1289,9 @@ def test_multiple_different_meshes(self): self.assertEqual(dims["Mesh2d_edge_0"], 2) +# WHEN MODIFYING THIS MODULE, CHECK IF ANY CORRESPONDING CHANGES ARE NEEDED IN +# :mod:`iris.tests.unit.fileformats.netcdf.test_Saver__lazy.` + + if __name__ == "__main__": tests.main() diff --git a/lib/iris/util.py b/lib/iris/util.py index 1514739c01..826162fa7f 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -20,10 +20,12 @@ import tempfile import cf_units +from dask import array as da import numpy as np import numpy.ma as ma from iris._deprecation import warn_deprecated +from iris._lazy_data import is_lazy_data import iris.exceptions @@ -1896,6 +1898,28 @@ def equalise_attributes(cubes): return removed +def is_masked(array): + """ + Equivalent to :func:`numpy.ma.is_masked`, but works for both lazy AND realised arrays. + + Parameters + ---------- + array : :class:`numpy.Array` or `dask.array.Array` + The array to be checked for masks. + + Returns + ------- + bool + Whether or not the array has any masks. + + """ + if is_lazy_data(array): + result = da.ma.getmaskarray(array).any().compute() + else: + result = ma.is_masked(array) + return result + + def _strip_metadata_from_dims(cube, dims): """ Remove ancillary variables and cell measures that map to specific dimensions.