diff --git a/docs/src/further_topics/missing_data_handling.rst b/docs/src/further_topics/missing_data_handling.rst index 13b00d3424..a461a44456 100644 --- a/docs/src/further_topics/missing_data_handling.rst +++ b/docs/src/further_topics/missing_data_handling.rst @@ -22,6 +22,7 @@ On load, any fill-value or missing data value defined in the loaded dataset should be used as the ``fill_value`` of the NumPy masked array data attribute of the :class:`~iris.cube.Cube`. This will only appear when the cube's data is realised. +.. _missing_data_saving: Saving ------ @@ -37,7 +38,8 @@ For example:: .. note:: Not all savers accept the ``fill_value`` keyword argument. -Iris will check for and issue warnings of fill-value 'collisions'. +Iris will check for and issue warnings of fill-value 'collisions' (exception: +**NetCDF**, see the heading below). This basically means that whenever there are unmasked values that would read back as masked, we issue a warning and suggest a workaround. @@ -51,6 +53,8 @@ This will occur in the following cases: NetCDF ~~~~~~ +:term:`NetCDF Format` + NetCDF is a special case, because all ordinary variable data is "potentially masked", owing to the use of default fill values. The default fill-value used depends on the type of the variable data. @@ -64,6 +68,16 @@ The exceptions to this are: * Small integers create problems by *not* having the exemption applied to byte data. Thus, in principle, ``int32`` data cannot use the full range of 2**16 valid values. +Warnings are not issued for NetCDF fill value collisions. Increasingly large +and complex parallel I/O operations unfortunately made this feature +un-maintainable and it was retired in Iris 3.9 (:pull:`5833`). + +If you need to know about collisions then you can perform your own checks ahead +of saving. Such operations can be run lazily (:term:`Lazy Data`). 
Here is an +example:: + + >>> default_fill = netCDF4.default_fillvals[my_cube.dtype.str[1:]] + >>> fill_present = (my_cube.lazy_data() == default_fill).any().compute() Merging ------- diff --git a/docs/src/whatsnew/latest.rst index 9150568316..43ecdfafa4 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -42,7 +42,8 @@ This document explains the changes made to Iris for this release 💣 Incompatible Changes ======================= -#. N/A +#. Warnings are no longer produced for fill value 'collisions' in NetCDF + saving. :ref:`Read more <missing_data_saving>`. (:pull:`5833`) 🚀 Performance Enhancements diff --git a/lib/iris/fileformats/netcdf/saver.py index 11491de900..8d53a4d5be 100644 --- a/lib/iris/fileformats/netcdf/saver.py +++ b/lib/iris/fileformats/netcdf/saver.py @@ -20,7 +20,7 @@ import os.path import re import string -from typing import List +import typing import warnings import cf_units @@ -162,15 +162,6 @@ } -class _WarnComboMaskSave( - iris.warnings.IrisMaskValueMatchWarning, - iris.warnings.IrisSaveWarning, -): - """One-off combination of warning classes - enhances user filtering.""" - - pass - - class CFNameCoordMap: """Provide a simple CF name to CF coordinate mapping.""" @@ -285,41 +276,6 @@ def _setncattr(variable, name, attribute): MESH_ELEMENTS = ("node", "edge", "face") -_FillvalueCheckInfo = collections.namedtuple( - "_FillvalueCheckInfo", ["user_value", "check_value", "dtype", "varname"] -) - - -def _data_fillvalue_check(arraylib, data, check_value): - """Check whether an array is masked, and whether it contains a fill-value. - - Parameters - ---------- - arraylib : module - Either numpy or dask.array : When dask, results are lazy computations. - data : array-like - Array to check (numpy or dask). - check_value : number or None - If not None, fill-value to check for existence in the array. - If None, do not do value-in-array check. 
- - Returns - ------- - is_masked : bool - True if array has any masked points. - contains_value : bool - True if array contains check_value. - Always False if check_value is None. - - """ - is_masked = arraylib.any(arraylib.ma.getmaskarray(data)) - if check_value is None: - contains_value = False - else: - contains_value = arraylib.any(data == check_value) - return is_masked, contains_value - - class SaverFillValueWarning(iris.warnings.IrisSaverFillValueWarning): """Backwards compatible form of :class:`iris.warnings.IrisSaverFillValueWarning`.""" @@ -327,59 +283,16 @@ class SaverFillValueWarning(iris.warnings.IrisSaverFillValueWarning): pass -def _fillvalue_report(fill_info, is_masked, contains_fill_value, warn=False): - """Work out whether there was a possible or actual fill-value collision. - - From the given information, work out whether there was a possible or actual - fill-value collision, and if so construct a warning. +class VariableEmulator(typing.Protocol): + """Duck-type-hinting for a ncdata object. - Parameters - ---------- - fill_info : _FillvalueCheckInfo - A named-tuple containing the context of the fill-value check. - is_masked : bool - Whether the data array was masked. - contains_fill_value : bool - Whether the data array contained the fill-value. - warn : bool, default=False - If True, also issue any resulting warning immediately. + https://github.com/pp-mo/ncdata + """ - Returns - ------- - None or :class:`Warning` - If not None, indicates a known or possible problem with filling. + _data_array: np.typing.ArrayLike - """ - varname = fill_info.varname - user_value = fill_info.user_value - check_value = fill_info.check_value - is_byte_data = fill_info.dtype.itemsize == 1 - result = None - if is_byte_data and is_masked and user_value is None: - result = SaverFillValueWarning( - f"CF var '{varname}' contains byte data with masked points, but " - "no fill_value keyword was given. As saved, these " - "points will read back as valid values. 
To save as " - "masked byte data, `_FillValue` needs to be explicitly " - "set. For Cube data this can be done via the 'fill_value' " - "keyword during saving, otherwise use ncedit/equivalent." - ) - elif contains_fill_value: - result = SaverFillValueWarning( - f"CF var '{varname}' contains unmasked data points equal to the " - f"fill-value, {check_value}. As saved, these points will read back " - "as missing data. To save these as normal values, " - "`_FillValue` needs to be set to not equal any valid data " - "points. For Cube data this can be done via the 'fill_value' " - "keyword during saving, otherwise use ncedit/equivalent." - ) - if warn and result is not None: - warnings.warn( - result, - category=_WarnComboMaskSave, - ) - return result +CFVariable = typing.Union[_thread_safe_nc.VariableWrapper, VariableEmulator] class Saver: @@ -454,7 +367,7 @@ def __init__(self, filename, netcdf_format, compute=True): self._formula_terms_cache = {} #: Target filepath self.filepath = None # this line just for the API page -- value is set later - #: Whether to complete delayed saves on exit (and raise associated warnings). + #: Whether to complete delayed saves on exit. self.compute = compute # N.B. the file-write-lock *type* actually depends on the dask scheduler type. #: A per-file write lock to prevent dask attempting overlapping writes. @@ -463,7 +376,7 @@ def __init__(self, filename, netcdf_format, compute=True): ) # A list of delayed writes for lazy saving - # a list of triples (source, target, fill-info). + # a list of couples (source, target). 
self._delayed_writes = [] # Detect if we were passed a pre-opened dataset (or something like one) @@ -1544,12 +1457,7 @@ def _create_cf_bounds(self, coord, cf_var, cf_name): bounds.dtype.newbyteorder("="), cf_var.dimensions + (bounds_dimension_name,), ) - self._lazy_stream_data( - data=bounds, - fill_value=None, - fill_warn=True, - cf_var=cf_var_bounds, - ) + self._lazy_stream_data(data=bounds, cf_var=cf_var_bounds) def _get_cube_variable_name(self, cube): """Return a CF-netCDF variable name for the given cube. @@ -1882,9 +1790,7 @@ def _create_generic_cf_array_var( self._create_cf_bounds(element, cf_var, cf_name) # Add the data to the CF-netCDF variable. - self._lazy_stream_data( - data=data, fill_value=fill_value, fill_warn=True, cf_var=cf_var - ) + self._lazy_stream_data(data=data, cf_var=cf_var) # Add names + units self._set_cf_var_attributes(cf_var, element) @@ -2289,12 +2195,7 @@ def set_packing_ncattrs(cfvar): ) set_packing_ncattrs(cf_var) - self._lazy_stream_data( - data=data, - fill_value=fill_value, - fill_warn=(not packing), - cf_var=cf_var, - ) + self._lazy_stream_data(data=data, cf_var=cf_var) if cube.standard_name: _setncattr(cf_var, "standard_name", cube.standard_name) @@ -2387,7 +2288,11 @@ def _increment_name(self, varname): return "{}_{}".format(varname, num) - def _lazy_stream_data(self, data, fill_value, fill_warn, cf_var): + def _lazy_stream_data( + self, + data: np.typing.ArrayLike, + cf_var: CFVariable, + ) -> None: if hasattr(data, "shape") and data.shape == (1,) + cf_var.shape: # (Don't do this check for string data). # Reduce dimensionality where the data array has an extra dimension @@ -2403,72 +2308,36 @@ def _lazy_stream_data(self, data, fill_value, fill_warn, cf_var): # data to/from netcdf data container objects in other packages, such as # xarray. # See https://github.com/SciTools/iris/issues/4994 "Xarray bridge". - # N.B. 
also, in this case there is no need for fill-value checking as the - # data is not being translated to an in-file representation. cf_var._data_array = data - else: - # Decide whether we are checking for fill-value collisions. - dtype = cf_var.dtype - # fill_warn allows us to skip warning if packing attributes have been - # specified. It would require much more complex operations to work out - # what the values and fill_value _would_ be in such a case. - if fill_warn: - if fill_value is not None: - fill_value_to_check = fill_value - else: - # Retain 'fill_value == None', to show that no specific value was given. - # But set 'fill_value_to_check' to a calculated value - fill_value_to_check = _thread_safe_nc.default_fillvals[ - dtype.str[1:] - ] - # Cast the check-value to the correct dtype. - # NOTE: In the case of 'S1' dtype (at least), the default (Python) value - # does not have a compatible type. This causes a deprecation warning at - # numpy 1.24, *and* was preventing correct fill-value checking of character - # data, since they are actually bytes (dtype 'S1'). - fill_value_to_check = np.array(fill_value_to_check, dtype=dtype) - else: - # A None means we will NOT check for collisions. - fill_value_to_check = None - - fill_info = _FillvalueCheckInfo( - user_value=fill_value, - check_value=fill_value_to_check, - dtype=dtype, - varname=cf_var.name, - ) + else: doing_delayed_save = is_lazy_data(data) if doing_delayed_save: # save lazy data with a delayed operation. For now, we just record the # necessary information -- a single, complete delayed action is constructed # later by a call to delayed_completion(). - def store(data, cf_var, fill_info): + def store( + data: np.typing.ArrayLike, + cf_var: CFVariable, + ) -> None: # Create a data-writeable object that we can stream into, which # encapsulates the file to be opened + variable to be written. 
write_wrapper = _thread_safe_nc.NetCDFWriteProxy( self.filepath, cf_var, self.file_write_lock ) # Add to the list of delayed writes, used in delayed_completion(). - self._delayed_writes.append((data, write_wrapper, fill_info)) - # In this case, fill-value checking is done later. But return 2 dummy - # values, to be consistent with the non-streamed "store" signature. - is_masked, contains_value = False, False - return is_masked, contains_value + self._delayed_writes.append((data, write_wrapper)) else: # Real data is always written directly, i.e. not via lazy save. - # We also check it immediately for any fill-value problems. - def store(data, cf_var, fill_info): + def store( + data: np.typing.ArrayLike, + cf_var: CFVariable, + ) -> None: cf_var[:] = data - return _data_fillvalue_check(np, data, fill_info.check_value) - - # Store the data and check if it is masked and contains the fill value. - is_masked, contains_fill_value = store(data, cf_var, fill_info) - if not doing_delayed_save: - # Issue a fill-value warning immediately, if appropriate. - _fillvalue_report(fill_info, is_masked, contains_fill_value, warn=True) + # Store the data. + store(data, cf_var) def delayed_completion(self) -> Delayed: """Perform file completion for delayed saves. @@ -2476,11 +2345,6 @@ def delayed_completion(self) -> Delayed: Create and return a :class:`dask.delayed.Delayed` to perform file completion for delayed saves. - This contains all the delayed writes, which complete the file by - filling out the data of variables initially created empty, and also the - checks for potential fill-value collisions. When computed, it returns - a list of any warnings which were generated in the save operation. - Returns ------- :class:`dask.delayed.Delayed` @@ -2492,68 +2356,24 @@ def delayed_completion(self) -> Delayed: """ if self._delayed_writes: # Create a single delayed da.store operation to complete the file. 
- sources, targets, fill_infos = zip(*self._delayed_writes) - store_op = da.store(sources, targets, compute=False, lock=False) - - # Construct a delayed fill-check operation for each (lazy) source array. - delayed_fillvalue_checks = [ - # NB with arraylib=dask.array, this routine does lazy array computation - _data_fillvalue_check(da, source, fillinfo.check_value) - for source, fillinfo in zip(sources, fill_infos) - ] - - # Return a single delayed object which completes the delayed saves and - # returns a list of any fill-value warnings. - @dask.delayed - def compute_and_return_warnings(store_op, fv_infos, fv_checks): - # Note: we don't actually *do* anything with the 'store_op' argument, - # but including it here ensures that dask will compute it (thus - # performing all the delayed saves), before calling this function. - results = [] - # Pair each fill_check result (is_masked, contains_value) with its - # fillinfo and construct a suitable Warning if needed. - for fillinfo, (is_masked, contains_value) in zip(fv_infos, fv_checks): - fv_warning = _fillvalue_report( - fill_info=fillinfo, - is_masked=is_masked, - contains_fill_value=contains_value, - ) - if fv_warning is not None: - # Collect the warnings and return them. - results.append(fv_warning) - return results - - result = compute_and_return_warnings( - store_op, - fv_infos=fill_infos, - fv_checks=delayed_fillvalue_checks, - ) + sources, targets = zip(*self._delayed_writes) + result = da.store(sources, targets, compute=False, lock=False) else: - # Return a delayed, which returns an empty list, for usage consistency. + # Return a do-nothing delayed, for usage consistency. @dask.delayed def no_op(): - return [] + return None result = no_op() return result - def complete(self, issue_warnings=True) -> List[Warning]: + def complete(self) -> None: """Complete file by computing any delayed variable saves. This requires that the Saver has closed the dataset (exited its context). 
- Parameters - ---------- - issue_warnings : bool, default = True - If true, issue all the resulting warnings with :func:`warnings.warn`. - - Returns - ------- - list of Warning - Any warnings that were raised while writing delayed data. - """ if self._dataset.isopen(): msg = ( @@ -2562,15 +2382,8 @@ def complete(self, issue_warnings=True) -> List[Warning]: ) raise ValueError(msg) - delayed_write = self.delayed_completion() - # Complete the saves now, and handle any delayed warnings that occurred - result_warnings = delayed_write.compute() - if issue_warnings: - # Issue any delayed warnings from the compute. - for delayed_warning in result_warnings: - warnings.warn(delayed_warning, category=iris.warnings.IrisSaveWarning) - - return result_warnings + # Complete the saves now + self.delayed_completion().compute() def save( @@ -2722,11 +2535,6 @@ def save( Several such data saves can be performed in parallel, by passing a list of them into a :func:`dask.compute` call. - .. note:: - when computed, the returned :class:`dask.delayed.Delayed` object returns - a list of :class:`Warning` : These are any warnings which *would* have - been issued in the save call, if ``compute`` had been ``True``. - .. note:: If saving to an open dataset instead of a filepath, then the caller **must** specify ``compute=False``, and complete delayed saves **after diff --git a/lib/iris/tests/integration/netcdf/test_delayed_save.py b/lib/iris/tests/integration/netcdf/test_delayed_save.py index 9322bb9f54..2869924dce 100644 --- a/lib/iris/tests/integration/netcdf/test_delayed_save.py +++ b/lib/iris/tests/integration/netcdf/test_delayed_save.py @@ -4,7 +4,6 @@ # See LICENSE in the root of the repository for full licensing details. 
"""Integration tests for delayed saving.""" -import re import warnings from cf_units import Unit @@ -19,7 +18,6 @@ from iris.fileformats.netcdf._thread_safe_nc import default_fillvals import iris.tests from iris.tests.stock import realistic_4d -from iris.warnings import IrisSaverFillValueWarning class Test__lazy_stream_data: @@ -185,39 +183,12 @@ def test_scheduler_types(self, output_path, scheduler_type, save_is_delayed): cube = self.make_testcube( include_lazy_content=True, ensure_fillvalue_collision=True ) - with warnings.catch_warnings(record=True) as logged_warnings: - result = iris.save(cube, output_path, compute=not save_is_delayed) + result = iris.save(cube, output_path, compute=not save_is_delayed) if not save_is_delayed: assert result is None - issued_warnings = [log.message for log in logged_warnings] else: - assert result is not None - assert len(logged_warnings) == 0 - with warnings.catch_warnings(record=True) as logged_warnings: - # The compute *returns* warnings from the delayed operations. - issued_warnings = result.compute() - issued_warnings = [log.message for log in logged_warnings] + issued_warnings - - warning_messages = [warning.args[0] for warning in issued_warnings] - if scheduler_type == "DistributedScheduler": - # Ignore any "large data transfer" messages generated, - # specifically when testing with the Distributed scheduler. - # These may not always occur and don't reflect something we want to - # test for. - large_transfer_message_regex = re.compile( - "Sending large graph.* may cause some slowdown", re.DOTALL - ) - warning_messages = [ - message - for message in warning_messages - if not large_transfer_message_regex.search(message) - ] - - # In all cases, should get 2 fill value warnings overall. 
- assert len(warning_messages) == 2 - expected_msg = "contains unmasked data points equal to the fill-value" - assert all(expected_msg in message for message in warning_messages) + assert isinstance(result, Delayed) def test_time_of_writing(self, save_is_delayed, output_path, scheduler_type): # Check when lazy data is *actually* written : @@ -290,49 +261,6 @@ def fetch_masks(): assert np.all(~ancil_mask) assert np.all(~cm_mask) - @pytest.mark.parametrize( - "warning_type", ["WarnMaskedBytes", "WarnFillvalueCollision"] - ) - def test_fill_warnings(self, warning_type, output_path, save_is_delayed): - # Test collision warnings for data with fill-value collisions, or for masked - # byte data. - if warning_type == "WarnFillvalueCollision": - make_fv_collide = True - make_maskedbytes = False - expected_msg = "contains unmasked data points equal to the fill-value" - else: - assert warning_type == "WarnMaskedBytes" - make_fv_collide = False - make_maskedbytes = True - expected_msg = "contains byte data with masked points" - - cube = self.make_testcube( - include_lazy_content=True, - ensure_fillvalue_collision=make_fv_collide, - data_is_maskedbytes=make_maskedbytes, - ) - with warnings.catch_warnings(record=True) as logged_warnings: - result = iris.save(cube, output_path, compute=not save_is_delayed) - - result_warnings = [ - log.message - for log in logged_warnings - if isinstance(log.message, IrisSaverFillValueWarning) - ] - - if save_is_delayed: - # Should have had *no* fill-warnings in the initial save. - assert len(result_warnings) == 0 - # Complete the operation now - with warnings.catch_warnings(): - # NOTE: warnings should *not* be issued here, instead they are returned. - warnings.simplefilter("error", category=IrisSaverFillValueWarning) - result_warnings = result.compute() - - # Either way, we should now have 2 similar warnings. 
- assert len(result_warnings) == 2 - assert all(expected_msg in warning.args[0] for warning in result_warnings) - def test_no_delayed_writes(self, output_path): # Just check that a delayed save returns a usable 'delayed' object, even when # there is no lazy content = no delayed writes to perform. @@ -340,4 +268,3 @@ def test_no_delayed_writes(self, output_path): warnings.simplefilter("error") result = iris.save(cube, output_path, compute=False) assert isinstance(result, Delayed) - assert result.compute() == [] diff --git a/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py index 744051f02d..f067993bed 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver.py @@ -34,7 +34,6 @@ from iris.cube import Cube from iris.fileformats.netcdf import Saver, _thread_safe_nc import iris.tests.stock as stock -from iris.warnings import IrisMaskValueMatchWarning class Test_write(tests.IrisTest): @@ -548,76 +547,6 @@ def test_mask_default_fill_value(self): self.assertNotIn("_FillValue", var.ncattrs()) self.assertTrue(var[index].mask) - def test_contains_fill_value_passed(self): - # Test that a warning is raised if the data contains the fill value. - cube = self._make_cube(">f4") - fill_value = 1 - with self.assertWarnsRegex( - IrisMaskValueMatchWarning, - "contains unmasked data points equal to the fill-value", - ): - with self._netCDF_var(cube, fill_value=fill_value): - pass - - def test_contains_fill_value_byte(self): - # Test that a warning is raised if the data contains the fill value - # when it is of a byte type. 
- cube = self._make_cube(">i1") - fill_value = 1 - with self.assertWarnsRegex( - IrisMaskValueMatchWarning, - "contains unmasked data points equal to the fill-value", - ): - with self._netCDF_var(cube, fill_value=fill_value): - pass - - def test_contains_default_fill_value(self): - # Test that a warning is raised if the data contains the default fill - # value if no fill_value argument is supplied. - cube = self._make_cube(">f4") - cube.data[0, 0] = _thread_safe_nc.default_fillvals["f4"] - with self.assertWarnsRegex( - IrisMaskValueMatchWarning, - "contains unmasked data points equal to the fill-value", - ): - with self._netCDF_var(cube): - pass - - def test_contains_default_fill_value_byte(self): - # Test that no warning is raised if the data contains the default fill - # value if no fill_value argument is supplied when the data is of a - # byte type. - cube = self._make_cube(">i1") - with self.assertNoWarningsRegexp(r"\(fill\|mask\)"): - with self._netCDF_var(cube): - pass - - def test_contains_masked_fill_value(self): - # Test that no warning is raised if the data contains the fill_value at - # a masked point. - fill_value = 1 - cube = self._make_cube(">f4", masked_value=fill_value) - with self.assertNoWarningsRegexp(r"\(fill\|mask\)"): - with self._netCDF_var(cube, fill_value=fill_value): - pass - - def test_masked_byte_default_fill_value(self): - # Test that a warning is raised when saving masked byte data with no - # fill value supplied. - cube = self._make_cube(">i1", masked_value=1) - with self.assertNoWarningsRegexp(r"\(fill\|mask\)"): - with self._netCDF_var(cube): - pass - - def test_masked_byte_fill_value_passed(self): - # Test that no warning is raised when saving masked byte data with a - # fill value supplied if the the data does not contain the fill_value. 
- fill_value = 100 - cube = self._make_cube(">i1", masked_value=2) - with self.assertNoWarningsRegexp(r"\(fill\|mask\)"): - with self._netCDF_var(cube, fill_value=fill_value): - pass - class Test_cf_valid_var_name(tests.IrisTest): def test_no_replacement(self): diff --git a/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver__lazy_stream_data.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver__lazy_stream_data.py index f252d53a47..e02f6b16c8 100644 --- a/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver__lazy_stream_data.py +++ b/lib/iris/tests/unit/fileformats/netcdf/saver/test_Saver__lazy_stream_data.py @@ -11,15 +11,13 @@ """ from unittest import mock -import warnings import dask.array as da import numpy as np import pytest import iris.fileformats.netcdf._thread_safe_nc as threadsafe_nc -from iris.fileformats.netcdf.saver import Saver, _FillvalueCheckInfo -from iris.warnings import IrisMaskValueMatchWarning +from iris.fileformats.netcdf.saver import Saver class Test__lazy_stream_data: @@ -89,10 +87,7 @@ def test_data_save(self, compute, data_form): cf_var = self.mock_var( data.shape, with_data_array=(data_form == "emulateddata") ) - fill_value = -1.0 # not occurring in data - saver._lazy_stream_data( - data=data, fill_value=fill_value, fill_warn=True, cf_var=cf_var - ) + saver._lazy_stream_data(data=data, cf_var=cf_var) if data_form == "lazydata": expect_n_setitem = 0 expect_n_delayed = 1 @@ -108,73 +103,11 @@ def test_data_save(self, compute, data_form): assert len(saver._delayed_writes) == expect_n_delayed if data_form == "lazydata": - result_data, result_writer, fill_info = saver._delayed_writes[0] + result_data, result_writer = saver._delayed_writes[0] assert result_data is data assert isinstance(result_writer, threadsafe_nc.NetCDFWriteProxy) - assert isinstance(fill_info, _FillvalueCheckInfo) elif data_form == "realdata": cf_var.__setitem__.assert_called_once_with(slice(None), data) else: assert data_form == "emulateddata" 
cf_var._data_array == mock.sentinel.exact_data_array - - def test_warnings(self, compute, data_form): - """For real data, fill-value warnings are issued immediately. - For lazy data, warnings are returned from computing a delayed completion. - For 'emulated' data (direct array transfer), no checks + no warnings ever. - - N.B. The 'compute' keyword has **no effect** on this : It only causes delayed - writes to be automatically actioned on exiting a Saver context. - Streaming *always* creates delayed writes for lazy data, since this is required - to make dask distributed operation work. - """ - saver = self.saver(compute=compute) - - data = np.arange(5.0) - if data_form == "lazydata": - data = da.from_array(data) - - fill_value = 2.0 # IS occurring in data - cf_var = self.mock_var( - data.shape, with_data_array=(data_form == "emulateddata") - ) - - # Do initial save. When compute=True, this issues warnings - with warnings.catch_warnings(record=True) as logged_warnings: - saver._lazy_stream_data( - data=data, fill_value=fill_value, fill_warn=True, cf_var=cf_var - ) - - # Check warnings issued by initial call. - issued_warnings = [log.message for log in logged_warnings] - if data_form == "lazydata": - n_expected_warnings = 0 - elif data_form == "realdata": - n_expected_warnings = 1 - else: - # No checks in the emulated case - assert data_form == "emulateddata" - n_expected_warnings = 0 - assert len(issued_warnings) == n_expected_warnings - - # Complete the write : any delayed warnings should be *returned*. - # NOTE: - # (1) this still works when there are no delayed writes. - # (2) the Saver 'compute' keyword makes no difference to this usage, as it - # *only* affects what happens when the saver context exits. - result2 = saver.delayed_completion().compute() - issued_warnings += list(result2) - - # Check warnings issued during 'completion'. - if data_form == "emulateddata": - # No checks in this case, ever. 
- n_expected_warnings = 0 - else: - # Otherwise, either way, a suitable warning should now have been produced. - n_expected_warnings = 1 - assert len(issued_warnings) == n_expected_warnings - if n_expected_warnings > 0: - warning = issued_warnings[0] - msg = "contains unmasked data points equal to the fill-value, 2.0" - assert isinstance(warning, IrisMaskValueMatchWarning) - assert msg in warning.args[0] diff --git a/lib/iris/tests/unit/fileformats/netcdf/saver/test__data_fillvalue_check.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test__data_fillvalue_check.py deleted file mode 100644 index 9fb7485734..0000000000 --- a/lib/iris/tests/unit/fileformats/netcdf/saver/test__data_fillvalue_check.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. -"""Unit tests for :func:`iris.fileformats.netcdf.saver._data_fillvalue_check`. - -Note: now runs all testcases on both real + lazy data. - -""" - -# Import iris.tests first so that some things can be initialised before -# importing anything else. -import iris.tests as tests # isort:skip -import collections - -import dask.array as da -import numpy as np - -from iris.fileformats.netcdf.saver import _data_fillvalue_check - - -class Check__fillvalueandmasking: - def _call_target(self, fill_value, keys, vals): - data = np.zeros(20, dtype=np.float32) - if any(np.ma.isMaskedArray(val) for val in vals): - # N.B. array is masked if "vals" is, but has no masked points initially. 
- data = np.ma.masked_array(data, mask=np.zeros_like(data)) - - for key, val in zip(keys, vals): - data[key] = val - - if hasattr(self.arraylib, "compute"): - data = da.from_array(data, chunks=-1) - - results = _data_fillvalue_check( - arraylib=self.arraylib, data=data, check_value=fill_value - ) - - if hasattr(results, "compute"): - results = results.compute() - - # Return a named tuple, for named-property access to the 2 result values. - result = collections.namedtuple("_", ["is_masked", "contains_value"])(*results) - return result - - def test_no_fill_value_not_masked(self): - # Test when the fill value is not present and the data is not masked - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.arange(5)] - fill_value = 16 - target = self._call_target(fill_value, keys, vals) - self.assertFalse(target.contains_value) - self.assertFalse(target.is_masked) - - def test_contains_fill_value_not_masked(self): - # Test when the fill value is present and the data is not masked - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.arange(5)] - fill_value = 5 - target = self._call_target(fill_value, keys, vals) - self.assertTrue(target.contains_value) - self.assertFalse(target.is_masked) - - def test_no_fill_value_masked(self): - # Test when the fill value is not present and the data is masked - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] - fill_value = 16 - target = self._call_target(fill_value, keys, vals) - self.assertFalse(target.contains_value) - self.assertTrue(target.is_masked) - - def test_contains_fill_value_masked(self): - # Test when the fill value is present and the data is masked - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.ma.masked_equal(np.arange(5), 3)] - fill_value = 5 - target = self._call_target(fill_value, keys, vals) - self.assertTrue(target.contains_value) - self.assertTrue(target.is_masked) - - def test_fill_value_None(self): - # Test when the 
fill value is None - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.arange(5)] - fill_value = None - target = self._call_target(fill_value, keys, vals) - self.assertFalse(target.contains_value) - - def test_contains_masked_fill_value(self): - # Test when the fill value is present but masked the data is masked - keys = [slice(0, 10), slice(10, 15)] - vals = [np.arange(10), np.ma.masked_equal(np.arange(10, 15), 13)] - fill_value = 13 - target = self._call_target(fill_value, keys, vals) - self.assertFalse(target.contains_value) - self.assertTrue(target.is_masked) - - -class Test__real(Check__fillvalueandmasking, tests.IrisTest): - arraylib = np - - -class Test__lazy(Check__fillvalueandmasking, tests.IrisTest): - arraylib = da diff --git a/lib/iris/tests/unit/fileformats/netcdf/saver/test__fillvalue_report.py b/lib/iris/tests/unit/fileformats/netcdf/saver/test__fillvalue_report.py deleted file mode 100644 index 32059bb058..0000000000 --- a/lib/iris/tests/unit/fileformats/netcdf/saver/test__fillvalue_report.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright Iris contributors -# -# This file is part of Iris and is released under the BSD license. -# See LICENSE in the root of the repository for full licensing details. 
-"""Unit tests for :func:`iris.fileformats.netcdf.saver._fillvalue_report`.""" - -import warnings - -import numpy as np -import pytest - -from iris.fileformats.netcdf._thread_safe_nc import default_fillvals -from iris.fileformats.netcdf.saver import _fillvalue_report, _FillvalueCheckInfo -from iris.warnings import IrisSaverFillValueWarning - - -class Test__fillvaluereport: - @pytest.mark.parametrize("is_bytes", [True, False], ids=["ByteData", "NonbyteData"]) - @pytest.mark.parametrize( - "is_masked", [True, False], ids=["MaskedData", "NonmaskedData"] - ) - @pytest.mark.parametrize( - "contains_fv", [True, False], ids=["FillInData", "NofillInData"] - ) - @pytest.mark.parametrize( - "given_user_fv", [True, False], ids=["WithUserfill", "NoUserfill"] - ) - def test_fillvalue_checking(self, is_bytes, is_masked, contains_fv, given_user_fv): - dtype_code = "u1" if is_bytes else "f4" - dtype = np.dtype(dtype_code) - if given_user_fv: - user_fill = 123 if is_bytes else 1.234 - check_value = user_fill - else: - user_fill = None - check_value = default_fillvals[dtype_code] - - fill_info = _FillvalueCheckInfo( - user_value=user_fill, - check_value=check_value, - dtype=dtype, - varname="", - ) - - # Work out expected action, according to intended logic. 
- if is_bytes and is_masked and not given_user_fv: - msg_fragment = "'' contains byte data with masked points" - elif contains_fv: - msg_fragment = ( - "'' contains unmasked data points equal to the fill-value" - ) - else: - msg_fragment = None - - # Trial the action - result = _fillvalue_report( - fill_info, - is_masked=is_masked, - contains_fill_value=contains_fv, - warn=False, - ) - - # Check the result - if msg_fragment is None: - assert result is None - else: - assert isinstance(result, Warning) - assert msg_fragment in result.args[0] - - @pytest.mark.parametrize( - "has_collision", - [True, False], - ids=["WithFvCollision", "NoFvCollision"], - ) - def test_warn(self, has_collision): - fill_info = _FillvalueCheckInfo( - user_value=1.23, - check_value=1.23, - dtype=np.float32, - varname="", - ) - - # Check results - if has_collision: - # Check that we get the expected warning - expected_msg = ( - "'' contains unmasked data points equal to the fill-value" - ) - # Enter a warnings context that checks for the error. - warning_context = pytest.warns( - IrisSaverFillValueWarning, match=expected_msg - ) - warning_context.__enter__() - else: - # Check that we get NO warning of the expected type. - warnings.filterwarnings("error", category=IrisSaverFillValueWarning) - - # Do call: it should raise AND return a warning, ONLY IF there was a collision. - result = _fillvalue_report( - fill_info, - is_masked=True, - contains_fill_value=has_collision, - warn=True, - ) - - # Check result - if has_collision: - # Fail if no warning was raised .. - warning_context.__exit__(None, None, None) - # .. or result does not have the expected message content - assert expected_msg in result.args[0] - else: - # Fail if any warning result was produced. - assert result is None