diff --git a/lib/iris/_concatenate.py b/lib/iris/_concatenate.py index 418cf86554..9f37fd24b2 100644 --- a/lib/iris/_concatenate.py +++ b/lib/iris/_concatenate.py @@ -330,7 +330,6 @@ def __init__(self, cube): self.defn = cube.metadata self.data_type = cube.dtype - self.fill_value = cube.fill_value # # Collate the dimension coordinate metadata. @@ -675,8 +674,6 @@ def concatenate(self): dim_coords_and_dims=dim_coords_and_dims, aux_coords_and_dims=aux_coords_and_dims, cell_measures_and_dims=new_cm_and_dims, - dtype=cube_signature.data_type, - fill_value=cube_signature.fill_value, **kwargs) else: # There are no other source-cubes to concatenate @@ -722,13 +719,6 @@ def register(self, cube, axis=None, error_on_mismatch=False, # Check for compatible coordinate signatures. if match: - fill_value = self._cube_signature.fill_value - # Determine whether the fill value requires to be - # demoted to the default value. - if fill_value is not None: - if cube_signature.fill_value != fill_value: - # Demote the fill value to the default. - self._cube_signature.fill_value = None coord_signature = _CoordSignature(cube_signature) candidate_axis = self._coord_signature.candidate_axis( coord_signature) diff --git a/lib/iris/_data_manager.py b/lib/iris/_data_manager.py index 5659604d3b..fdda7e24cd 100644 --- a/lib/iris/_data_manager.py +++ b/lib/iris/_data_manager.py @@ -37,7 +37,7 @@ class DataManager(object): """ - def __init__(self, data, fill_value='none', realised_dtype=None): + def __init__(self, data): """ Create a data manager for the specified data. @@ -48,40 +48,14 @@ def __init__(self, data, fill_value='none', realised_dtype=None): real data, or :class:`~dask.array.core.Array` lazy data to be managed. - Kwargs: - - * fill_value: - The intended fill-value of :class:`~iris._data_manager.DataManager` - masked data. Note that, the fill-value is cast relative to the - dtype of the :class:`~iris._data_manager.DataManager`. 
- - * realised_dtype: - The intended dtype of the specified lazy data, which must be - either integer or boolean. This is to handle the case of lazy - integer or boolean masked data. - """ # Initialise the instance. - self._fill_value = None self._lazy_array = None self._real_array = None - self._realised_dtype = None # Assign the data payload to be managed. self.data = data - # Set the lazy data realised dtype, if appropriate. - self._realised_dtype_setter(realised_dtype) - - default_fill_value = (isinstance(fill_value, six.string_types) and - fill_value == 'none') - - # Set the fill-value, must be set after the realised dtype. - if ma.isMaskedArray(data) and default_fill_value: - self._propagate_masked_data_fill_value() - else: - self.fill_value = None if default_fill_value else fill_value - # Enforce the manager contract. self._assert_axioms() @@ -134,11 +108,8 @@ def __eq__(self, other): if isinstance(other, type(self)): result = False same_lazy = self.has_lazy_data() == other.has_lazy_data() - same_fill_value = self.fill_value == other.fill_value - same_realised_dtype = self._realised_dtype == other._realised_dtype same_dtype = self.dtype == other.dtype - if same_lazy and same_fill_value and same_realised_dtype \ - and same_dtype: + if same_lazy and same_dtype: result = array_equal(self.core_data(), other.core_data()) return result @@ -171,18 +142,8 @@ def __repr__(self): Returns an string representation of the instance. 
""" - fmt = '{cls}({data!r}{fill_value}{dtype})' - fill_value = '' - dtype = '' - - if self.fill_value is not None: - fill_value = ', fill_value={!r}'.format(self.fill_value) - - if self._realised_dtype is not None: - dtype = ', realised_dtype={!r}'.format(self._realised_dtype) - - result = fmt.format(data=self.core_data(), cls=type(self).__name__, - fill_value=fill_value, dtype=dtype) + fmt = '{cls}({data!r})' + result = fmt.format(data=self.core_data(), cls=type(self).__name__) return result @@ -198,27 +159,8 @@ def _assert_axioms(self): state = is_lazy ^ is_real assert state, emsg.format('' if is_lazy else 'no ', '' if is_real else 'no ') - # Ensure validity of realised dtype. - state = (self._realised_dtype is None or - self._realised_dtype.kind in 'biu') - emsg = 'Unexpected realised dtype state, got {!r}' - assert state, emsg.format(self._realised_dtype) - - # Ensure validity of lazy data with realised dtype. - state = self.has_lazy_data() or self._realised_dtype is None - emsg = ('Unexpected real data with realised dtype, got ' - 'real data and realised {!r}.') - assert state, emsg.format(self._realised_dtype) - - state = not (self.has_lazy_data() and - self._lazy_array.dtype.kind != 'f' and - self._realised_dtype is not None) - emsg = ('Unexpected lazy data dtype with realised dtype, got ' - 'lazy data {!r} and realised {!r}.') - assert state, emsg.format(self._lazy_array.dtype, self._realised_dtype) - - def _deepcopy(self, memo, data=None, fill_value='none', - realised_dtype='none'): + + def _deepcopy(self, memo, data=None): """ Perform a deepcopy of the :class:`~iris._data_manager.DataManager` instance. @@ -234,12 +176,6 @@ def _deepcopy(self, memo, data=None, fill_value='none', Replacement data to substitute the currently managed data with. - * fill_value: - Replacement fill-value. - - * realised_dtype: - Replacement for the intended dtype of the realised lazy data. - Returns: :class:`~iris._data_manager.DataManager` instance. 
@@ -251,81 +187,13 @@ def _deepcopy(self, memo, data=None, fill_value='none', data = copy.deepcopy(self._lazy_array, memo) else: data = self._real_array.copy() - else: - # Check that the replacement data is valid relative to - # the currently managed data. - DataManager(self.core_data()).replace(data) - # If the replacement data is valid, then use it but - # without copying it. - - if isinstance(fill_value, six.string_types) and \ - fill_value == 'none': - fill_value = self.fill_value - - if isinstance(realised_dtype, six.string_types) and \ - realised_dtype == 'none': - realised_dtype = self._realised_dtype - - result = DataManager(data, fill_value=fill_value, - realised_dtype=realised_dtype) + result = DataManager(data) except ValueError as error: emsg = 'Cannot copy {!r} - {}' raise ValueError(emsg.format(type(self).__name__, error)) return result - def _propagate_masked_data_fill_value(self): - """ - Align the data manager fill-value with the real masked array - fill-value. - - """ - data = self._real_array - if ma.isMaskedArray(data): - # Determine the default numpy fill-value. - np_fill_value = ma.masked_array(0, dtype=data.dtype).fill_value - if data.fill_value == np_fill_value: - # Never store the numpy default fill-value, rather - # represent this by clearing the data manager fill-value. - self.fill_value = None - else: - # Propagate the masked array fill-value to the data manager. - self.fill_value = data.fill_value - - def _realised_dtype_setter(self, realised_dtype): - """ - Set the intended dtype of the realised lazy data. This is to support - the case of lazy masked integral and boolean data in dask. - - Args: - - * realised_dtype: - A numpy :class:`~numpy.dtype`, array-protocol type string, - or built-in scalar type. 
- - """ - if realised_dtype is None: - self._realised_dtype = None - else: - realised_dtype = np.dtype(realised_dtype) - if realised_dtype != self.dtype: - if not self.has_lazy_data(): - emsg = ('Cannot set realised dtype, no lazy data ' - 'is available.') - raise ValueError(emsg) - if self._lazy_array.dtype.kind != 'f': - emsg = ('Cannot set realised dtype for lazy data ' - 'with {!r}.') - raise ValueError(emsg.format(self._lazy_array.dtype)) - if realised_dtype.kind not in 'biu': - emsg = ('Can only cast lazy data to an integer or boolean ' - 'dtype, got {!r}.') - raise ValueError(emsg.format(realised_dtype)) - self._realised_dtype = realised_dtype - - # Check the manager contract, as the managed dtype has changed. - self._assert_axioms() - @property def data(self): """ @@ -338,14 +206,11 @@ def data(self): if self.has_lazy_data(): try: # Realise the lazy data. - result = as_concrete_data(self._lazy_array, - nans_replacement=ma.masked, - result_dtype=self.dtype) + result = as_concrete_data(self._lazy_array) # Assign the realised result. self._real_array = result # Reset the lazy data and the realised dtype. self._lazy_array = None - self._realised_dtype = None except MemoryError: emsg = ('Failed to realise the lazy data as there was not ' 'enough memory available.\n' @@ -354,10 +219,6 @@ def data(self): 'before trying again.') raise MemoryError(emsg.format(self.shape, self.dtype)) - if ma.isMaskedArray(self._real_array): - # Align the numpy fill-value with the data manager fill-value. - self._real_array.fill_value = self.fill_value - # Check the manager contract, as the managed data has changed. self._assert_axioms() @@ -412,18 +273,6 @@ def data(self, data): self._lazy_array = None self._real_array = data - # Always reset the realised dtype, as the managed data has changed. - self._realised_dtype = None - - # Reset the fill-value appropriately. - if init_done: - if ma.isMaskedArray(data): - # Align the data manager fill-value with the numpy fill-value. 
- self._propagate_masked_data_fill_value() - else: - # Clear the data manager fill-value. - self.fill_value = None - # Check the manager contract, as the managed data has changed. self._assert_axioms() @@ -433,32 +282,7 @@ def dtype(self): The dtype of the realised lazy data or the dtype of the real data. """ - if self._realised_dtype is not None: - result = self._realised_dtype - else: - result = self.core_data().dtype - - return result - - @property - def fill_value(self): - return self._fill_value - - @fill_value.setter - def fill_value(self, fill_value): - if fill_value is not None: - # Convert the given value to the dtype of the data manager. - fill_value = np.asarray([fill_value])[0] - target_dtype = self.dtype - if fill_value.dtype.kind == 'f' and target_dtype.kind in 'biu': - # Perform rounding when converting floats to ints. - fill_value = np.rint(fill_value) - try: - [fill_value] = np.asarray([fill_value], dtype=target_dtype) - except OverflowError: - emsg = 'Fill value of {!r} invalid for {!r}.' - raise ValueError(emsg.format(fill_value, self.dtype)) - self._fill_value = fill_value + return self.core_data().dtype @property def ndim(self): @@ -476,7 +300,7 @@ def shape(self): """ return self.core_data().shape - def copy(self, data=None, fill_value='none', realised_dtype='none'): + def copy(self, data=None): """ Returns a deep copy of this :class:`~iris._data_manager.DataManager` instance. @@ -486,20 +310,12 @@ def copy(self, data=None, fill_value='none', realised_dtype='none'): * data: Replace the data of the copy with this data. - * fill_value: - Replacement fill-value. - - * realised_dtype: - Replace the intended dtype of the lazy data - in the copy with this :class:`~numpy.dtype`. - Returns: A copy :class:`~iris._data_manager.DataManager` instance. 
""" memo = {} - return self._deepcopy(memo, data=data, fill_value=fill_value, - realised_dtype=realised_dtype) + return self._deepcopy(memo, data=data) def core_data(self): """ @@ -548,45 +364,3 @@ def lazy_data(self): result = as_lazy_data(self._real_array) return result - - def replace(self, data, fill_value=None, realised_dtype=None): - """ - Perform an in-place replacement of the managed data. - - Args: - - * data: - Replace the managed data with either the :class:`~numpy.ndarray` - or :class:`~numpy.ma.core.MaskedArray` real data, or lazy - :class:`dask.array.core.Array` - - Kwargs: - - * fill_value: - Replacement for the :class:`~iris._data_manager.DataManager` - fill-value. - - * realised_dtype: - The intended dtype of the specified lazy data. - - .. note:: - Data replacement alone will clear the intended dtype - of the realised lazy data, and the fill-value. - - """ - # Snapshot the currently managed data. - original_data = self.core_data() - # Perform in-place data assignment. - self.data = data - try: - self._realised_dtype_setter(realised_dtype) - self.fill_value = fill_value - except ValueError as error: - # Backout the data replacement, and reinstate the cached - # original managed data. - self._lazy_array = self._real_array = None - if is_lazy_data(original_data): - self._lazy_array = original_data - else: - self._real_array = original_data - raise error diff --git a/lib/iris/_lazy_data.py b/lib/iris/_lazy_data.py index 5fb61b81f3..985e79af13 100644 --- a/lib/iris/_lazy_data.py +++ b/lib/iris/_lazy_data.py @@ -94,13 +94,12 @@ def as_lazy_data(data, chunks=_MAX_CHUNK_SIZE): """ if not is_lazy_data(data): - if ma.isMaskedArray(data): - data = array_masked_to_nans(data) - data = da.from_array(data, chunks=chunks) + asarray = not ma.isMaskedArray(data) + data = da.from_array(data, chunks=chunks, asarray=asarray) return data -def as_concrete_data(data, **kwargs): +def as_concrete_data(data): """ Return the actual content of a lazy array, as a numpy array. 
If the input data is a NumPy `ndarray` or masked array, return it @@ -108,17 +107,11 @@ def as_concrete_data(data, **kwargs): If the input data is lazy, return the realised result. - Where lazy data contains NaNs these are translated by filling or converting - to masked data, using the :func:`~iris._lazy_data.convert_nans_array` - function. - Args: * data: A dask array, NumPy `ndarray` or masked array - Kwargs are passed through to :func:`~iris._lazy_data.convert_nans_array`. - Returns: A NumPy `ndarray` or masked array. @@ -129,53 +122,10 @@ def as_concrete_data(data, **kwargs): # rather than a numpy.ndarray object. # Recorded in https://github.com/dask/dask/issues/2111. data = np.asanyarray(data.compute()) - # Convert any missing data as requested. - data = convert_nans_array(data, **kwargs) return data -def nan_array_type(dtype): - return np.dtype('f8') if dtype.kind in 'biu' else dtype - - -def array_masked_to_nans(array): - """ - Convert a masked array to a NumPy `ndarray` filled with NaN values. Input - NumPy arrays with no mask are returned unchanged. - This is used for dask integration, as dask does not support masked arrays. - - Args: - - * array: - A NumPy `ndarray` or masked array. - - Returns: - A NumPy `ndarray`. This is the input array if unmasked, or an array - of floating-point values with NaN values where the mask was `True` if - the input array is masked. - - .. note:: - The fill value and mask of the input masked array will be lost. - - .. note:: - Integer masked arrays are cast to 8-byte floats because NaN is a - floating-point value. - - """ - if not ma.isMaskedArray(array): - result = array - else: - if ma.is_masked(array): - mask = array.mask - new_dtype = nan_array_type(array.data.dtype) - result = array.data.astype(new_dtype) - result[mask] = np.nan - else: - result = array.data - return result - - def multidim_lazy_stack(stack): """ Recursively build a multidimensional stacked dask array. 
@@ -202,70 +152,3 @@ def multidim_lazy_stack(stack): result = da.stack([multidim_lazy_stack(subarray) for subarray in stack]) return result - - -def convert_nans_array(array, nans_replacement=None, result_dtype=None): - """ - Convert a :class:`~numpy.ndarray` that may contain one or more NaN values - to either a :class:`~numpy.ma.core.MaskedArray` or a - :class:`~numpy.ndarray` with the NaN values filled. - - Args: - - * array: - The :class:`~numpy.ndarray` to be converted. - - Kwargs: - - * nans_replacement: - If `nans_replacement` is None, then raise an exception if the `array` - contains any NaN values (default behaviour). - If `nans_replacement` is `numpy.ma.masked`, then convert the `array` - to a :class:`~numpy.ma.core.MaskedArray`. - Otherwise, use the specified `nans_replacement` value as the `array` - fill value. - - * result_dtype: - Cast the resultant array to this target :class:`~numpy.dtype`. - - Returns: - An :class:`numpy.ndarray`. - - .. note:: - An input array that is either a :class:`~numpy.ma.core.MaskedArray` - or has an integral dtype will be returned unaltered. - - .. note:: - In some cases, the input array is modified in-place. - - """ - if not ma.isMaskedArray(array) and array.dtype.kind == 'f': - # First, calculate the mask. - mask = np.isnan(array) - # Now, cast the dtype, if required. - if result_dtype is not None: - result_dtype = np.dtype(result_dtype) - if array.dtype != result_dtype: - array = array.astype(result_dtype) - # Finally, mask or fill the data, as required or raise an exception - # if we detect there are NaNs present and we didn't expect any. - if np.any(mask): - if nans_replacement is None: - emsg = 'Array contains unexpected NaNs.' - raise ValueError(emsg) - elif nans_replacement is ma.masked: - # Mask the array with the default fill_value. - array = ma.masked_array(array, mask=mask) - else: - # Check the fill value is appropriate for the - # result array dtype. 
- try: - [fill_value] = np.asarray([nans_replacement], - dtype=array.dtype) - except OverflowError: - emsg = 'Fill value of {!r} invalid for array result {!r}.' - raise ValueError(emsg.format(nans_replacement, - array.dtype)) - # Fill the array. - array[mask] = fill_value - return array diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index 754bee7b86..ee87cefe82 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -315,7 +315,7 @@ class _CoordSignature(namedtuple('CoordSignature', class _CubeSignature(namedtuple('CubeSignature', ['defn', 'data_shape', 'data_type', - 'fill_value', 'cell_measures_and_dims'])): + 'cell_measures_and_dims'])): """ Criterion for identifying a specific type of :class:`iris.cube.Cube` based on its metadata. @@ -331,9 +331,6 @@ class _CubeSignature(namedtuple('CubeSignature', * data_type: The data payload :class:`numpy.dtype` of a :class:`iris.cube.Cube`. - * fill_value: - The fill-value for the data payload of a :class:`iris.cube.Cube`. - * cell_measures_and_dims: A list of cell_measures and dims for the cube. @@ -1241,9 +1238,7 @@ def merge(self, unique=True): # All inputs were concrete, so turn the result back into a # normal array. dtype = self._cube_signature.data_type - merged_data = as_concrete_data(merged_data, - nans_replacement=ma.masked, - result_dtype=dtype) + merged_data = as_concrete_data(merged_data) # Unmask the array if it has no masked points. if (ma.isMaskedArray(merged_data) and not ma.is_masked(merged_data)): @@ -1283,14 +1278,6 @@ def register(self, cube, error_on_mismatch=False): other = self._build_signature(cube) match = cube_signature.match(other, error_on_mismatch) if match: - # Determine whether the fill value requires to be demoted - # to the default value. - if cube_signature.fill_value is not None: - if cube_signature.fill_value != other.fill_value: - # Demote the fill value to the default. 
- signature = self._build_signature(self._source, - default_fill_value=True) - self._cube_signature = signature coord_payload = self._extract_coord_payload(cube) match = coord_payload.match_signature(self._coord_signature, error_on_mismatch) @@ -1504,8 +1491,6 @@ def _get_cube(self, data): dim_coords_and_dims=dim_coords_and_dims, aux_coords_and_dims=aux_coords_and_dims, cell_measures_and_dims=cms_and_dims, - fill_value=signature.fill_value, - dtype=signature.data_type, **kwargs) # Add on any aux coord factories. @@ -1611,7 +1596,7 @@ def _build_coordinates(self): self._vector_aux_coords_dims): aux_coords_and_dims.append(_CoordAndDims(item.coord, dims)) - def _build_signature(self, cube, default_fill_value=False): + def _build_signature(self, cube): """ Generate the signature that defines this cube. @@ -1620,22 +1605,13 @@ def _build_signature(self, cube, default_fill_value=False): * cube: The source cube to create the cube signature from. - Kwargs: - - * default_fill_value: - Override the cube fill value with the default fill value of None. - Default is False i.e. use the provided cube.fill_value when - constructing the cube signature. - Returns: The cube signature. """ - fill_value = cube.fill_value - if default_fill_value: - fill_value = None - return _CubeSignature(cube.metadata, cube.shape, cube.dtype, - fill_value, cube._cell_measures_and_dims) + + return _CubeSignature(cube.metadata, cube.shape, + cube.dtype, cube._cell_measures_and_dims) def _add_cube(self, cube, coord_payload): """Create and add the source-cube skeleton to the ProtoCube.""" diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 818bf2ac91..74bce2a458 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -563,8 +563,7 @@ def post_process(self, collapsed_cube, data_result, coords, **kwargs): The collapsed cube with its aggregated data payload. 
""" - collapsed_cube.replace(data_result, - fill_value=collapsed_cube.fill_value) + collapsed_cube.data = data_result return collapsed_cube def aggregate_shape(self, **kwargs): diff --git a/lib/iris/analysis/_interpolate_private.py b/lib/iris/analysis/_interpolate_private.py index a56683f8c2..3990fb6414 100644 --- a/lib/iris/analysis/_interpolate_private.py +++ b/lib/iris/analysis/_interpolate_private.py @@ -602,7 +602,6 @@ def regrid(source_cube, grid_cube, mode='bilinear', **kwargs): # Start with just the metadata and the re-sampled data... new_cube = iris.cube.Cube(new_data) new_cube.metadata = source_cube.metadata - new_cube.fill_value = source_cube.fill_value # ... and then copy across all the unaffected coordinates. diff --git a/lib/iris/analysis/_interpolation.py b/lib/iris/analysis/_interpolation.py index 46dd1115b6..405a14e24e 100644 --- a/lib/iris/analysis/_interpolation.py +++ b/lib/iris/analysis/_interpolation.py @@ -627,7 +627,6 @@ def __call__(self, sample_points, collapse_scalar=True): cube = self._src_cube new_cube = iris.cube.Cube(interpolated_data) new_cube.metadata = cube.metadata - new_cube.fill_value = cube.fill_value def construct_new_coord_given_points(coord, points): # Handle what was previously a DimCoord which may no longer be diff --git a/lib/iris/analysis/_regrid.py b/lib/iris/analysis/_regrid.py index 8dfffc0c2e..eb7a16d075 100644 --- a/lib/iris/analysis/_regrid.py +++ b/lib/iris/analysis/_regrid.py @@ -391,7 +391,6 @@ def _create_cube(data, src, x_dim, y_dim, src_x_coord, src_y_coord, # Create a result cube with the appropriate metadata result = iris.cube.Cube(data) result.metadata = copy.deepcopy(src.metadata) - result.fill_value = src.fill_value # Copy across all the coordinates which don't span the grid. 
# Record a mapping from old coordinate IDs to new coordinates, diff --git a/lib/iris/analysis/calculus.py b/lib/iris/analysis/calculus.py index 8d7d35d9c4..a18a4a2aed 100644 --- a/lib/iris/analysis/calculus.py +++ b/lib/iris/analysis/calculus.py @@ -341,9 +341,8 @@ def _copy_cube_transformed(src_cube, data, coord_func): assert src_cube.ndim == data.ndim # Start with just the metadata and the data... - new_cube = iris.cube.Cube(data, - fill_value=src_cube.fill_value, - dtype=src_cube.dtype) + new_cube = iris.cube.Cube(data) + new_cube.metadata = src_cube.metadata new_cube.metadata = src_cube.metadata # ... and then create all the coordinates. diff --git a/lib/iris/analysis/cartography.py b/lib/iris/analysis/cartography.py index 481350b39d..2f61cb7f37 100644 --- a/lib/iris/analysis/cartography.py +++ b/lib/iris/analysis/cartography.py @@ -765,7 +765,6 @@ def project(cube, target_proj, nx=None, ny=None): # Copy metadata across new_cube.metadata = cube.metadata - new_cube.fill_value = cube.fill_value return new_cube, extent diff --git a/lib/iris/analysis/trajectory.py b/lib/iris/analysis/trajectory.py index 7ca49f0e7b..c5433c91fc 100644 --- a/lib/iris/analysis/trajectory.py +++ b/lib/iris/analysis/trajectory.py @@ -380,9 +380,6 @@ def interpolate(cube, sample_points, method=None): new_cube_coord.points = src_coord.points[fancy_coord_index_arrays] # NOTE: the new coords do *not* have bounds. - # Set the fill-value last, as any previous data setter will clear it. - new_cube.fill_value = cube.fill_value - return new_cube @@ -547,7 +544,6 @@ def __call__(self, src_cube): # Make a new result cube with the reshaped data. result_cube = iris.cube.Cube(data_2d_x_and_y) result_cube.metadata = src_cube.metadata - result_cube.fill_value = src_cube.fill_value # Copy all the coords from the trajectory result. 
i_trajectory_dim = result_trajectory_cube.ndim - 1 diff --git a/lib/iris/coords.py b/lib/iris/coords.py index e2cc855de2..d651770c6e 100644 --- a/lib/iris/coords.py +++ b/lib/iris/coords.py @@ -1866,12 +1866,7 @@ def data(self, data): raise ValueError('The data payload of a CellMeasure may not be ' 'None; it must be a numpy array or equivalent.') if is_lazy_data(data) and data.dtype.kind in 'biu': - # Disallow lazy integral data, as it will cause problems with dask - # if it turns out to contain any masked points. - # Non-floating cell measures are not valid up to CF v1.7 anyway, - # but this avoids any possible problems with non-compliant files. - # Future usage could be supported by adding a fill_value and dtype - # as for cube data. For now, disallowing it is just simpler. + # Non-floating cell measures are not valid up to CF v1.7 msg = ('Cannot create cell measure with lazy data of type {}, as ' 'integer types are not currently supported.') raise ValueError(msg.format(data.dtype)) diff --git a/lib/iris/cube.py b/lib/iris/cube.py index efa8d4a34d..67f6ecfebf 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -41,7 +41,6 @@ import iris._concatenate import iris._constraints from iris._data_manager import DataManager -from iris._deprecation import warn_deprecated import iris._merge import iris.analysis @@ -648,7 +647,7 @@ def __init__(self, data, standard_name=None, long_name=None, var_name=None, units=None, attributes=None, cell_methods=None, dim_coords_and_dims=None, aux_coords_and_dims=None, aux_factories=None, - cell_measures_and_dims=None, fill_value=None, dtype=None): + cell_measures_and_dims=None): """ Creates a cube with data and optional metadata. @@ -695,14 +694,6 @@ def __init__(self, data, standard_name=None, long_name=None, :mod:`iris.aux_factory`. * cell_measures_and_dims A list of CellMeasures with dimension mappings. - * fill_value - The intended fill-value of :class:`~iris.cube.Cube` masked data. 
- Note that, the fill-value is cast relative to the dtype of the - :class:`~iris.cube.Cube`. - * dtype - The intended dtype of the specified lazy data, which must be - either integer or boolean. This is to handle the case of lazy - integer or boolean masked data. For example:: >>> from iris.coords import DimCoord @@ -723,8 +714,7 @@ def __init__(self, data, standard_name=None, long_name=None, raise TypeError('Invalid data type: {!r}.'.format(data)) # Initialise the cube data manager. - self._data_manager = DataManager(data, fill_value=fill_value, - realised_dtype=dtype) + self._data_manager = DataManager(data) #: The "standard name" for the Cube's phenomenon. self.standard_name = standard_name @@ -1589,27 +1579,6 @@ def dtype(self): """ return self._data_manager.dtype - @property - def fill_value(self): - """ - A fill value for the data of the :class:`~iris.cube.Cube`. - - This is a value suitable for filling masked points in this cube's data. - It may be ``None``, meaning that no suitable fill value is known. - - .. Note:: - - Ideally, this value will not occur anywhere in the cube data. - Thus, many operations that change cube data will also set - ``fill_value`` to ``None``. 
- - """ - return self._data_manager.fill_value - - @fill_value.setter - def fill_value(self, fill_value): - self._data_manager.fill_value = fill_value - @property def ndim(self): """The number of dimensions in the data of this cube.""" @@ -2146,9 +2115,7 @@ def new_cell_measure_dims(cm_): data = ma.array(data.data, mask=data.mask, dtype=cube_data.dtype) # Make the new cube slice - cube = Cube(data, - fill_value=self.fill_value, - dtype=self._data_manager.dtype) + cube = Cube(data) cube.metadata = deepcopy(self.metadata) # Record a mapping from old coordinate IDs to new coordinates, @@ -2386,9 +2353,7 @@ def make_chunk(key): module = ma if ma.isMaskedArray(self.data) else np func = module.concatenate data = func(chunk_data, dim) - result = iris.cube.Cube(data, - fill_value=self.fill_value, - dtype=self.dtype) + result = iris.cube.Cube(data) result.metadata = deepcopy(self.metadata) # Record a mapping from old coordinate IDs to new coordinates, @@ -2778,8 +2743,7 @@ def transpose(self, new_order=None): # Transpose the data payload. dm = self._data_manager data = dm.core_data().transpose(new_order) - self._data_manager = DataManager(data, fill_value=dm.fill_value, - realised_dtype=dm.dtype) + self._data_manager = DataManager(data) dim_mapping = {src: dest for dest, src in enumerate(new_order)} @@ -2821,8 +2785,6 @@ def _xml_element(self, doc, checksum=False, order=True, byteorder=True): if self.var_name: cube_xml_element.setAttribute('var_name', self.var_name) cube_xml_element.setAttribute('units', str(self.units)) - if self.fill_value is not None: - cube_xml_element.setAttribute('fill_value', str(self.fill_value)) cube_xml_element.setAttribute('dtype', self.dtype.name) cube_xml_element.setAttribute('core-dtype', self.core_data().dtype.name) @@ -2947,32 +2909,7 @@ def _order(array): return cube_xml_element - def replace(self, data, dtype=None, fill_value=None): - """ - Perform an in-place replacement of the cube data. 
- - Args: - - * data: - Replace the data of the cube with the provided data payload. - - Kwargs: - - * dtype: - Replacement for the intended dtype of the realised lazy data. - - * fill_value: - Replacement for the cube data fill-value. - - .. note:: - Data replacement alone will clear the intended dtype - of the realised lazy data and the fill-value. - - """ - self._data_manager.replace(data, fill_value=fill_value, - realised_dtype=dtype) - - def copy(self, data=None, dtype='none', fill_value='none'): + def copy(self, data=None): """ Returns a deep copy of this cube. @@ -2981,19 +2918,12 @@ def copy(self, data=None, dtype='none', fill_value='none'): * data: Replace the data of the cube copy with provided data payload. - * dtype: - Replacement for the intended dtype of the realised lazy data. - - * fill_value: - Replacement fill-value. - Returns: A copy instance of the :class:`Cube`. """ memo = {} - cube = self._deepcopy(memo, data=data, dtype=dtype, - fill_value=fill_value) + cube = self._deepcopy(memo, data=data) return cube def __copy__(self): @@ -3004,9 +2934,8 @@ def __copy__(self): def __deepcopy__(self, memo): return self._deepcopy(memo) - def _deepcopy(self, memo, data=None, dtype='none', fill_value='none'): - dm = self._data_manager.copy(data=data, fill_value=fill_value, - realised_dtype=dtype) + def _deepcopy(self, memo, data=None): + dm = self._data_manager.copy(data=data) new_dim_coords_and_dims = deepcopy(self._dim_coords_and_dims, memo) new_aux_coords_and_dims = deepcopy(self._aux_coords_and_dims, memo) @@ -3025,9 +2954,7 @@ def _deepcopy(self, memo, data=None, dtype='none', fill_value='none'): new_cube = Cube(dm.core_data(), dim_coords_and_dims=new_dim_coords_and_dims, - aux_coords_and_dims=new_aux_coords_and_dims, - fill_value=dm.fill_value, - dtype=dm.dtype) + aux_coords_and_dims=new_aux_coords_and_dims) new_cube.metadata = deepcopy(self.metadata, memo) @@ -3043,10 +2970,6 @@ def __eq__(self, other): if isinstance(other, Cube): result = 
self.metadata == other.metadata - # check the cube fill-value. - if result: - result = self.fill_value == other.fill_value - # having checked the metadata, now check the coordinates if result: coord_comparison = iris.analysis.coord_comparison(self, other) diff --git a/lib/iris/etc/pp_save_rules.txt b/lib/iris/etc/pp_save_rules.txt index 8d3183729e..ee78cbfd01 100644 --- a/lib/iris/etc/pp_save_rules.txt +++ b/lib/iris/etc/pp_save_rules.txt @@ -761,12 +761,12 @@ THEN #MDI IF - cm.fill_value is not None + ma.isMaskedArray(cm.data) THEN - pp.bmdi = cm.fill_value + pp.bmdi = cm.data.fill_value IF - cm.fill_value is None + not isinstance(cm.data, ma.core.MaskedArray) THEN pp.bmdi = -1e30 diff --git a/lib/iris/experimental/regrid.py b/lib/iris/experimental/regrid.py index 021d0ce3e2..c5c67ff688 100644 --- a/lib/iris/experimental/regrid.py +++ b/lib/iris/experimental/regrid.py @@ -1131,7 +1131,6 @@ def _regrid_weighted_curvilinear_to_rectilinear__perform( cube = iris.cube.Cube(weighted_mean.reshape(grid_cube.shape), dim_coords_and_dims=dim_coords_and_dims) cube.metadata = copy.deepcopy(src_cube.metadata) - cube.fill_value = src_cube.fill_value for coord in src_cube.coords(dimensions=()): cube.add_aux_coord(coord.copy()) @@ -1494,7 +1493,6 @@ def _create_cube(self, data, src, src_xy_dim, src_x_coord, src_y_coord, # Create a result cube with the appropriate metadata result = iris.cube.Cube(data) result.metadata = copy.deepcopy(src.metadata) - result.fill_value = src.fill_value # Copy across all the coordinates which don't span the grid. 
# Record a mapping from old coordinate IDs to new coordinates, diff --git a/lib/iris/fileformats/grib/__init__.py b/lib/iris/fileformats/grib/__init__.py index 34ce13ac36..d4ea3d86af 100644 --- a/lib/iris/fileformats/grib/__init__.py +++ b/lib/iris/fileformats/grib/__init__.py @@ -34,7 +34,7 @@ import numpy as np import numpy.ma as ma -from iris._lazy_data import as_lazy_data, convert_nans_array +from iris._lazy_data import as_lazy_data import iris.coord_systems as coord_systems from iris.exceptions import TranslationError, NotYetImplementedError # NOTE: careful here, to avoid circular imports (as iris imports grib) @@ -139,7 +139,6 @@ class GribWrapper(object): def __init__(self, grib_message, grib_fh=None): """Store the grib message and compute our extra keys.""" self.grib_message = grib_message - self.realised_dtype = np.array([0.]).dtype if self.edition != 1: emsg = 'GRIB edition {} is not supported by {!r}.' @@ -178,14 +177,11 @@ def __init__(self, grib_message, grib_fh=None): # The byte offset requires to be reset back to the first byte # of this message. The file pointer offset is always at the end # of the current message due to the grib-api reading the message. - proxy = GribDataProxy(shape, self.realised_dtype, grib_fh.name, + proxy = GribDataProxy(shape, np.array([0.]).dtype, grib_fh.name, offset - message_length) self._data = as_lazy_data(proxy) else: values_array = _message_values(grib_message, shape) - # mask where the values are nan - self.data = convert_nans_array(values_array, - nans_replacement=ma.masked) def _confirm_in_scope(self): """Ensure we have a grib flavour that we choose to support.""" @@ -692,6 +688,10 @@ def _message_values(grib_message, shape): data = gribapi.grib_get_double_array(grib_message, 'values') data = data.reshape(shape) + # Handle missing values in a sensible way. 
+ mask = np.isnan(data) + if mask.any(): + data = ma.array(data, mask=mask, fill_value=np.nan) return data diff --git a/lib/iris/fileformats/grib/_save_rules.py b/lib/iris/fileformats/grib/_save_rules.py index 7a5a8fce49..8182246305 100644 --- a/lib/iris/fileformats/grib/_save_rules.py +++ b/lib/iris/fileformats/grib/_save_rules.py @@ -1160,17 +1160,20 @@ def product_definition_section(cube, grib): def data_section(cube, grib): # Masked data? if ma.isMaskedArray(cube.data): - fill_value = cube.fill_value - if fill_value is None or np.isnan(cube.fill_value): + if not np.isnan(cube.data.fill_value): + # Use the data's fill value. + fill_value = float(cube.data.fill_value) + else: # We can't use the cube's fill value if it's NaN, # the GRIB API doesn't like it. # Calculate an MDI outside the data range. min, max = cube.data.min(), cube.data.max() fill_value = min - (max - min) * 0.1 + # Prepare the unmasked data array, using fill_value as the MDI. + data = cube.data.filled(fill_value) else: fill_value = None - - data = cube.data + data = cube.data # units scaling grib2_info = gptx.cf_phenom_to_grib2_info(cube.standard_name, diff --git a/lib/iris/fileformats/grib/message.py b/lib/iris/fileformats/grib/message.py index c936fe6e8b..7869aa32c9 100644 --- a/lib/iris/fileformats/grib/message.py +++ b/lib/iris/fileformats/grib/message.py @@ -30,7 +30,7 @@ import numpy as np import numpy.ma as ma -from iris._lazy_data import array_masked_to_nans, as_lazy_data +from iris._lazy_data import as_lazy_data from iris.exceptions import TranslationError @@ -120,10 +120,6 @@ def bmdi(self): # Default for fill value is None.
return None - @property - def realised_dtype(self): - return np.dtype('f8') - def core_data(self): return self.data @@ -164,8 +160,7 @@ def data(self): shape = (grid_section['numberOfDataPoints'],) else: shape = (grid_section['Nj'], grid_section['Ni']) - proxy = _DataProxy(shape, self.realised_dtype, np.nan, - self._recreate_raw) + proxy = _DataProxy(shape, np.dtype('f8'), self._recreate_raw) data = as_lazy_data(proxy) else: fmt = 'Grid definition template {} is not supported' @@ -196,7 +191,7 @@ class _DataProxy(object): __slots__ = ('shape', 'dtype', 'recreate_raw') - def __init__(self, shape, dtype, fill_value, recreate_raw): + def __init__(self, shape, dtype, recreate_raw): self.shape = shape self.dtype = dtype self.recreate_raw = recreate_raw @@ -258,10 +253,10 @@ def __getitem__(self, keys): # Only the non-masked values are included in codedValues. _data = np.empty(shape=bitmap.shape) _data[bitmap.astype(bool)] = data - # Use nan where input = 1, the opposite of the behaviour - # specified by the GRIB spec. - _data[np.logical_not(bitmap.astype(bool))] = np.nan - data = _data + # `ma.masked_array` masks where input = 1, the opposite of + # the behaviour specified by the GRIB spec. + data = ma.masked_array(_data, mask=np.logical_not(bitmap), + fill_value=np.nan) else: msg = 'Shapes of data and bitmap do not match.' raise TranslationError(msg) diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index deb6a26b73..e411d3ac1b 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -56,8 +56,7 @@ import iris.fileformats._pyke_rules import iris.io import iris.util -from iris._lazy_data import (array_masked_to_nans, as_lazy_data, - convert_nans_array, nan_array_type) +from iris._lazy_data import as_lazy_data # Show Pyke inference engine statistics. 
DEBUG = False @@ -395,14 +394,7 @@ def __getitem__(self, keys): var = variable[keys] finally: dataset.close() - if ma.isMaskedArray(var): - if self.dtype.kind in 'biu': - msg = "NetCDF variable {!r} has masked data, which is not " \ - "supported for declared dtype {!r}." - raise TypeError( - msg.format(self.variable_name, self.dtype.name)) - var = array_masked_to_nans(var) - return np.asanyarray(var, dtype=self.dtype) + return np.asanyarray(var) def __repr__(self): fmt = '<{self.__class__.__name__} shape={self.shape}' \ @@ -508,13 +500,12 @@ def _load_cube(engine, cf, cf_var, filename): dummy_data = cf_var.add_offset + dummy_data # Create cube with deferred data, but no metadata - fill_value = getattr(cf_var.cf_data, '_FillValue', None) - - dtype = nan_array_type(dummy_data.dtype) - proxy = NetCDFDataProxy(cf_var.shape, dtype, + fill_value = getattr(cf_var.cf_data, '_FillValue', + netCDF4.default_fillvals[cf_var.dtype.str[1:]]) + proxy = NetCDFDataProxy(cf_var.shape, dummy_data.dtype, filename, cf_var.cf_name, fill_value) data = as_lazy_data(proxy, chunks=cf_var.shape) - cube = iris.cube.Cube(data, fill_value=fill_value, dtype=dummy_data.dtype) + cube = iris.cube.Cube(data) # Reset the pyke inference engine. engine.reset() @@ -1958,8 +1949,10 @@ def set_packing_ncattrs(cfvar): # Explicitly assign the fill_value, which will be the type default # in the case of an unmasked array. if packing is None: - fill_value = cube.fill_value - dtype = cube.lazy_data().dtype.newbyteorder('=') + fill_value = None + if not cube.has_lazy_data() and ma.isMaskedArray(cube.data): + fill_value = cube.data.fill_value + dtype = cube.dtype.newbyteorder('=') cf_var = self._dataset.createVariable( cf_name, dtype, @@ -1967,13 +1960,7 @@ def set_packing_ncattrs(cfvar): **kwargs) set_packing_ncattrs(cf_var) - # Now stream the cube data payload straight to the netCDF - # data variable within the netCDF file, where any NaN values - # are replaced with the specified cube fill_value. 
- data = da.map_blocks(convert_nans_array, cube.lazy_data(), - nans_replacement=cube.fill_value, - result_dtype=cube.dtype) - da.store([data], [cf_var]) + da.store([cube.lazy_data()], [cf_var]) if cube.standard_name: _setncattr(cf_var, 'standard_name', cube.standard_name) diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index 5f79a8966e..89c374ffc7 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -39,8 +39,7 @@ import netcdftime from iris._deprecation import warn_deprecated -from iris._lazy_data import (array_masked_to_nans, as_concrete_data, - as_lazy_data, is_lazy_data) +from iris._lazy_data import as_concrete_data, as_lazy_data, is_lazy_data import iris.config import iris.fileformats.rules import iris.fileformats.pp_rules @@ -862,8 +861,8 @@ def _lbpack_getter(self): @property def dtype(self): - return np.dtype('f8') if self.src_dtype.kind == 'i' \ - else self.src_dtype.newbyteorder('=') + return np.dtype('f8') if self.src_dtype.kind == 'i' else \ + self.src_dtype.newbyteorder('=') @property def fill_value(self): @@ -1081,7 +1080,7 @@ def _pp_attribute_names(header_defn): special_headers = list('_' + name for name in _SPECIAL_HEADERS) extra_data = list(EXTRA_DATA.values()) special_attributes = ['_raw_header', 'raw_lbtim', 'raw_lbpack', - 'boundary_packing', '_realised_dtype'] + 'boundary_packing'] return normal_headers + special_headers + extra_data + special_attributes @@ -1114,7 +1113,6 @@ def __init__(self, header=None): self.raw_lbtim = None self.raw_lbpack = None self.boundary_packing = None - self._realised_dtype = None if header is not None: self.raw_lbtim = header[self.HEADER_DICT['lbtim'][0]] self.raw_lbpack = header[self.HEADER_DICT['lbpack'][0]] @@ -1286,9 +1284,7 @@ def data(self): """ if is_lazy_data(self._data): # Replace with real data on the first access. 
- self._data = as_concrete_data(self._data, - nans_replacement=ma.masked, - result_dtype=self.realised_dtype) + self._data = as_concrete_data(self._data) return self._data @data.setter @@ -1298,16 +1294,6 @@ def data(self, value): def core_data(self): return self._data - @property - def realised_dtype(self): - return self._data.dtype \ - if self._realised_dtype is None \ - else self._realised_dtype - - @realised_dtype.setter - def realised_dtype(self, value): - self._realised_dtype = value - @property def calendar(self): """Return the calendar of the field.""" @@ -1401,8 +1387,7 @@ def save(self, file_handle): # Integer or Boolean data : No masking is supported. msg = 'Non-floating masked data cannot be saved to PP.' raise ValueError(msg) - fill_value = self.bmdi - data = data.filled(fill_value=fill_value) + data = data.filled(fill_value=self.bmdi) # Make sure the data is big-endian if data.dtype.newbyteorder('>') != data.dtype: @@ -1900,7 +1885,6 @@ def _create_field_data(field, data_shape, land_mask): field.raw_lbpack, field.boundary_packing, field.bmdi, land_mask) - field.realised_dtype = dtype.newbyteorder('=') block_shape = data_shape if 0 not in data_shape else (1, 1) field.data = as_lazy_data(proxy, chunks=block_shape) diff --git a/lib/iris/fileformats/rules.py b/lib/iris/fileformats/rules.py index f8e003f9b7..b101604b74 100644 --- a/lib/iris/fileformats/rules.py +++ b/lib/iris/fileformats/rules.py @@ -898,18 +898,11 @@ def _make_cube(field, converter): metadata = converter(field) cube_data = field.core_data() - cube_dtype = field.realised_dtype - if cube_dtype.kind in 'biu': - # Don't adopt BMDI as a fill value for integer data. 
- cube_fill_value = None - else: - cube_fill_value = field.bmdi cube = iris.cube.Cube(cube_data, attributes=metadata.attributes, cell_methods=metadata.cell_methods, dim_coords_and_dims=metadata.dim_coords_and_dims, - aux_coords_and_dims=metadata.aux_coords_and_dims, - fill_value=cube_fill_value, dtype=cube_dtype) + aux_coords_and_dims=metadata.aux_coords_and_dims) # Temporary code to deal with invalid standard names in the # translation table. diff --git a/lib/iris/util.py b/lib/iris/util.py index 4ca915445c..263babac8e 100644 --- a/lib/iris/util.py +++ b/lib/iris/util.py @@ -984,8 +984,7 @@ def new_axis(src_cube, scalar_coord=None): # If the source cube is a Masked Constant, it is changed here to a Masked # Array to allow the mask to gain an extra dimension with the data. if src_cube.has_lazy_data(): - new_cube = iris.cube.Cube(src_cube.lazy_data()[None], - dtype=src_cube.dtype) + new_cube = iris.cube.Cube(src_cube.lazy_data()[None]) else: if isinstance(src_cube.data, ma.core.MaskedConstant): new_data = ma.array([np.nan], mask=[True]) @@ -994,7 +993,6 @@ def new_axis(src_cube, scalar_coord=None): new_cube = iris.cube.Cube(new_data) new_cube.metadata = src_cube.metadata - new_cube.fill_value = src_cube.fill_value for coord in src_cube.aux_coords: if scalar_coord and scalar_coord == coord: @@ -1076,9 +1074,7 @@ def as_compatible_shape(src_cube, target_cube): new_order = [order.index(i) for i in range(len(order))] new_data = np.transpose(new_data, new_order).copy() - new_cube = iris.cube.Cube(new_data.reshape(new_shape), - fill_value=src_cube.fill_value, - dtype=src_cube.dtype) + new_cube = iris.cube.Cube(new_data.reshape(new_shape)) new_cube.metadata = copy.deepcopy(src_cube.metadata) # Record a mapping from old coordinate IDs to new coordinates,