diff --git a/.travis.yml b/.travis.yml index b155449fc2..2883e7f3d6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,9 @@ git: install: + - export BIGGUS_REF="v0.3" + - export BIGGUS_SUFFIX=$(echo "${BIGGUS_REF}" | sed "s/^v//") + - export CARTOPY_REF="v0.10.0" - export CARTOPY_SUFFIX=$(echo "${CARTOPY_REF}" | sed "s/^v//") @@ -78,6 +81,14 @@ install: - ../../.travis_no_output sudo /usr/bin/python setup.py install - cd ../.. +# biggus + - ./.travis_no_output wget -O biggus.zip https://github.com/SciTools/biggus/archive/${BIGGUS_REF}.zip + - ./.travis_no_output unzip -q biggus.zip + - ln -s $(pwd)/biggus-${BIGGUS_SUFFIX} biggus + - cd biggus + - ../.travis_no_output /usr/bin/python setup.py install --user + - cd .. + # cartopy - ./.travis_no_output wget -O cartopy.zip https://github.com/SciTools/cartopy/archive/${CARTOPY_REF}.zip - ./.travis_no_output unzip -q cartopy.zip diff --git a/lib/iris/_merge.py b/lib/iris/_merge.py index 701903b383..383145777d 100644 --- a/lib/iris/_merge.py +++ b/lib/iris/_merge.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. # @@ -24,6 +24,7 @@ from collections import namedtuple from copy import deepcopy +import biggus import numpy as np import numpy.ma as ma @@ -217,8 +218,8 @@ class _CoordSignature(namedtuple('CoordSignature', class _CubeSignature(namedtuple('CubeSignature', - ['defn', 'data_shape', 'data_manager', - 'data_type', 'mdi'])): + ['defn', 'data_shape', 'data_type', + 'fill_value'])): """ Criterion for identifying a specific type of :class:`iris.cube.Cube` based on its metadata. @@ -231,15 +232,12 @@ class _CubeSignature(namedtuple('CubeSignature', * data_shape: The data payload shape of a :class:`iris.cube.Cube`. - * data_manager: - The :class:`iris.fileformats.manager.DataManager` instance. - * data_type: The data payload :class:`numpy.dtype` of a :class:`iris.cube.Cube`. - * mdi: - The missing data value associated with the data payload of a - :class:`iris.cube.Cube`. + * fill_value: + The value to be used to mark missing data in the data payload, + or None if no such value exists. """ @@ -933,7 +931,17 @@ def __init__(self, cube): self._dim_templates = [] self._aux_templates = [] + # During the merge this will contain the complete, merged shape + # of a result cube. + # E.g. Merging three (72, 96) cubes would give: + # self._shape = (3, 72, 96). self._shape = [] + # During the merge this will contain the shape of the "stack" + # of cubes used to create a single result cube. + # E.g. Merging three (72, 96) cubes would give: + # self._stack_shape = (3,) + self._stack_shape = [] + self._nd_names = [] self._cache_by_name = {} self._dim_coords_and_dims = [] @@ -943,6 +951,20 @@ def __init__(self, cube): self._vector_dim_coords_dims = [] self._vector_aux_coords_dims = [] + def _report_duplicate(self, nd_indexes, group_by_nd_index): + # Find the first offending source-cube with duplicate metadata. 
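+        # (group_by_nd_index maps each nd-index to the list of source-cube
+        #  indices that share it, so a list longer than one marks a
+        #  duplicate source-cube.)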
+ index = [group_by_nd_index[nd_index][1] + for nd_index in nd_indexes + if len(group_by_nd_index[nd_index]) > 1][0] + name = self._cube_signature.defn.name() + scalars = [] + for defn, value in zip(self._coord_signature.scalar_defns, + self._skeletons[index].scalar_values): + scalars.append('%s=%r' % (defn.name(), value)) + msg = 'Duplicate %r cube, with scalar coordinates %s' + msg = msg % (name, ', '.join(scalars)) + raise iris.exceptions.DuplicateDataError(msg) + def merge(self, unique=True): """ Returns the list of cubes resulting from merging the registered @@ -987,53 +1009,39 @@ def merge(self, unique=True): # Check for unique data. if unique and group_depth > 1: - # Find the first offending source-cube with duplicate metadata. - index = [group_by_nd_index[nd_index][1] - for nd_index in nd_indexes - if len(group_by_nd_index[nd_index]) > 1][0] - name = self._cube_signature.defn.name() - scalars = [] - for defn, value in zip(self._coord_signature.scalar_defns, - self._skeletons[index].scalar_values): - scalars.append('%s=%r' % (defn.name(), value)) - msg = 'Duplicate %r cube, with scalar coordinates %s' - msg = msg % (name, ', '.join(scalars)) - raise iris.exceptions.DuplicateDataError(msg) + self._report_duplicate(nd_indexes, group_by_nd_index) # Generate group-depth merged cubes from the source-cubes. for level in xrange(group_depth): - # The merged cube's data will be an array of data proxies - # for deferred loading. - merged_cube = self._get_cube() - + # Stack up all the data from all of the relevant source + # cubes in a single biggus ArrayStack. + # If it turns out that all the source cubes already had + # their data loaded then at the end we can convert the + # ArrayStack back to a numpy array. + stack = np.empty(self._stack_shape, 'object') + all_have_data = True for nd_index in nd_indexes: # Get the data of the current existing or last known # good source-cube group = group_by_nd_index[nd_index] offset = min(level, len(group) - 1) data = self._skeletons[group[offset]].data - - # Slot the data into merged cube. The nd-index will have - # less dimensionality than that of the merged cube's - # data. The "missing" dimensions correspond to the - # dimensionality of the source-cubes data. - if nd_index: - # The use of "flatten" allows us to cope with a - # 0-dimensional array. Otherwise, the assignment - # copies the 0-d *array* into the merged cube, and - # not the contents of the array! - if data.ndim == 0: - merged_cube._data[nd_index] = data.flatten()[0] - else: - merged_cube._data[nd_index] = data + # Ensure the data is represented as a biggus.Array and + # slot that Array into the stack. + if isinstance(data, biggus.Array): + all_have_data = False else: - merged_cube._data = data - - # Unmask the array only if it is filled. - if isinstance(merged_cube._data, ma.core.MaskedArray): - if ma.count_masked(merged_cube._data) == 0: - merged_cube._data = merged_cube._data.filled() - + data = biggus.NumpyArrayAdapter(data) + stack[nd_index] = data + + merged_data = biggus.ArrayStack(stack) + if all_have_data: + merged_data = merged_data.masked_array() + # Unmask the array only if it is filled. 
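+                # (A MaskedArray with no masked points can safely be
+                #  replaced by its plain ndarray payload.)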
+ if (ma.isMaskedArray(merged_data) and + ma.count_masked(merged_data) == 0): + merged_data = merged_data.data + merged_cube = self._get_cube(merged_data) merged_cubes.append(merged_cube) return merged_cubes @@ -1150,6 +1158,7 @@ def axis_and_name(name): dim_by_name[name] = len(self._shape) self._nd_names.append(name) self._shape.append(len(cells)) + self._stack_shape.append(len(cells)) self._cache_by_name[name] = {cell: index for index, cell in enumerate(cells)} else: @@ -1188,6 +1197,7 @@ def name_in_independents(): self._dim_templates.append( _Template(dim, points, bounds, kwargs)) self._shape.append(len(cells)) + self._stack_shape.append(len(cells)) self._cache_by_name[name] = {cell: index for index, cell in enumerate(cells)} @@ -1249,12 +1259,10 @@ def name_in_independents(): # deferred loading, this does NOT change the shape. self._shape.extend(signature.data_shape) - def _get_cube(self): + def _get_cube(self, data): """ - Returns a cube containing all its coordinates and appropriately shaped - data that corresponds to this ProtoCube. - - All the values in the cube's data array are masked. + Return a fully constructed cube for the given data, containing + all its coordinates and metadata. """ signature = self._cube_signature @@ -1264,22 +1272,10 @@ def _get_cube(self): for coord, dims in self._aux_coords_and_dims] kwargs = dict(zip(iris.cube.CubeMetadata._fields, signature.defn)) - # Create fully masked data, i.e. all missing. - # (The CubeML checksum doesn't respect the mask, so we zero the - # underlying data to ensure repeatable checksums.) - if signature.data_manager is None: - data = ma.MaskedArray(np.zeros(self._shape, - signature.data_type), - mask=np.ones(self._shape, 'bool'), - fill_value=signature.mdi) - else: - data = ma.MaskedArray(np.zeros(self._shape, 'object'), - mask=np.ones(self._shape, 'bool')) - cube = iris.cube.Cube(data, dim_coords_and_dims=dim_coords_and_dims, aux_coords_and_dims=aux_coords_and_dims, - data_manager=signature.data_manager, **kwargs) + **kwargs) # Add on any aux coord factories. for factory_defn in self._coord_signature.factory_defns: @@ -1386,26 +1382,17 @@ def _build_coordinates(self): def _build_signature(self, cube): """Generate the signature that defines this cube.""" - - defn = cube.metadata - data_shape = cube._data.shape - data_manager = cube._data_manager - mdi = None - - if data_manager is None: - data_type = cube._data.dtype.name - if isinstance(cube.data, ma.core.MaskedArray): - mdi = cube.data.fill_value - else: - data_type = data_manager.data_type.name - mdi = data_manager.mdi - - return _CubeSignature(defn, data_shape, data_manager, data_type, mdi) + array = cube.lazy_data() + return _CubeSignature(cube.metadata, cube.shape, array.dtype, + array.fill_value) def _add_cube(self, cube, coord_payload): """Create and add the source-cube skeleton to the ProtoCube.""" - - skeleton = _Skeleton(coord_payload.scalar.values, cube._data) + if cube.has_lazy_data(): + data = cube.lazy_data() + else: + data = cube.data + skeleton = _Skeleton(coord_payload.scalar.values, data) # Attempt to do something sensible with mixed scalar dtypes. 
for i, metadata in enumerate(coord_payload.scalar.metadata): if metadata.points_dtype > self._coord_metadata[i].points_dtype: diff --git a/lib/iris/cube.py b/lib/iris/cube.py index 5c3bd2d293..b32c0f768d 100644 --- a/lib/iris/cube.py +++ b/lib/iris/cube.py @@ -29,6 +29,7 @@ import warnings import zlib +import biggus import numpy as np import numpy.ma as ma @@ -481,8 +482,7 @@ class Cube(CFVariableMixin): def __init__(self, data, standard_name=None, long_name=None, var_name=None, units=None, attributes=None, cell_methods=None, dim_coords_and_dims=None, - aux_coords_and_dims=None, aux_factories=None, - data_manager=None): + aux_coords_and_dims=None, aux_factories=None): """ Creates a cube with data and optional metadata. @@ -492,16 +492,14 @@ def __init__(self, data, standard_name=None, long_name=None, Args: * data - An object, usually a numpy array, containing the phenomenon - values or a data manager object. This object defines the shape - of the cube and the value in each cell. + This object defines the shape of the cube and the phenomenon + value in each cell. - If the object contains phenomenon values it can be a numpy - array, an array subclass or an *array_like* as described in - :func:`numpy.asarray`. + It can be a biggus array, a numpy array, a numpy array + subclass (such as :class:`numpy.ma.MaskedArray`), or an + *array_like* as described in :func:`numpy.asarray`. - See :attr:`Cube.data` and - :class:`iris.fileformats.manager.DataManager` + See :attr:`Cube.data`. Kwargs: @@ -529,11 +527,6 @@ def __init__(self, data, standard_name=None, long_name=None, * aux_factories A list of auxiliary coordinate factories. See :mod:`iris.aux_factory`. - * data_manager - A :class:`iris.fileformats.manager.DataManager` instance. If a data - manager is provided, then the data should be a numpy array of data - proxy instances. See :class:`iris.fileformats.pp.PPDataProxy` or - :class:`iris.fileformats.netcdf.NetCDFDataProxy`. For example:: @@ -549,15 +542,9 @@ def __init__(self, data, standard_name=None, long_name=None, if isinstance(data, basestring): raise TypeError('Invalid data type: {!r}.'.format(data)) - if data_manager is not None: - self._data = data - self._data_manager = data_manager - else: - if isinstance(data, np.ndarray): - self._data = data - else: - self._data = np.asarray(data) - self._data_manager = None + if not isinstance(data, (biggus.Array, ma.MaskedArray)): + data = np.asarray(data) + self._my_data = data #: The "standard name" for the Cube's phenomenon. self.standard_name = standard_name @@ -1250,13 +1237,7 @@ def cell_methods(self, cell_methods): @property def shape(self): """The shape of the data of this cube.""" - if self._data_manager is None: - if self._data is None: - shape = () - else: - shape = self._data.shape - else: - shape = self._data_manager.shape(self._data) + shape = self.lazy_data().shape return shape @property @@ -1264,6 +1245,49 @@ def ndim(self): """The number of dimensions in the data of this cube.""" return len(self.shape) + def lazy_data(self, array=None): + """ + Return a :class:`biggus.Array` representing the + multi-dimensional data of the Cube, and optionally provide a + new array of values. + + Accessing this method will never cause the data to be loaded. + Similarly, calling methods on, or indexing, the returned Array + will not cause the Cube to have loaded data. + + If the data have already been loaded for the Cube, the returned + Array will be a :class:`biggus.NumpyArrayAdapter` which wraps + the numpy array from `self.data`. 
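+
+        For example (an illustrative sketch only; `filename` stands for
+        any file Iris can load)::
+
+            cube = iris.load_cube(filename)
+            lazy = cube.lazy_data()      # no values are read here
+            assert cube.has_lazy_data()
+            values = cube.data           # loads the values into memory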
+ + Kwargs: + + * array (:class:`biggus.Array` or None): + When this is not None it sets the multi-dimensional data of + the cube to the given value. + + Returns: + A :class:`biggus.Array` representing the multi-dimensional + data of the Cube. + + """ + if array is not None: + if not isinstance(array, biggus.Array): + raise TypeError('new values must be a biggus.Array') + if self.shape != array.shape: + # The _ONLY_ data reshape permitted is converting a + # 0-dimensional array into a 1-dimensional array of + # length one. + # i.e. self.shape = () and array.shape == (1,) + if self.shape or array.shape != (1,): + raise ValueError('Require cube data with shape %r, got ' + '%r.' % (self.shape, array.shape)) + self._my_data = array + else: + array = self._my_data + if not isinstance(array, biggus.Array): + array = biggus.NumpyArrayAdapter(array) + return array + @property def data(self): """ @@ -1293,45 +1317,24 @@ def data(self): (10, 20) """ - # Cache the real data on first use - if self._data_manager is not None: + data = self._my_data + if not isinstance(data, np.ndarray): try: - self._data = self._data_manager.load(self._data) - - except (MemoryError, - self._data_manager.ArrayTooBigForAddressSpace), error: - dm_shape = self._data_manager.pre_slice_array_shape(self._data) - dm_dtype = self._data_manager.data_type - # if the data manager shape is not the same as the cube's - # shape, it is because there must be deferred indexing pending - # once the data has been read into memory. Make the error - # message for this nice. - if self.shape != dm_shape: - msg = ("The cube's data array shape would have been %r, " - "with the data manager needing a data shape of %r " - "(before it can reduce to the cube's required " - "size); the data type is %s.\n") % (dm_shape, - self.shape, - dm_dtype) - else: - msg = 'The array shape would have been %r and the data ' \ - 'type %s.\n' % (self.shape, dm_dtype) - - if isinstance(error, MemoryError): - raise MemoryError( - "Failed to create the cube's data as there was not " - "enough memory available.\n" + msg + "Consider " - "freeing up variables or indexing the cube before " - "getting its data.") - else: - raise ValueError( - "Failed to create the cube's data as there is not " - "enough address space to represent the array.\n" + - msg + "The cube will need to be reduced in order " - "to load the data.") - - self._data_manager = None - return self._data + data = data.masked_array() + except MemoryError: + msg = "Failed to create the cube's data as there was not" \ + " enough memory available.\n" \ + "The array shape would have been {0!r} and the data" \ + " type {1}.\n" \ + "Consider freeing up variables or indexing the cube" \ + " before getting its data." + msg = msg.format(self.shape, data.dtype) + raise MemoryError(msg) + # Unmask the array only if it is filled. + if ma.count_masked(data) == 0: + data = data.data + self._my_data = data + return data @data.setter def data(self, value): @@ -1345,8 +1348,10 @@ def data(self, value): raise ValueError('Require cube data with shape %r, got ' '%r.' 
% (self.shape, data.shape)) - self._data = data - self._data_manager = None + self._my_data = data + + def has_lazy_data(self): + return isinstance(self._my_data, biggus.Array) @property def dim_coords(self): @@ -1748,33 +1753,18 @@ def __getitem__(self, keys): except StopIteration: first_slice = None - # handle unloaded data - data_manager = None - use_data_proxy = self._data_manager is not None - if first_slice is not None: - if use_data_proxy: - data, data_manager = self._data_manager.getitem(self._data, - first_slice) - else: - data = self.data[first_slice] + data = self._my_data[first_slice] else: - if use_data_proxy: - data = copy.deepcopy(self._data) - data_manager = copy.deepcopy(self._data_manager) - else: - data = copy.deepcopy(self.data) + data = copy.deepcopy(self._my_data) for other_slice in slice_gen: - if use_data_proxy: - data, data_manager = data_manager.getitem(data, other_slice) - else: - data = data[other_slice] + data = data[other_slice] - # We don't want a view of the numpy array, so take a copy of it if - # it's not our own (this applies to proxy "empty data" arrays too) - if not data.flags['OWNDATA']: - data = data.copy() + # We don't want a view of the data, so take a copy of it if it's + # not already our own. + if isinstance(data, biggus.Array) or not data.flags['OWNDATA']: + data = copy.deepcopy(data) # We can turn a masked array into a normal array if it's full. if isinstance(data, ma.core.MaskedArray): @@ -1782,7 +1772,7 @@ def __getitem__(self, keys): data = data.filled() # Make the new cube slice - cube = Cube(data, data_manager=data_manager) + cube = Cube(data) cube.metadata = copy.deepcopy(self.metadata) # Record a mapping from old coordinate IDs to new coordinates, @@ -2015,7 +2005,7 @@ def transpose(self, new_order=None): # The data needs to be copied, otherwise this view of the transposed # data will not be contiguous. Ensure not to assign via the cube.data # setter property since we are reshaping the cube payload in-place. - self._data = np.transpose(self.data, new_order).copy() + self._my_data = np.transpose(self.data, new_order).copy() dim_mapping = {src: dest for dest, src in enumerate(new_order)} @@ -2090,78 +2080,77 @@ def _xml_element(self, doc, checksum=False, order=True): cell_methods_xml_element.appendChild(cell_method_xml_element) cube_xml_element.appendChild(cell_methods_xml_element) - if self._data is not None: - data_xml_element = doc.createElement("data") - - data_xml_element.setAttribute("shape", str(self.shape)) - - # NB. Getting a checksum triggers any deferred loading, - # in which case it also has the side-effect of forcing the - # byte order to be native. - if checksum: - data = self.data - - # Ensure consistent memory layout for checksums. - def normalise(data): - data = np.ascontiguousarray(data) - if data.dtype.newbyteorder('<') != data.dtype: - data = data.byteswap(False) - data.dtype = data.dtype.newbyteorder('<') - return data - - if isinstance(data, ma.MaskedArray): - # Fill in masked values to avoid the checksum being - # sensitive to unused numbers. Use a fixed value so - # a change in fill_value doesn't affect the - # checksum. 
- crc = hex(zlib.crc32(normalise(data.filled(0)))) - data_xml_element.setAttribute("checksum", crc) - if ma.is_masked(data): - crc = hex(zlib.crc32(normalise(data.mask))) - else: - crc = 'no-masked-elements' - data_xml_element.setAttribute("mask_checksum", crc) - data_xml_element.setAttribute('fill_value', - str(data.fill_value)) + data_xml_element = doc.createElement("data") + + data_xml_element.setAttribute("shape", str(self.shape)) + + # NB. Getting a checksum triggers any deferred loading, + # in which case it also has the side-effect of forcing the + # byte order to be native. + if checksum: + data = self.data + + # Ensure consistent memory layout for checksums. + def normalise(data): + data = np.ascontiguousarray(data) + if data.dtype.newbyteorder('<') != data.dtype: + data = data.byteswap(False) + data.dtype = data.dtype.newbyteorder('<') + return data + + if isinstance(data, ma.MaskedArray): + # Fill in masked values to avoid the checksum being + # sensitive to unused numbers. Use a fixed value so + # a change in fill_value doesn't affect the + # checksum. + crc = hex(zlib.crc32(normalise(data.filled(0)))) + data_xml_element.setAttribute("checksum", crc) + if ma.is_masked(data): + crc = hex(zlib.crc32(normalise(data.mask))) else: - crc = hex(zlib.crc32(normalise(data))) - data_xml_element.setAttribute("checksum", crc) - elif self._data_manager is not None: - data_xml_element.setAttribute("state", "deferred") - else: - data_xml_element.setAttribute("state", "loaded") - - # Add the dtype, and also the array and dtype orders if the - # data is loaded. - if self._data_manager is None: - data = self.data - dtype = data.dtype - - def _order(array): - order = '' - if array.flags['C_CONTIGUOUS']: - order = 'C' - elif array.flags['F_CONTIGUOUS']: - order = 'F' - return order - if order: - data_xml_element.setAttribute('order', _order(data)) - - # NB. dtype.byteorder can return '=', which is bad for - # cross-platform consistency - so we use dtype.str - # instead. - byte_order = {'>': 'big', '<': 'little'}.get(dtype.str[0]) - if byte_order: - data_xml_element.setAttribute('byteorder', byte_order) - - if order and isinstance(data, ma.core.MaskedArray): - data_xml_element.setAttribute('mask_order', - _order(data.mask)) + crc = 'no-masked-elements' + data_xml_element.setAttribute("mask_checksum", crc) + data_xml_element.setAttribute('fill_value', + str(data.fill_value)) else: - dtype = self._data_manager.data_type - data_xml_element.setAttribute('dtype', dtype.name) + crc = hex(zlib.crc32(normalise(data))) + data_xml_element.setAttribute("checksum", crc) + elif self.has_lazy_data(): + data_xml_element.setAttribute("state", "deferred") + else: + data_xml_element.setAttribute("state", "loaded") + + # Add the dtype, and also the array and mask orders if the + # data is loaded. + if not self.has_lazy_data(): + data = self.data + dtype = data.dtype + + def _order(array): + order = '' + if array.flags['C_CONTIGUOUS']: + order = 'C' + elif array.flags['F_CONTIGUOUS']: + order = 'F' + return order + if order: + data_xml_element.setAttribute('order', _order(data)) + + # NB. dtype.byteorder can return '=', which is bad for + # cross-platform consistency - so we use dtype.str + # instead. 
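+            # (For dtypes with no byte order, e.g. int8, the lookup below
+            #  yields None and no 'byteorder' attribute is written.)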
+ byte_order = {'>': 'big', '<': 'little'}.get(dtype.str[0]) + if byte_order: + data_xml_element.setAttribute('byteorder', byte_order) + + if order and isinstance(data, ma.core.MaskedArray): + data_xml_element.setAttribute('mask_order', + _order(data.mask)) + else: + dtype = self.lazy_data().dtype + data_xml_element.setAttribute('dtype', dtype.name) - cube_xml_element.appendChild(data_xml_element) + cube_xml_element.appendChild(data_xml_element) return cube_xml_element @@ -2189,18 +2178,16 @@ def __deepcopy__(self, memo): return self._deepcopy(memo) def _deepcopy(self, memo, data=None): - # TODO FIX this with deferred loading and investiaget data=False,... if data is None: - if self._data is not None and self._data.ndim == 0: + if not self.has_lazy_data() and self.ndim == 0: # Cope with NumPy's asymmetric (aka. "annoying!") behaviour # of deepcopy on 0-d arrays. - new_cube_data = np.asanyarray(self._data) + new_cube_data = np.asanyarray(self.data) else: - new_cube_data = copy.deepcopy(self._data, memo) - - new_cube_data_manager = copy.deepcopy(self._data_manager, memo) + new_cube_data = copy.copy(self._my_data) else: - data = np.asanyarray(data) + if not isinstance(data, biggus.Array): + data = np.asanyarray(data) if data.shape != self.shape: msg = 'Cannot copy cube with new data of a different shape ' \ @@ -2208,7 +2195,6 @@ def _deepcopy(self, memo, data=None): raise ValueError(msg) new_cube_data = data - new_cube_data_manager = None new_dim_coords_and_dims = copy.deepcopy(self._dim_coords_and_dims, memo) @@ -2227,8 +2213,7 @@ def _deepcopy(self, memo, data=None): new_cube = Cube(new_cube_data, dim_coords_and_dims=new_dim_coords_and_dims, - aux_coords_and_dims=new_aux_coords_and_dims, - data_manager=new_cube_data_manager) + aux_coords_and_dims=new_aux_coords_and_dims) new_cube.metadata = copy.deepcopy(self.metadata, memo) for factory in self.aux_factories: diff --git a/lib/iris/fileformats/manager.py b/lib/iris/fileformats/manager.py deleted file mode 100644 index 5528d7b06c..0000000000 --- a/lib/iris/fileformats/manager.py +++ /dev/null @@ -1,305 +0,0 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office -# -# This file is part of Iris. -# -# Iris is free software: you can redistribute it and/or modify it under -# the terms of the GNU Lesser General Public License as published by the -# Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Iris is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with Iris. If not, see . -""" -Provides support for virtual cube data and deferred loading. - -""" - -from copy import deepcopy -import types - -import numpy as np -import numpy.ma as ma - -import iris.util - - -class _HashableSlice(iris.util._OrderedHashable): - """Represents a :class:`slice` in a hashable way.""" - _names = ('start', 'stop', 'step') - - @staticmethod - def from_slice(slice_object): - """ - Generate a :class:`iris.fileformats.manager._HashableSlice` from a :class:`slice` object. - - Args: - - * slice_object (:class:`slice`): - Slice object to be converted into a :class:`iris.fileformats.manager._HashableSlice`. - - Returns: - :class:`iris.fileformats.manager._HashableSlice`. 
- - """ - return _HashableSlice(slice_object.start, slice_object.stop, slice_object.step) - - def indices(self, length): - """ - Calculate start, stop and stride for current slice given the length of the sequence. - - Args: - - * length (int): - Length of the sequence for which this slice is to calculate over. - - * Returns: - Tuple of start, stop and stride. - - """ - return self.as_slice().indices(length) - - def as_slice(self): - """ - Convert the :class:`iris.fileformats.manager._HashableSlice` into a :class:`slice`. - - Returns: - :class:`slice`. - - """ - return slice(self.start, self.stop, self.step) - - -class DataManager(iris.util._OrderedHashable): - """ - Holds the context that allows a corresponding array of DataProxy objects to be - converted into a real data array. - - """ - - _names = ('_orig_data_shape', 'data_type', 'mdi', 'deferred_slices') - - def __init__(self, data_shape, data_type, mdi, deferred_slices=()): - self._init(data_shape, data_type, mdi, deferred_slices) - - #: The data shape of the array in file; may differ from the result - #: of :py:ref:`load` if there are pending slices. - _orig_data_shape = None - - #: Tuple of keys tuples as would be used in a __getitem__ context. - deferred_slices = None - - def pre_slice_array_shape(self, proxy_array): - """ - Given the associated proxy_array, calculate the shape of the resultant - data without loading it. - - .. note:: - - This may differ from the result of :meth:`load` if there are - pending post load slices in :attr:`deferred_slices`. - - """ - return proxy_array.shape + self._orig_data_shape - - def _post_slice_data_shape(self): - """The shape of the data manager data, after deferred slicing.""" - orig_shape = self._orig_data_shape - resultant_shape = [] - - for deferred_keys in self.deferred_slices: - # For each of the slices, identify which will leave a dimension intact, - # and store each intact dimension's length in a list - for i, key in enumerate(deferred_keys): - if isinstance(key, _HashableSlice): - len_this_dim = orig_shape[i] - (start, stop, step) = key.indices(len_this_dim) - count = len(range(start, stop, step)) - resultant_shape.append(count) - elif isinstance(key, tuple): - if key and isinstance(key[0], (bool, np.bool_)): - resultant_shape.append(sum(key)) - else: - resultant_shape.append(len(key)) - elif isinstance(key, int): - pass - else: - raise TypeError('Unexpected type for key in DataManager. Got %s.' % type(key)) - - orig_shape = tuple(resultant_shape) - resultant_shape = [] - - return orig_shape - - def shape(self, proxy_array): - """The shape of the data array given the associated proxy array, including effects of deferred slicing.""" - return proxy_array.shape + self._post_slice_data_shape() - - def getitem(self, proxy_array, keys): - """The equivalent method to python's __getitem__ but with added proxy array capability.""" - # Find out how many dimensions the data array would have if it were loaded now - self_ndim = len(self.shape(proxy_array)) - - full_slice = iris.util._build_full_slice_given_keys(keys, self_ndim) - - # slice the proxy array according to the full slice provided - # Add the Ellipsis object to the end of the slice to handle the special case where the full slice is - # a tuple of a single tuple i.e. 
( (0, 2, 3), ) which in numpy should be represented as ( (0, 2, 3), :) - # NB: assumes that the proxy array is always the first dimensions - new_proxy_array = proxy_array[full_slice[0:proxy_array.ndim] + (Ellipsis, )] - - # catch the situation where exactly one element from the proxy_array is requested: - # (A MaskedConstant is an instance of a numpy array, so check for this specifically too) - if (not isinstance(new_proxy_array, np.ndarray)) or (isinstance(new_proxy_array, ma.core.MaskedConstant)): - new_proxy_array = np.array(new_proxy_array) - - # get the ndim of the data manager array - ds_ndim = len(self._post_slice_data_shape()) - # Just pull out the keys which apply to the data manager array - if ds_ndim == 0: - deferred_slice = full_slice[0:0] - else: - deferred_slice = full_slice[-ds_ndim:] - - hashable_conversion = { - types.SliceType: _HashableSlice.from_slice, - np.ndarray: tuple, - } - new_deferred_slice = tuple([hashable_conversion.get(type(index), lambda index: index)(index) - for index in deferred_slice]) - - # Apply the slice to a new data manager (to be deferred) - defered_slices = self.deferred_slices + (new_deferred_slice, ) - new_data_manager = self.new_data_manager(defered_slices) - - return new_proxy_array, new_data_manager - - def new_data_manager(self, deferred_slices=Ellipsis): - """ - Creates a new data manager instance with the given deferred slice. - - """ - return self.__class__(data_shape=deepcopy(self._orig_data_shape), - data_type=deepcopy(self.data_type), - mdi=deepcopy(self.mdi), - deferred_slices=deferred_slices) - - def _deferred_slice_merge(self): - """Determine the single slice that is equivalent to all the accumulated deferred slices.""" - - # The merged slice will always have the same number of index items - # as the original data dimensionality. - merged_slice = [slice(None)] * len(self._orig_data_shape) - # Maintain the overall deferred slice shape as we merge in each - # of the deferred slices. - deferred_shape = list(self._orig_data_shape) - full_slice = slice(None) - - # The deferred slices tuple consists of one or more sub-tuples each of which may - # contain a mixture of a _HashableSlice object, tuple of scalar indexes, - # or a single scalar index. The dimensionality of each sub-tuple will be no - # greater than the original data dimensionality. The dimensionality of a sub-tuple - # will be less than the original data dimensionality only if a previously merged - # sub-tuple collapsed a dimension via a single scalar index. - - # Process each deferred slice sub-tuple. - for deferred_slice in self.deferred_slices: - # Identify those dimensions in the merged slice that have not been collapsed. - # A collapsed dimension is one that is represented by a single scalar index. - mapping = [i for i, value in enumerate(merged_slice) if not isinstance(value, int)] - - # Process each index item in the deferred slice sub-tuple. - for i, index_item in enumerate(deferred_slice): - # First re-map deferred slice dimensions to account for any pre-merged - # dimensions that have already been collapsed. - i = mapping[i] - - # Translate a hashable slice into a slice. - if isinstance(index_item, _HashableSlice): - index_item = index_item.as_slice() - - # Process the index item only if it will change the - # corresponding merged slice index item. - if index_item != full_slice: - if isinstance(merged_slice[i], slice): - # A slice object is not iterable, so it is not possible - # to index or slice a slice object. 
Therefore translate - # the slice into an explicit tuple. - merged_slice_item = tuple(range(*merged_slice[i].indices(deferred_shape[i]))) - else: - # The merged slice item must be a tuple. - merged_slice_item = merged_slice[i] - - # Sample the merged slice item with the index item. - if isinstance(index_item, tuple): - if index_item and isinstance(index_item[0], (bool, np.bool_)): - index_item = np.where(index_item)[0] - - # Sample for each tuple item. - merged_slice[i] = tuple([merged_slice_item[index] - for index in index_item]) - else: - # Sample with a slice or single scalar index. - merged_slice[i] = merged_slice_item[index_item] - - # Maintain the overall deferred slice shape as we merge. - if isinstance(merged_slice[i], tuple): - # New dimension depth is the length of the tuple. - deferred_shape[i] = len(merged_slice[i]) - elif isinstance(merged_slice[i], int): - # New dimension depth has been collapsed by single scalar index. - deferred_shape[i] = 0 - else: - # New dimension depth is the length of the sliced dimension. - deferred_shape[i] = len(range(deferred_shape[i])[merged_slice[i]]) - - return tuple(merged_slice) - - def load(self, proxy_array): - """Returns the real data array that corresponds to the given array of proxies.""" - - deferred_slice = self._deferred_slice_merge() - array_shape = self.shape(proxy_array) - - # Create fully masked data (all missing) - try: - raw_data = np.empty(array_shape, - dtype=self.data_type.newbyteorder('=')) - mask = np.ones(array_shape, dtype=np.bool) - data = ma.MaskedArray(raw_data, mask=mask, - fill_value=self.mdi) - except ValueError: - raise DataManager.ArrayTooBigForAddressSpace( - 'Cannot create an array of shape %r as it will not fit in' - ' memory. Consider using indexing to select a subset of' - ' the Cube.'.format(array_shape)) - - for index, proxy in np.ndenumerate(proxy_array): - if proxy not in [None, 0]: # 0 can come from slicing masked proxy; np.array(masked_constant). - payload = proxy.load(self._orig_data_shape, self.data_type, self.mdi, deferred_slice) - - # Explicitly set the data fill value when no mdi value has been specified - # in order to override default masked array fill value behaviour. - if self.mdi is None and ma.isMaskedArray(payload): - data.fill_value = payload.fill_value - - data[index] = payload - - # we can turn the masked array into a normal array if it's full. - if ma.count_masked(data) == 0: - data = data.filled() - - # take a copy of the data as it may be discontiguous (i.e. when numpy "fancy" indexing has taken place) - if not data.flags['C_CONTIGUOUS']: - data = data.copy() - - return data - - # nested exception definition inside DataManager - class ArrayTooBigForAddressSpace(Exception): - """Raised when numpy cannot possibly allocate an array as it is too big for the address space.""" - pass diff --git a/lib/iris/fileformats/netcdf.py b/lib/iris/fileformats/netcdf.py index 1730b645d5..0f53b23234 100644 --- a/lib/iris/fileformats/netcdf.py +++ b/lib/iris/fileformats/netcdf.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. 
# @@ -31,6 +31,7 @@ import string import warnings +import biggus import iris.proxy iris.proxy.apply_proxy('netCDF4', globals()) import numpy as np @@ -43,7 +44,6 @@ import iris.cube import iris.exceptions import iris.fileformats.cf -import iris.fileformats.manager import iris.fileformats._pyke_rules import iris.io import iris.unit @@ -215,15 +215,34 @@ def _pyke_kb_engine(): class NetCDFDataProxy(object): """A reference to the data payload of a single NetCDF file variable.""" - __slots__ = ('path', 'variable_name') + __slots__ = ('shape', 'dtype', 'path', 'variable_name', 'fill_value') - def __init__(self, path, variable_name): + def __init__(self, shape, dtype, path, variable_name, fill_value): + self.shape = shape + self.dtype = dtype self.path = path self.variable_name = variable_name + self.fill_value = fill_value + + @property + def ndim(self): + return len(self.shape) + + def __getitem__(self, keys): + dataset = netCDF4.Dataset(self.path) + try: + variable = dataset.variables[self.variable_name] + # Get the NetCDF variable data and slice. + data = variable[keys] + finally: + dataset.close() + return data def __repr__(self): - return '%s(%r, %r)' % (self.__class__.__name__, self.path, - self.variable_name) + fmt = '<{self.__class__.__name__} shape={self.shape}' \ + ' dtype={self.dtype!r} path={self.path!r}' \ + ' variable_name={self.variable_name!r}>' + return fmt.format(self=self) def __getstate__(self): return {attr: getattr(self, attr) for attr in self.__slots__} @@ -232,34 +251,6 @@ def __setstate__(self, state): for key, value in state.iteritems(): setattr(self, key, value) - def load(self, data_shape, data_type, mdi, deferred_slice): - """ - Load the corresponding proxy data item and perform any deferred - slicing. - - Args: - - * data_shape (tuple of int): - The data shape of the proxy data item. - * data_type (:class:`numpy.dtype`): - The data type of the proxy data item. - * mdi (float): - The missing data indicator value. - * deferred_slice (tuple): - The deferred slice to be applied to the proxy data item. - - Returns: - :class:`numpy.ndarray` - - """ - dataset = netCDF4.Dataset(self.path) - variable = dataset.variables[self.variable_name] - # Get the NetCDF variable data and slice. - payload = variable[deferred_slice] - dataset.close() - - return payload - def _assert_case_specific_facts(engine, cf, cf_group): # Initialise pyke engine "provides" hooks. @@ -346,12 +337,13 @@ def _load_cube(engine, cf, cf_var, filename): if hasattr(cf_var, 'add_offset'): dummy_data = cf_var.add_offset + dummy_data - # Create cube with data (not yet deferred), but no metadata - data_proxies = np.array(NetCDFDataProxy(filename, cf_var.cf_name)) - data_manager = iris.fileformats.manager.DataManager(cf_var.shape, - dummy_data.dtype, - None) - cube = iris.cube.Cube(data_proxies, data_manager=data_manager) + # Create cube with deferred data, but no metadata + fill_value = getattr(cf_var.cf_data, '_FillValue', + netCDF4.default_fillvals[cf_var.dtype.str[1:]]) + proxy = NetCDFDataProxy(cf_var.shape, dummy_data.dtype, + filename, cf_var.cf_name, fill_value) + data = biggus.OrthoArrayAdapter(proxy) + cube = iris.cube.Cube(data) # Reset the pyke inference engine. 
engine.reset() diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py index fcb5ea12b6..8b629fd633 100644 --- a/lib/iris/fileformats/pp.py +++ b/lib/iris/fileformats/pp.py @@ -29,6 +29,7 @@ import struct import warnings +import biggus import numpy as np import numpy.ma as ma import netcdftime @@ -36,7 +37,6 @@ import iris.config import iris.fileformats.rules import iris.unit -from iris.fileformats.manager import DataManager import iris.fileformats.pp_rules import iris.coord_systems import iris.proxy @@ -619,98 +619,56 @@ def __setattr__(self, name, value): class PPDataProxy(object): """A reference to the data payload of a single PP field.""" - __slots__ = ('path', 'offset', 'data_len', 'lbpack', 'mask') - - def __init__(self, path, offset, data_len, lbpack, mask): + __slots__ = ('shape', 'src_dtype', 'path', 'offset', 'data_len', 'lbpack', + 'mdi', 'mask') + + def __init__(self, shape, src_dtype, path, offset, data_len, lbpack, mdi, + mask): + self.shape = shape + self.src_dtype = src_dtype self.path = path self.offset = offset self.data_len = data_len self.lbpack = lbpack + self.mdi = mdi self.mask = mask - # NOTE: - # "__getstate__" and "__setstate__" functions are defined here to provide a custom interface for Pickle - # : Pickle "normal" behaviour is just to save/reinstate the object dictionary - # : that won't work here, because the use of __slots__ means **there is no object dictionary** - def __getstate__(self): - # object state capture method for Pickle.dump() - # - return the instance data values needed to reconstruct the PPDataProxy object - return dict([(k,getattr(self,k)) for k in PPDataProxy.__slots__]) - - def __setstate__(self, state): - # object reconstruction method for Pickle.load() - # reinitialise the object state from the serialised values (using setattr, as there is no object dictionary) - for (key, val) in state.items(): - setattr(self, key, val) - - def __repr__(self): - return '%s(%r, %r, %r, %r, %r)' % \ - (self.__class__.__name__, self.path, self.offset, - self.data_len, self.lbpack, self.mask) - - def load(self, data_shape, data_type, mdi, deferred_slice): - """ - Load the corresponding proxy data item and perform any deferred slicing. - - Args: + @property + def dtype(self): + return self.src_dtype.newbyteorder('=') - * data_shape (tuple of int): - The data shape of the proxy data item. - * data_type (:class:`numpy.dtype`): - The data type of the proxy data item. - * mdi (float): - The missing data indicator value. - * deferred_slice (tuple): - The deferred slice to be applied to the proxy data item. + @property + def fill_value(self): + return self.mdi - Returns: - :class:`numpy.ndarray` + @property + def ndim(self): + return len(self.shape) - """ - # Load the appropriate proxy data conveniently with a context manager. + def __getitem__(self, keys): with open(self.path, 'rb') as pp_file: pp_file.seek(self.offset, os.SEEK_SET) data_bytes = pp_file.read(self.data_len) - data = _read_data_bytes(data_bytes, self.lbpack, data_shape, - data_type, mdi, self.mask) - - # Identify which index items in the deferred slice are tuples. - tuple_dims = [i for i, value in enumerate(deferred_slice) if isinstance(value, tuple)] - - # Whenever a slice consists of more than one tuple index item, numpy does not slice the - # data array as we want it to. We therefore require to split the deferred slice into - # multiple slices and consistently slice the data with one slice per tuple. 
- if len(tuple_dims) > 1: - # Identify which index items in the deferred slice are single scalar values. - # Such dimensions will collapse in the sliced data shape. - collapsed_dims = [i for i, value in enumerate(deferred_slice) if isinstance(value, int)] - - # Equate the first slice to be the original deferred slice. - tuple_slice = list(deferred_slice) - # Replace all tuple index items in the slice, except for the first, - # to be full slices over their dimension. - for dim in tuple_dims[1:]: - tuple_slice[dim] = slice(None) - - # Perform the deferred slice containing only the first tuple index item. - payload = data[tuple_slice] - - # Re-slice the data consistently with the next single tuple index item. - for dim in tuple_dims[1:]: - # Identify all those pre-sliced collapsed dimensions less than - # the dimension of the current slice tuple index item. - ndims_collapsed = len(filter(lambda x: x < dim, collapsed_dims)) - # Construct the single tuple slice. - tuple_slice = [slice(None)] * payload.ndim - tuple_slice[dim - ndims_collapsed] = deferred_slice[dim] - # Slice the data with this single tuple slice. - payload = payload[tuple_slice] - else: - # The deferred slice contains no more than one tuple index item, so - # it's safe to slice the data directly. - payload = data[deferred_slice] + data = _data_bytes_to_shaped_array(data_bytes, self.lbpack, + self.shape, self.src_dtype, + self.mdi, self.mask) + return data.__getitem__(keys) + + def __repr__(self): + fmt = '<{self.__class__.__name__} shape={self.shape}' \ + ' src_dtype={self.dtype!r} path={self.path!r}' \ + ' offset={self.offset} mask={self.mask!r}>' + return fmt.format(self=self) - return payload + def __getstate__(self): + # Because we have __slots__, this is needed to support Pickle.dump() + return [(name, getattr(self, name)) for name in self.__slots__] + + def __setstate__(self, state): + # Because we have __slots__, this is needed to support Pickle.load() + # (Use setattr, as there is no object dictionary.) + for (key, value) in state: + setattr(self, key, value) def __eq__(self, other): result = NotImplemented @@ -729,8 +687,8 @@ def __ne__(self, other): return result -def _read_data_bytes(data_bytes, lbpack, data_shape, data_type, mdi, - mask=None): +def _data_bytes_to_shaped_array(data_bytes, lbpack, data_shape, data_type, mdi, + mask=None): """ Convert the already read binary data payload into a numpy array, unpacking and decompressing as per the F3 specification. @@ -832,7 +790,7 @@ def _read_data_bytes(data_bytes, lbpack, data_shape, data_type, mdi, # The special headers of the PPField classes which get some improved functionality _SPECIAL_HEADERS = ('lbtim', 'lbcode', 'lbpack', 'lbproc', - 'data', 'data_manager', 'stash', 't1', 't2') + 'data', 'stash', 't1', 't2') def _header_defn(release_number): @@ -910,23 +868,17 @@ def __repr__(self): self_attrs = [(name, getattr(self, name, None)) for name in public_attribute_names] self_attrs = filter(lambda pair: pair[1] is not None, self_attrs) - if hasattr(self, '_data_manager'): - if self._data_manager is None: - data = self.data - # Output any masked data as separate `data` and `mask` - # components, to avoid the standard MaskedArray output - # which causes irrelevant discrepancies between NumPy - # v1.6 and v1.7. - if ma.isMaskedArray(data): - # Force the fill value to zero to have the minimum - # impact on the output style. 
- self_attrs.append(('data.data', data.filled(0))) - self_attrs.append(('data.mask', data.mask)) - else: - self_attrs.append(('data', self.data)) - else: - self_attrs.append( ('unloaded_data_manager', self._data_manager) ) - self_attrs.append( ('unloaded_data_proxy', self._data) ) + # Output any masked data as separate `data` and `mask` + # components, to avoid the standard MaskedArray output + # which causes irrelevant discrepancies between NumPy + # v1.6 and v1.7. + if ma.isMaskedArray(self._data): + # Force the fill value to zero to have the minimum + # impact on the output style. + self_attrs.append(('data.data', self._data.filled(0))) + self_attrs.append(('data.mask', self._data.mask)) + else: + self_attrs.append(('data', self._data)) # sort the attributes by position in the pp header followed, then by alphabetical order. attributes = sorted(self_attrs, key=lambda pair: (attribute_priority_lookup.get(pair[0], 999), pair[0]) ) @@ -996,15 +948,16 @@ def _lbproc_setter(self, new_value): def data(self): """The :class:`numpy.ndarray` representing the multidimensional data of the pp file""" # Cache the real data on first use - if self._data_manager is not None: - self._data = self._data_manager.load(self._data) - self._data_manager = None + if isinstance(self._data, biggus.Array): + data = self._data.masked_array() + if ma.count_masked(data) == 0: + data = data.data + self._data = data return self._data @data.setter def data(self, value): self._data = value - self._data_manager = None @property def calendar(self): @@ -1347,7 +1300,13 @@ def __eq__(self, other): for attr in self.__slots__: attrs = [hasattr(self, attr), hasattr(other, attr)] if all(attrs): - if not np.all(getattr(self, attr) == getattr(other, attr)): + self_attr = getattr(self, attr) + other_attr = getattr(other, attr) + if isinstance(self_attr, biggus.NumpyArrayAdapter): + self_attr = self_attr.concrete + if isinstance(other_attr, biggus.NumpyArrayAdapter): + other_attr = other_attr.concrete + if not np.all(self_attr == other_attr): result = False break elif any(attrs): @@ -1549,24 +1508,40 @@ def _interpret_fields(fields): def _create_field_data(field, data_shape, land_mask): """ Modifies a field's ``_data`` attribute either by: - * converting DeferredArrayBytes into a "deferred array". + * converting DeferredArrayBytes into a biggus array, * converting LoadedArrayBytes into an actual numpy array. + """ if isinstance(field._data, LoadedArrayBytes): loaded_bytes = field._data - field._data = _read_data_bytes(loaded_bytes.bytes, field.lbpack, data_shape, - loaded_bytes.dtype, field.bmdi, land_mask) - field._data_manager = None + field._data = _data_bytes_to_shaped_array(loaded_bytes.bytes, + field.lbpack, data_shape, + loaded_bytes.dtype, + field.bmdi, land_mask) else: # Get hold of the DeferredArrayBytes instance. deferred_bytes = field._data - # NB. This makes a 0-dimensional array - field._data = np.array(PPDataProxy(deferred_bytes.fname, deferred_bytes.position, - deferred_bytes.n_bytes, field.lbpack, land_mask)) - field._data_manager = DataManager(data_shape, deferred_bytes.dtype, field.bmdi) + proxy = PPDataProxy(data_shape, deferred_bytes.dtype, + deferred_bytes.fname, deferred_bytes.position, + deferred_bytes.n_bytes, field.lbpack, + field.bmdi, land_mask) + field._data = biggus.NumpyArrayAdapter(proxy) def _field_gen(filename, read_data_bytes): + """ + Returns a generator of "half-formed" PPField instances derived from + the given filename. 
+ + A field returned by the generator is only "half-formed" because its + `_data` attribute represents a simple one-dimensional stream of + bytes. (Encoded as an instance of either LoadedArrayBytes or + DeferredArrayBytes, depending on the value of `read_data_bytes`.) + This is because fields encoded with a land/sea mask do not contain + sufficient information within the field to determine the final + two-dimensional shape of the data. + + """ pp_file = open(filename, 'rb') # Get a reference to the seek method on the file diff --git a/lib/iris/fileformats/rules.py b/lib/iris/fileformats/rules.py index 7c0f9a25bd..dd648e17ca 100644 --- a/lib/iris/fileformats/rules.py +++ b/lib/iris/fileformats/rules.py @@ -713,14 +713,12 @@ def _make_cube(field, converter): (factories, references, standard_name, long_name, units, attributes, cell_methods, dim_coords_and_dims, aux_coords_and_dims) = converter(field) - if getattr(field, '_data_manager', None) is not None: + try: data = field._data - data_manager = field._data_manager - else: + except AttributeError: data = field.data - data_manager = None - cube = iris.cube.Cube(data, data_manager=data_manager, + cube = iris.cube.Cube(data, attributes=attributes, cell_methods=cell_methods, dim_coords_and_dims=dim_coords_and_dims, diff --git a/lib/iris/tests/test_cdm.py b/lib/iris/tests/test_cdm.py index eee97438d6..df25adc210 100644 --- a/lib/iris/tests/test_cdm.py +++ b/lib/iris/tests/test_cdm.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. # @@ -27,6 +27,7 @@ import sys import warnings +import biggus import numpy as np import numpy.ma as ma @@ -919,97 +920,30 @@ def test_is_compatible_metadata(self): class TestDataManagerIndexing(TestCube2d): def setUp(self): self.cube = iris.load_cube(tests.get_data_path(('PP', 'aPProt1', 'rotatedMHtimecube.pp'))) - self.pa = self.cube._data - self.dm = self.cube._data_manager - self.data_array = self.dm.load(self.pa) + + def _is_lazy(self, cube): + self.assertTrue(cube.has_lazy_data()) + + def _is_concrete(self, cube): + self.assertFalse(cube.has_lazy_data()) def test_slices(self): lat_cube = self.cube.slices(['grid_latitude', ]).next() - self.assertIsNotNone(lat_cube._data_manager) - self.assertIsNotNone(self.cube._data_manager) + self._is_lazy(lat_cube) + self._is_lazy(self.cube) - def check_indexing(self, keys): - pa, dm = self.dm.getitem(self.pa, keys) - r = dm.load(pa) - np.testing.assert_array_equal(r, self.data_array[keys], - 'Arrays were not the same after indexing ' - '(original shape %s) using:\n %r' % (self.data_array.shape, keys) - ) - - def _check_consecutive(self, keys1, keys2): - pa, dm = self.dm.getitem(self.pa, keys1) - pa, dm = dm.getitem(pa, keys2) - # Test the access of the data shape... 
- r = dm.shape(pa) - np.testing.assert_array_equal(r, self.data_array[keys1][keys2].shape, 'Reported shapes were not the same after consecutive indexing' - '(original shape %s) using:\n 1: %r\n 2: %r' % (self.data_array.shape, keys1, keys2), - ) - - r = dm.load(pa) - np.testing.assert_array_equal(r, self.data_array[keys1][keys2], - 'Arrays were not the same after consecutive indexing ' - '(original shape %s) using:\n 1: %r\n 2: %r' % (self.data_array.shape, keys1, keys2), - ) - - def check_consecutive(self, keys1, keys2): - self._check_consecutive(keys1, keys2) - self._check_consecutive(keys2, keys1) - - def check_indexing_error(self, keys): - self.assertRaises(IndexError, self.dm.getitem, self.pa, keys) - - def test_single_index(self): - self.check_indexing(2) - self.check_indexing(-1) - self.check_indexing(0) - self.check_indexing(None) - - def test_basic(self): - self.check_indexing( (2, ) ) - self.check_indexing( (slice(None, None), 2) ) - self.check_indexing( (slice(None, None, 2), 2) ) - self.check_indexing( (slice(None, -4, -2), 2) ) - self.check_indexing( (3, slice(None, -4, -2), 2) ) - self.check_indexing( (3, 3, 2) ) - self.check_indexing( (Ellipsis, 2, 3) ) - self.check_indexing( (slice(3, 4), Ellipsis, 2, 3) ) - self.check_indexing( (np.array([3], ndmin=1), Ellipsis, 2, 3) ) - self.check_indexing( (slice(3, 4), Ellipsis, Ellipsis, 3) ) - self.check_indexing( (slice(3, 4), Ellipsis, Ellipsis, Ellipsis) ) - self.check_indexing( (Ellipsis, Ellipsis, Ellipsis, Ellipsis) ) - - def test_out_of_range(self): - self.check_indexing_error( tuple([slice(None, None)] * 5) ) - self.check_indexing_error( tuple([slice(None, None)] * 6) ) - self.check_indexing_error( 10000 ) - self.check_indexing_error( (10000, 2) ) - self.check_indexing_error( (10000, ) ) - self.check_indexing_error( (10, 10000) ) - - def test_consecutive(self): - self.check_consecutive(3, 2) - self.check_consecutive(3, slice(None, None)) - self.check_consecutive(1, slice(None, -6, -2)) - self.check_consecutive(3, (slice(None, None), 3)) - self.check_consecutive(1, ((3, 2, 1, 3), 3)) - self.check_consecutive(1, (np.array([3, 2, 1, 3]), 3)) - self.check_consecutive(1, (3, np.array([3, 2, 1, 3]))) - self.check_consecutive((4, slice(6, 7)), 0) - self.check_consecutive((Ellipsis, slice(6, 7), 5), 0) - self.check_consecutive((Ellipsis, slice(7, 5, -1), 5), 0) - self.check_consecutive((Ellipsis, (3, 2, 1, 3), slice(6, 7)), 0) - def test_cube_empty_indexing(self): test_filename = ('cube_slice', 'real_empty_data_indexing.cml') r = self.cube[:5, ::-1][3] rshape = r.shape - - # Make sure the datamanager is still being uses (i.e. is not None) - self.assertNotEqual( r._data_manager, None ) + + # Make sure we still have deferred data. + self._is_lazy(r) # check the CML of this result self.assertCML(r, test_filename) - # The CML was checked, meaning the data must have been loaded. Check that the cube no longer has a datamanager - self.assertEqual( r._data_manager, None ) + # The CML was checked, meaning the data must have been loaded. + # Check that the cube no longer has deferred data. 
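+        # (The CML check includes a data checksum, which reads cube.data
+        #  and so realises the lazy array.)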
+ self._is_concrete(r) r_data = r.data @@ -1165,15 +1099,14 @@ def test_slicing(self): partial_slice = cube[0] self.assertIsInstance(full_slice.data, np.ndarray) self.assertIsInstance(partial_slice.data, ma.core.MaskedArray) - self.assertEqual(ma.count_masked(partial_slice._data), 25) + self.assertEqual(ma.count_masked(partial_slice.data), 25) # Test the slicing is consistent after deferred loading - cube.data full_slice = cube[3] partial_slice = cube[0] self.assertIsInstance(full_slice.data, np.ndarray) self.assertIsInstance(partial_slice.data, ma.core.MaskedArray) - self.assertEqual(ma.count_masked(partial_slice._data), 25) + self.assertEqual(ma.count_masked(partial_slice.data), 25) def test_save_and_merge(self): cube = self._load_3d_cube() diff --git a/lib/iris/tests/test_coding_standards.py b/lib/iris/tests/test_coding_standards.py index e25523db95..607add1e1c 100644 --- a/lib/iris/tests/test_coding_standards.py +++ b/lib/iris/tests/test_coding_standards.py @@ -77,7 +77,6 @@ class StandardReportWithExclusions(pep8.StandardReport): '*/iris/fileformats/grib/__init__.py', '*/iris/fileformats/grib/_grib_cf_map.py', '*/iris/fileformats/grib/load_rules.py', - '*/iris/fileformats/manager.py', '*/iris/fileformats/pp.py', '*/iris/fileformats/pp_rules.py', '*/iris/fileformats/rules.py', diff --git a/lib/iris/tests/test_constraints.py b/lib/iris/tests/test_constraints.py index 65619addfa..704840b4d5 100644 --- a/lib/iris/tests/test_constraints.py +++ b/lib/iris/tests/test_constraints.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. # @@ -21,6 +21,8 @@ # import iris tests first so that some things can be initialised before importing anything else import iris.tests as tests +import biggus + import iris import iris.tests.stock as stock @@ -307,12 +309,14 @@ def test_standard_name(self): self.assertEqual(self.cube.extract(r), None) def test_empty_data(self): - # Ensure that the process of WHERE does not load data if there was empty data to start with... - self.assertNotEquals(None, self.cube._data_manager) - - self.assertNotEquals(None, self.cube.extract(self.level_10)._data_manager) - - self.assertNotEquals(None, self.cube.extract(self.level_10).extract(self.level_10)._data_manager) + # Ensure that the process of WHERE does not load data if there + # was empty data to start with... + cube = self.cube + self.assertTrue(cube.has_lazy_data()) + cube = self.cube.extract(self.level_10) + self.assertTrue(cube.has_lazy_data()) + cube = self.cube.extract(self.level_10).extract(self.level_10) + self.assertTrue(cube.has_lazy_data()) def test_non_existant_coordinate(self): # Check the behaviour when a constraint is given for a coordinate which does not exist/span a dimension diff --git a/lib/iris/tests/test_interpolation.py b/lib/iris/tests/test_interpolation.py index 51d36e5403..6938351157 100644 --- a/lib/iris/tests/test_interpolation.py +++ b/lib/iris/tests/test_interpolation.py @@ -21,6 +21,7 @@ # import iris tests first so that some things can be initialised before importing anything else import iris.tests as tests +import biggus import numpy as np import numpy.ma as ma from scipy.interpolate import interp1d @@ -574,8 +575,8 @@ def test_nearest_neighbour(self): # Check that the data has not been loaded on either the original cube, # nor the interpolated one. 
- self.assertIsNotNone(b._data_manager) - self.assertIsNotNone(self.cube._data_manager) + self.assertTrue(b.has_lazy_data()) + self.assertTrue(self.cube.has_lazy_data()) self.assertCML(b, ('analysis', 'interpolation', 'nearest_neighbour_extract_latitude_longitude.cml')) value = iris.analysis.interpolate.nearest_neighbour_data_value(self.cube, point_spec) diff --git a/lib/iris/tests/test_pickling.py b/lib/iris/tests/test_pickling.py index 39dcedf6af..2d19c4e45d 100644 --- a/lib/iris/tests/test_pickling.py +++ b/lib/iris/tests/test_pickling.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. # @@ -26,6 +26,8 @@ import cPickle import StringIO +import biggus +import numpy as np import iris @@ -43,15 +45,20 @@ def pickle_then_unpickle(self, obj): yield protocol, reconstructed_obj + def assertCubeData(self, cube1, cube2): + np.testing.assert_array_equal(cube1.lazy_data().ndarray(), + cube2.lazy_data().ndarray()) + @iris.tests.skip_data def test_cube_pickle(self): cube = iris.load_cube(tests.get_data_path(('PP', 'globClim1', 'theta.pp'))) + self.assertTrue(cube.has_lazy_data()) self.assertCML(cube, ('cube_io', 'pickling', 'theta.cml'), checksum=False) for _, recon_cube in self.pickle_then_unpickle(cube): - self.assertNotEqual(recon_cube._data_manager, None) - self.assertEqual(cube._data_manager, recon_cube._data_manager) + self.assertTrue(recon_cube.has_lazy_data()) self.assertCML(recon_cube, ('cube_io', 'pickling', 'theta.cml'), checksum=False) + self.assertCubeData(cube, recon_cube) @iris.tests.skip_data def test_cube_with_deferred_coord_points(self): diff --git a/lib/iris/tests/test_pp_module.py b/lib/iris/tests/test_pp_module.py index 2656180f6c..f0bfe29878 100644 --- a/lib/iris/tests/test_pp_module.py +++ b/lib/iris/tests/test_pp_module.py @@ -24,6 +24,7 @@ from types import GeneratorType import unittest +import biggus import netcdftime import iris.fileformats @@ -39,6 +40,7 @@ def setUp(self): def test_copy_field_deferred(self): field = pp.load(self.filename).next() clone = field.copy() + self.assertIsInstance(clone._data, biggus.Array) self.assertEqual(field, clone) clone.lbyr = 666 self.assertNotEqual(field, clone) @@ -46,6 +48,7 @@ def test_copy_field_deferred(self): def test_deepcopy_field_deferred(self): field = pp.load(self.filename).next() clone = deepcopy(field) + self.assertIsInstance(clone._data, biggus.Array) self.assertEqual(field, clone) clone.lbyr = 666 self.assertNotEqual(field, clone) @@ -504,7 +507,8 @@ class Terry(object): pass class TestPPDataProxyEquality(unittest.TestCase): def test_not_implemented(self): class Terry(object): pass - pox = pp.PPDataProxy("john", "michael", "eric", "graham", "brian") + pox = pp.PPDataProxy("john", "michael", "eric", "graham", "brian", + "spam", "beans", "eggs") self.assertIs(pox.__eq__(Terry()), NotImplemented) self.assertIs(pox.__ne__(Terry()), NotImplemented) diff --git a/lib/iris/tests/test_trajectory.py b/lib/iris/tests/test_trajectory.py index b453660669..4018eae47c 100644 --- a/lib/iris/tests/test_trajectory.py +++ b/lib/iris/tests/test_trajectory.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2010 - 2013, Met Office +# (C) British Crown Copyright 2010 - 2014, Met Office # # This file is part of Iris. 
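# A standalone sketch (not part of this patch), assuming biggus arrays pickle
# cleanly, as the cube-pickling test above relies on: the reconstructed array
# is still lazy and realises to the same values, which is the comparison
# assertCubeData makes via lazy_data().ndarray().
import cPickle
import biggus
import numpy as np

original = biggus.NumpyArrayAdapter(np.arange(12.).reshape(3, 4))
for protocol in xrange(cPickle.HIGHEST_PROTOCOL + 1):
    restored = cPickle.loads(cPickle.dumps(original, protocol))
    assert isinstance(restored, biggus.Array)
    np.testing.assert_array_equal(restored.ndarray(), original.ndarray())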
# @@ -19,11 +19,11 @@ # import iris tests first so that some things can be initialised before importing anything else import iris.tests as tests +import biggus import matplotlib.pyplot as plt import numpy as np import iris.analysis.trajectory -from iris.fileformats.manager import DataManager import iris.tests.stock @@ -152,9 +152,8 @@ def test_tri_polar(self): def test_hybrid_height(self): cube = tests.stock.simple_4d_with_hybrid_height() - # Put a data manager on the cube so that we can test deferred loading. - cube._data_manager = ConcreteDataManager(cube.data) - cube._data = np.empty([]) + # Put a biggus array on the cube so we can test deferred loading. + cube.lazy_data(biggus.NumpyArrayAdapter(cube.data)) traj = (('grid_latitude',[20.5, 21.5, 22.5, 23.5]), ('grid_longitude',[31, 32, 33, 34])) @@ -162,32 +161,9 @@ def test_hybrid_height(self): # Check that creating the trajectory hasn't led to the original # data being loaded. - self.assertIsNotNone(cube._data_manager) + self.assertTrue(cube.has_lazy_data()) self.assertCML([cube, xsec], ('trajectory', 'hybrid_height.cml')) -class ConcreteDataManager(DataManager): - """ - Implements the DataManager interface for a real array. - Useful for testing. Obsolete with biggus. - - """ - def __init__(self, concrete_array, deferred_slices=()): - DataManager.__init__(self, concrete_array.shape, - concrete_array.dtype, - mdi=None, deferred_slices=deferred_slices) - # Add the concrete array as an attribute on the manager. - object.__setattr__(self, 'concrete_array', concrete_array) - - def load(self, proxy_array): - data = self.concrete_array[self._deferred_slice_merge()] - if not data.flags['C_CONTIGUOUS']: - data = data.copy() - return data - - def new_data_manager(self, deferred_slices): - return ConcreteDataManager(self.concrete_array, deferred_slices) - - if __name__ == '__main__': tests.main() diff --git a/lib/iris/tests/unit/cube/test_Cube.py b/lib/iris/tests/unit/cube/test_Cube.py index 3c4f96ce1f..7a5bf7bac4 100644 --- a/lib/iris/tests/unit/cube/test_Cube.py +++ b/lib/iris/tests/unit/cube/test_Cube.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013, Met Office +# (C) British Crown Copyright 2013 - 2014, Met Office # # This file is part of Iris. # @@ -29,6 +29,30 @@ from iris.coords import AuxCoord, DimCoord +class Test___init___data(tests.IrisTest): + def test_ndarray(self): + # np.ndarray should be allowed through + data = np.arange(12).reshape(3, 4) + cube = Cube(data) + self.assertEqual(type(cube.data), np.ndarray) + self.assertArrayEqual(cube.data, data) + + def test_masked(self): + # np.ma.MaskedArray should be allowed through + data = np.ma.masked_greater(np.arange(12).reshape(3, 4), 1) + cube = Cube(data) + self.assertEqual(type(cube.data), np.ma.MaskedArray) + self.assertMaskedArrayEqual(cube.data, data) + + def test_matrix(self): + # Subclasses of np.ndarray should be coerced back to np.ndarray. + # (Except for np.ma.MaskedArray.) + data = np.matrix([[1, 2, 3], [4, 5, 6]]) + cube = Cube(data) + self.assertEqual(type(cube.data), np.ndarray) + self.assertArrayEqual(cube.data, data) + + class Test_xml(tests.IrisTest): def test_checksum_ignores_masked_values(self): # Mask out an single element. 
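# A standalone sketch (not Iris's actual Cube.__init__) of the data
# normalisation the Test___init___data cases above describe: plain ndarrays
# and masked arrays pass straight through, while other np.ndarray subclasses
# such as np.matrix are coerced back to a base ndarray. The helper name is
# hypothetical, for illustration only.
import numpy as np

def normalise_cube_data(data):
    if isinstance(data, np.ma.MaskedArray):
        return data
    if isinstance(data, np.ndarray):
        # np.asarray drops the subclass (e.g. np.matrix) but keeps the values.
        return np.asarray(data)
    return data

assert type(normalise_cube_data(np.arange(12).reshape(3, 4))) is np.ndarray
assert type(normalise_cube_data(np.ma.masked_greater(np.arange(4), 1))) is np.ma.MaskedArray
assert type(normalise_cube_data(np.matrix([[1, 2, 3], [4, 5, 6]]))) is np.ndarray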
diff --git a/lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py b/lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py new file mode 100644 index 0000000000..7cf563bd86 --- /dev/null +++ b/lib/iris/tests/unit/fileformats/netcdf/test__load_cube.py @@ -0,0 +1,104 @@ +# (C) British Crown Copyright 2014, Met Office +# +# This file is part of Iris. +# +# Iris is free software: you can redistribute it and/or modify it under +# the terms of the GNU Lesser General Public License as published by the +# Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Iris is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Iris. If not, see . +"""Unit tests for the `iris.fileformats.netcdf._load_cube` function.""" + +# Import iris.tests first so that some things can be initialised before +# importing anything else. +import iris.tests as tests + +import iris.fileformats.cf +import mock +import netCDF4 +import numpy as np + +from iris.fileformats.netcdf import _load_cube + + +class TestFillValue(tests.IrisTest): + def setUp(self): + name = 'iris.fileformats.netcdf._assert_case_specific_facts' + patch = mock.patch(name) + patch.start() + self.addCleanup(patch.stop) + + self.engine = mock.Mock() + self.cf = None + self.filename = 'DUMMY' + + def _make_cf_var(self, dtype): + variable = mock.Mock(spec=netCDF4.Variable, dtype=dtype) + cf_var = mock.MagicMock(spec=iris.fileformats.cf.CFVariable, + cf_data=variable, cf_name='DUMMY_VAR', + cf_group=mock.Mock(), dtype=dtype, + shape=mock.MagicMock()) + return cf_var + + def _test(self, cf_var, expected_fill_value): + cube = _load_cube(self.engine, self.cf, cf_var, self.filename) + self.assertEqual(cube._my_data.fill_value, expected_fill_value) + + def test_from_attribute_dtype_f4(self): + # A _FillValue attribute on the netCDF variable should end up as + # the fill_value for the cube. + dtype = np.dtype('f4') + cf_var = self._make_cf_var(dtype) + cf_var.cf_data._FillValue = mock.sentinel.FILL_VALUE + self._test(cf_var, mock.sentinel.FILL_VALUE) + + def test_from_default_dtype_f4(self): + # Without an explicit _FillValue attribute on the netCDF + # variable, the fill value should be selected from the default + # netCDF fill values. + dtype = np.dtype('f4') + cf_var = self._make_cf_var(dtype) + self._test(cf_var, netCDF4.default_fillvals['f4']) + + def test_from_attribute_dtype_i4(self): + # A _FillValue attribute on the netCDF variable should end up as + # the fill_value for the cube. + dtype = np.dtype('i4') + cf_var = self._make_cf_var(dtype) + cf_var.cf_data._FillValue = mock.sentinel.FILL_VALUE + self._test(cf_var, mock.sentinel.FILL_VALUE) + + def test_from_default_dtype_i4(self): + # Without an explicit _FillValue attribute on the netCDF + # variable, the fill value should be selected from the default + # netCDF fill values. + dtype = np.dtype('i4') + cf_var = self._make_cf_var(dtype) + self._test(cf_var, netCDF4.default_fillvals['i4']) + + def test_from_attribute_with_scale_offset(self): + # The _FillValue attribute still takes priority even when an + # offset/scale transformation takes place on the data. 
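# A standalone sketch (not the actual _load_cube code) of the fill-value rule
# these tests encode: an explicit _FillValue attribute wins, otherwise the
# netCDF default for the variable's *unscaled* dtype is used. The helper and
# FakeNetCDFVariable are hypothetical; only netCDF4.default_fillvals and its
# 'f4'/'i2'-style keys are real.
import netCDF4
import numpy as np

def select_fill_value(cf_data, dtype):
    fill_value = getattr(cf_data, '_FillValue', None)
    if fill_value is None:
        # default_fillvals is keyed by kind+itemsize, e.g. 'f4', 'i2'.
        fill_value = netCDF4.default_fillvals[dtype.str[1:]]
    return fill_value

class FakeNetCDFVariable(object):
    pass

var = FakeNetCDFVariable()
assert select_fill_value(var, np.dtype('i2')) == netCDF4.default_fillvals['i2']
var._FillValue = -999
assert select_fill_value(var, np.dtype('i2')) == -999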
+ dtype = np.dtype('i2') + cf_var = self._make_cf_var(dtype) + cf_var.scale_factor = np.float64(1.5) + cf_var.cf_data._FillValue = mock.sentinel.FILL_VALUE + self._test(cf_var, mock.sentinel.FILL_VALUE) + + def test_from_default_with_scale_offset(self): + # The fill value should be related to the *non-scaled* dtype. + dtype = np.dtype('i2') + cf_var = self._make_cf_var(dtype) + cf_var.scale_factor = np.float64(1.5) + self._test(cf_var, netCDF4.default_fillvals['i2']) + + +if __name__ == "__main__": + tests.main() diff --git a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py index 1df549bbe3..a72f893076 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py +++ b/lib/iris/tests/unit/fileformats/pp/test__create_field_data.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013, Met Office +# (C) British Crown Copyright 2013 - 2014, Met Office # # This file is part of Iris. # @@ -20,6 +20,7 @@ # importing anything else. import iris.tests as tests +import biggus import mock import numpy as np @@ -28,39 +29,52 @@ class Test__create_field_data(tests.IrisTest): def test_loaded_bytes(self): - # Check that a field with LoadedArrayBytes in _data gets a suitable - # call to _read_data_bytes. + # Check that a field with LoadedArrayBytes in _data gets the + # result of a suitable call to _data_bytes_to_shaped_array(). mock_loaded_bytes = mock.Mock(spec=pp.LoadedArrayBytes) field = mock.Mock(_data=mock_loaded_bytes) data_shape = mock.Mock() land_mask = mock.Mock() - with mock.patch('iris.fileformats.pp._read_data_bytes') as read_bytes: + with mock.patch('iris.fileformats.pp._data_bytes_to_shaped_array') as \ + convert_bytes: + convert_bytes.return_value = mock.sentinel.array pp._create_field_data(field, data_shape, land_mask) - call = mock.call(mock_loaded_bytes.bytes, field.lbpack, - data_shape, mock_loaded_bytes.dtype, field.bmdi, - land_mask) - self.assertEqual(read_bytes.call_args, call) - self.assertEqual(read_bytes.call_count, 1) - self.assertEqual(field._data, read_bytes.return_value) - self.assertIsNone(field._data_manager) + + self.assertIs(field._data, mock.sentinel.array) + convert_bytes.assert_called_once_with(mock_loaded_bytes.bytes, + field.lbpack, data_shape, + mock_loaded_bytes.dtype, + field.bmdi, land_mask) def test_deferred_bytes(self): - # Check that a field with DeferredArrayBytes in _data gets a data - # manager. - mock_deferred_bytes = mock.Mock(spec=pp.DeferredArrayBytes) - field = mock.Mock(_data=mock_deferred_bytes) - data_shape = mock.Mock() + # Check that a field with DeferredArrayBytes in _data gets a + # biggus array. 
+ deferred_bytes = mock.Mock(spec=pp.DeferredArrayBytes) + deferred_bytes.dtype.newbyteorder.return_value = mock.sentinel.dtype + field = mock.Mock(_data=deferred_bytes) + data_shape = (mock.sentinel.lat, mock.sentinel.lon) land_mask = mock.Mock() - proxy = pp.PPDataProxy(mock_deferred_bytes.fname, - mock_deferred_bytes.position, - mock_deferred_bytes.n_bytes, - field.lbpack, land_mask) - _data = np.array(proxy) - _data_manager = pp.DataManager(data_shape, mock_deferred_bytes.dtype, - field.bmdi) - pp._create_field_data(field, data_shape, land_mask) - self.assertEqual(field._data, _data) - self.assertEqual(field._data_manager, _data_manager) + proxy = mock.Mock(dtype=mock.sentinel.dtype, shape=data_shape) + # We can't directly inspect the concrete data source underlying + # the biggus array (it's a private attribute), so instead we + # patch the proxy creation and check it's being created and + # invoked correctly. + with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy: + PPDataProxy.return_value = proxy + pp._create_field_data(field, data_shape, land_mask) + # Does the biggus array look OK from the outside? + self.assertIsInstance(field._data, biggus.Array) + self.assertEqual(field._data.shape, data_shape) + self.assertEqual(field._data.dtype, mock.sentinel.dtype) + # Is it making use of a correctly configured proxy? + # NB. We know it's *using* the result of this call because + # that's where the dtype came from above. + PPDataProxy.assert_called_once_with((data_shape), deferred_bytes.dtype, + deferred_bytes.fname, + deferred_bytes.position, + deferred_bytes.n_bytes, + field.lbpack, field.bmdi, + land_mask) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/fileformats/pp/test__read_data_bytes.py b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py similarity index 82% rename from lib/iris/tests/unit/fileformats/pp/test__read_data_bytes.py rename to lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py index 072a3bca59..0fa5467818 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__read_data_bytes.py +++ b/lib/iris/tests/unit/fileformats/pp/test__data_bytes_to_shaped_array.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013, Met Office +# (C) British Crown Copyright 2013 - 2014, Met Office # # This file is part of Iris. # @@ -14,7 +14,10 @@ # # You should have received a copy of the GNU Lesser General Public License # along with Iris. If not, see . -"""Unit tests for the `iris.fileformats.pp._read_data_bytes` function.""" +""" +Unit tests for the `iris.fileformats.pp._data_bytes_to_shaped_array` function. + +""" # Import iris.tests first so that some things can be initialised before # importing anything else. 
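# A standalone sketch (not the pp module itself) of the wrapping that the
# test_deferred_bytes case above checks from the outside, assuming that
# biggus.NumpyArrayAdapter accepts any object exposing shape, dtype and
# __getitem__. FakeFieldProxy is a hypothetical stand-in for a PP data proxy;
# the resulting biggus array advertises shape and dtype without reading data.
import biggus
import numpy as np

class FakeFieldProxy(object):
    def __init__(self, shape, dtype):
        self.shape = shape
        self.dtype = np.dtype(dtype)
    def __getitem__(self, keys):
        # A real proxy would seek into the file and unpack bytes here.
        return np.zeros(self.shape, self.dtype)[keys]

lazy_field = biggus.NumpyArrayAdapter(FakeFieldProxy((73, 96), '>f4'))
assert isinstance(lazy_field, biggus.Array)
assert lazy_field.shape == (73, 96)
assert lazy_field.dtype == np.dtype('>f4')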
@@ -28,7 +31,8 @@ import iris.fileformats.pp as pp -class Test__read_data_bytes__lateral_boundary_compression(tests.IrisTest): +class Test__data_bytes_to_shaped_array__lateral_boundary_compression( + tests.IrisTest): def setUp(self): self.data_shape = 30, 40 y_halo, x_halo, rim = 2, 3, 4 @@ -61,12 +65,13 @@ def setUp(self): def test_boundary_decompression(self): boundary_packing = mock.Mock(rim_width=4, x_halo=3, y_halo=2) lbpack = mock.Mock(n1=0, boundary_packing=boundary_packing) - r = pp._read_data_bytes(self.data_payload_bytes, lbpack, - self.data_shape, self.decompressed.dtype, -99) + r = pp._data_bytes_to_shaped_array(self.data_payload_bytes, lbpack, + self.data_shape, + self.decompressed.dtype, -99) self.assertMaskedArrayEqual(r, self.decompressed) -class Test__read_data_bytes__land_packed(tests.IrisTest): +class Test__data_bytes_to_shaped_array__land_packed(tests.IrisTest): def setUp(self): # Sets up some useful arrays for use with the land/sea mask # decompression. @@ -102,9 +107,10 @@ def test_no_land_mask(self): with mock.patch('numpy.frombuffer', return_value=np.arange(3)): with self.assertRaises(ValueError) as err: - pp._read_data_bytes(mock.Mock(), self.create_lbpack(120), - (3, 4), np.dtype('>f4'), - -999, mask=None) + pp._data_bytes_to_shaped_array(mock.Mock(), + self.create_lbpack(120), + (3, 4), np.dtype('>f4'), + -999, mask=None) self.assertEqual(str(err.exception), ('No mask was found to unpack the data. ' 'Could not load.')) @@ -140,12 +146,13 @@ def test_bad_lbpack(self): self.check_read_data(field_data, 320, self.land_mask) def check_read_data(self, field_data, lbpack, mask): - # Calls pp._read_data_bytes with the necessary mocked items, an lbpack - # instance, the correct data shape and mask instance. + # Calls pp._data_bytes_to_shaped_array with the necessary mocked + # items, an lbpack instance, the correct data shape and mask instance. with mock.patch('numpy.frombuffer', return_value=field_data): - return pp._read_data_bytes(mock.Mock(), self.create_lbpack(lbpack), - mask.shape, np.dtype('>f4'), - -999, mask=mask) + return pp._data_bytes_to_shaped_array(mock.Mock(), + self.create_lbpack(lbpack), + mask.shape, np.dtype('>f4'), + -999, mask=mask) if __name__ == "__main__": diff --git a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py index 94d07999d9..ff4fabb2c1 100644 --- a/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py +++ b/lib/iris/tests/unit/fileformats/pp/test__interpret_field.py @@ -1,4 +1,4 @@ -# (C) British Crown Copyright 2013, Met Office +# (C) British Crown Copyright 2013 - 2014, Met Office # # This file is part of Iris. # @@ -30,9 +30,11 @@ class Test__interpret_fields__land_packed_fields(tests.IrisTest): def setUp(self): + # A field packed using a land/sea mask. self.pp_field = mock.Mock(lblrec=1, lbext=0, lbuser=[0], lbrow=0, lbnpt=0, lbpack=mock.Mock(n2=2)) + # The field specifying the land/seamask. self.land_mask_field = mock.Mock(lblrec=1, lbext=0, lbuser=[0], lbrow=3, lbnpt=4, stash='m01s00i030', @@ -47,8 +49,8 @@ def test_non_deferred_fix_lbrow_lbnpt(self): list(pp._interpret_fields([mask, f1])) self.assertEqual(f1.lbrow, 3) self.assertEqual(f1.lbnpt, 4) - # Check the data manager's shape has been updated too. - self.assertEqual(f1._data_manager._orig_data_shape, (3, 4)) + # Check the data's shape has been updated too. 
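# A conceptual sketch (not the pp module's actual routine) of what "unpacking
# with a land/sea mask" means in the tests above: the file stores only the
# land points as a flat vector, and the mask says where they belong; without
# a mask the data cannot be reshaped, hence the ValueError checked above. The
# arrays here are illustrative only.
import numpy as np
import numpy.ma as ma

land_mask = np.array([[1, 0, 0, 1],
                      [0, 1, 1, 0],
                      [1, 1, 0, 0]], dtype=bool)
land_points = np.arange(1.0, 7.0)           # one value per land (True) cell

unpacked = ma.masked_all(land_mask.shape, dtype=land_points.dtype)
unpacked[land_mask] = land_points           # sea points remain masked

assert ma.count_masked(unpacked) == (~land_mask).sum()
np.testing.assert_array_equal(unpacked.compressed(), land_points)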
+ self.assertEqual(f1._data.shape, (3, 4)) def test_fix_lbrow_lbnpt_no_mask_available(self): # Check a warning is issued when loading a land masked field @@ -89,9 +91,12 @@ def test_shared_land_mask_field(self): f1 = deepcopy(self.pp_field) f2 = deepcopy(self.pp_field) self.assertIsNot(f1, f2) - list(pp._interpret_fields([f1, self.land_mask_field, f2])) - self.assertIs(f1._data.item().mask, - f2._data.item().mask) + with mock.patch('iris.fileformats.pp.PPDataProxy') as PPDataProxy: + PPDataProxy.return_value = mock.MagicMock() + list(pp._interpret_fields([f1, self.land_mask_field, f2])) + for call in PPDataProxy.call_args_list: + positional_args = call[0] + self.assertIs(positional_args[7], self.land_mask_field) if __name__ == "__main__":