diff --git a/Changelog b/Changelog index 4494cdbca..cb93f8951 100644 --- a/Changelog +++ b/Changelog @@ -3,6 +3,10 @@ * disable workaround for slow nc_get_vars for __netcdflibversion__ >= 4.6.2, since a fix was added to speed up nc_get_vars in the C library. Issue 680. + * new methods to optionally re-enable old numpy array type + behaviour (issue #809): Dataset.set_always_mask and + Variable.set_always_mask + version 1.4.0 (tag v1.4.0rel) ============================= * fixed bug in detection of CDF5 library support in setup.py (pull request diff --git a/netCDF4/_netCDF4.pyx b/netCDF4/_netCDF4.pyx index 74eb0c51a..4cacb7dcc 100644 --- a/netCDF4/_netCDF4.pyx +++ b/netCDF4/_netCDF4.pyx @@ -1470,7 +1470,7 @@ cdef _set_att(grp, int varid, name, value,\ if value_arr.dtype.kind == 'V': # compound attribute. xtype = _find_cmptype(grp,value_arr.dtype) elif value_arr.dtype.str[1:] not in _supportedtypes: - raise TypeError, 'illegal data type for attribute, must be one of %s, got %s' % (_supportedtypes, value_arr.dtype.str[1:]) + raise TypeError, 'illegal data type for attribute %r, must be one of %s, got %s' % (attname, _supportedtypes, value_arr.dtype.str[1:]) elif xtype == -99: # if xtype is not passed in as kwarg. xtype = _nptonctype[value_arr.dtype.str[1:]] lenarr = PyArray_SIZE(value_arr) @@ -2840,6 +2840,31 @@ after calling this function will follow the default behaviour. for var in group.variables.values(): var.set_auto_scale(value) + def set_always_mask(self, value): + """ +**`set_always_mask(self, True_or_False)`** + +Call `netCDF4.Variable.set_always_mask` for all variables contained in +this `netCDF4.Dataset` or `netCDF4.Group`, as well as for all +variables in all its subgroups. + +**`True_or_False`**: Boolean determining if masked arrays are always +returned (`True`), or if masked arrays with no missing values are +converted to regular arrays (`False`), for all variables. + +***Note***: Calling this function only affects existing +variables. 
Variables created after calling this function will follow +the default behaviour. + """ + + for var in self.variables.values(): + var.set_always_mask(value) + + for groups in _walk_grps(self): + for group in groups: + for var in group.variables.values(): + var.set_always_mask(value) + def get_variables_by_attributes(self, **kwargs): """ **`get_variables_by_attribute(self, **kwargs)`** @@ -3200,8 +3225,8 @@ behavior is similar to Fortran or Matlab, but different than numpy. **`size`**: The number of stored elements. """ cdef public int _varid, _grpid, _nunlimdim - cdef public _name, ndim, dtype, mask, scale, chartostring, _isprimitive, _iscompound,\ - _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\ + cdef public _name, ndim, dtype, mask, scale, always_mask, chartostring, _isprimitive, \ + _iscompound, _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\ __orthogonal_indexing__, _has_lsd, _no_get_vars # Docstrings for class variables (used by pdoc). __pdoc__['Variable.dimensions'] = \ @@ -3600,6 +3625,9 @@ behavior is similar to Fortran or Matlab, but different than numpy. # add_offset, and converting to/from masked arrays is True. self.scale = True self.mask = True + # issue 809: default for converting arrays with no missing values to + # regular numpy arrays + self.always_mask = True # default is to automatically convert to/from character # to string arrays when _Encoding variable attribute is set. self.chartostring = True @@ -4295,11 +4323,11 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" if fill_value is None: fill_value = default_fillvals[self.dtype.str[1:]] # create masked array with computed mask - if totalmask.any(): + masked_values = bool(totalmask.any()) + if masked_values: data = ma.masked_array(data,mask=totalmask,fill_value=fill_value) else: # issue #785: always return masked array, if no values masked - # set mask=False. 
data = ma.masked_array(data,mask=False,fill_value=fill_value) # issue 515 scalar array with mask=True should be converted # to numpy.ma.MaskedConstant to be consistent with slicing @@ -4308,6 +4336,11 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # return a scalar numpy masked constant not a 0-d masked array, # so that data == numpy.ma.masked. data = data[()] # changed from [...] (issue #662) + elif not self.always_mask and not masked_values: + # issue #809: return a regular numpy array if requested + # and there are no missing values + data = numpy.array(data, copy=False) + return data def _assign_vlen(self, elem, data): @@ -4763,6 +4796,21 @@ The default value of `mask` is `True` """ self.mask = bool(mask) + def set_always_mask(self,always_mask): + """ +**`set_always_mask(self,always_mask)`** + +turn on or off conversion of data without missing values to regular +numpy arrays. + +If `always_mask` is set to `False` then a masked array with no missing +values is converted to a regular numpy array. + +The default value of `always_mask` is `True` (conversions to regular +numpy arrays are not performed). 
+ + """ + self.always_mask = bool(always_mask) def _put(self,ndarray data,start,count,stride): """Private method to put data into a netCDF variable""" diff --git a/test/tst_masked6.py b/test/tst_masked6.py new file mode 100644 index 000000000..65db53dde --- /dev/null +++ b/test/tst_masked6.py @@ -0,0 +1,136 @@ +import unittest +import os +import tempfile + +import numpy as np +from numpy import ma +from numpy.testing import assert_array_almost_equal +from netCDF4 import Dataset + +# Test automatic conversion of masked arrays (set_always_mask()) + +class SetAlwaysMaskTestBase(unittest.TestCase): + + """Base object for tests checking the functionality of set_always_mask()""" + + def setUp(self): + + self.testfile = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name + + self.v = np.array([4, 3, 2, 1], dtype="i2") + self.w = np.ma.array([-1, -2, -3, -4], mask=[False, True, False, False], dtype="i2") + + f = Dataset(self.testfile, 'w') + _ = f.createDimension('x', None) + v = f.createVariable('v', "i2", 'x') + w = f.createVariable('w', "i2", 'x') + + v[...] = self.v + w[...] = self.w + + f.close() + + def tearDown(self): + + os.remove(self.testfile) + + +class SetAlwaysMaskTrue(SetAlwaysMaskTestBase): + + def test_always_mask(self): + + """Testing auto-conversion of masked arrays with no missing values to regular arrays.""" + f = Dataset(self.testfile) + + f.variables["v"].set_always_mask(True) # The default anyway... 
+ + v = f.variables['v'][:] + + self.assertTrue(isinstance(v, np.ndarray)) + self.assertTrue(isinstance(v, ma.core.MaskedArray)) + assert_array_almost_equal(v, self.v) + + w = f.variables['w'][:] + + self.assertTrue(isinstance(w, np.ndarray)) + self.assertTrue(isinstance(w, ma.core.MaskedArray)) + assert_array_almost_equal(w, self.w) + + f.close() + +class SetAlwyasMaskFalse(SetAlwaysMaskTestBase): + + def test_always_mask(self): + + """Testing auto-conversion of masked arrays with no missing values to regular arrays.""" + f = Dataset(self.testfile) + + f.variables["v"].set_always_mask(False) + v = f.variables['v'][:] + + self.assertTrue(isinstance(v, np.ndarray)) + self.assertFalse(isinstance(v, ma.core.MaskedArray)) + assert_array_almost_equal(v, self.v) + + w = f.variables['w'][:] + + self.assertTrue(isinstance(w, np.ndarray)) + self.assertTrue(isinstance(w, ma.core.MaskedArray)) + assert_array_almost_equal(w, self.w) + + f.close() + +class GlobalSetAlwaysMaskTest(unittest.TestCase): + + def setUp(self): + + self.testfile = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name + + f = Dataset(self.testfile, 'w') + + grp1 = f.createGroup('Group1') + grp2 = f.createGroup('Group2') + f.createGroup('Group3') # empty group + + f.createVariable('var0', "i2", ()) + grp1.createVariable('var1', 'f8', ()) + grp2.createVariable('var2', 'f4', ()) + + f.close() + + def tearDown(self): + + os.remove(self.testfile) + + def runTest(self): + + # Note: The default behaviour is to always return masked + # arrays, which is already tested elsewhere. 
+ + f = Dataset(self.testfile, "r") + + # Without regular numpy arrays + + f.set_always_mask(True) + + v0 = f.variables['var0'] + v1 = f.groups['Group1'].variables['var1'] + v2 = f.groups['Group2'].variables['var2'] + + self.assertTrue(v0.always_mask) + self.assertTrue(v1.always_mask) + self.assertTrue(v2.always_mask) + + # With regular numpy arrays + + f.set_always_mask(False) + + self.assertFalse(v0.always_mask) + self.assertFalse(v1.always_mask) + self.assertFalse(v2.always_mask) + + f.close() + + +if __name__ == '__main__': + unittest.main()