Skip to content

Commit a87e2df

Browse files
author
Jeff Whitaker
authored
Merge pull request #816 from davidhassell/master
new methods to optionally re-enable old numpy array type behaviour (issue #809)
2 parents a336665 + 202053a commit a87e2df

File tree

3 files changed

+193
-5
lines changed

3 files changed

+193
-5
lines changed

Changelog

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
* disable workaround for slow nc_get_vars for __netcdflibversion__ >= 4.6.2,
44
since a fix was added to speed up nc_get_vars in the C library. Issue 680.
55

6+
* new methods to optionally re-enable old numpy array type
7+
behaviour (issue #809): Dataset.set_always_mask and
8+
Variable.set_always_mask
9+
610
version 1.4.0 (tag v1.4.0rel)
711
=============================
812
* fixed bug in detection of CDF5 library support in setup.py (pull request

netCDF4/_netCDF4.pyx

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1470,7 +1470,7 @@ cdef _set_att(grp, int varid, name, value,\
14701470
if value_arr.dtype.kind == 'V': # compound attribute.
14711471
xtype = _find_cmptype(grp,value_arr.dtype)
14721472
elif value_arr.dtype.str[1:] not in _supportedtypes:
1473-
raise TypeError, 'illegal data type for attribute, must be one of %s, got %s' % (_supportedtypes, value_arr.dtype.str[1:])
1473+
raise TypeError, 'illegal data type for attribute %r, must be one of %s, got %s' % (attname, _supportedtypes, value_arr.dtype.str[1:])
14741474
elif xtype == -99: # if xtype is not passed in as kwarg.
14751475
xtype = _nptonctype[value_arr.dtype.str[1:]]
14761476
lenarr = PyArray_SIZE(value_arr)
@@ -2840,6 +2840,31 @@ after calling this function will follow the default behaviour.
28402840
for var in group.variables.values():
28412841
var.set_auto_scale(value)
28422842

2843+
def set_always_mask(self, value):
2844+
"""
2845+
**`set_always_mask(self, True_or_False)`**
2846+
2847+
Call `netCDF4.Variable.set_always_mask` for all variables contained in
2848+
this `netCDF4.Dataset` or `netCDF4.Group`, as well as for all
2849+
variables in all its subgroups.
2850+
2851+
**`True_or_False`**: Boolean determining if masked arrays shall always
2852+
be returned for all variables. If `False`, data with no missing values
2853+
are returned as regular numpy arrays instead of masked arrays.
2854+
2855+
***Note***: Calling this function only affects existing
2856+
variables. Variables created after calling this function will follow
2857+
the default behaviour.
2858+
"""
2859+
2860+
for var in self.variables.values():
2861+
var.set_always_mask(value)
2862+
2863+
for groups in _walk_grps(self):
2864+
for group in groups:
2865+
for var in group.variables.values():
2866+
var.set_always_mask(value)
2867+
28432868
def get_variables_by_attributes(self, **kwargs):
28442869
"""
28452870
**`get_variables_by_attributes(self, **kwargs)`**
@@ -3200,8 +3225,8 @@ behavior is similar to Fortran or Matlab, but different than numpy.
32003225
**`size`**: The number of stored elements.
32013226
"""
32023227
cdef public int _varid, _grpid, _nunlimdim
3203-
cdef public _name, ndim, dtype, mask, scale, chartostring, _isprimitive, _iscompound,\
3204-
_isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\
3228+
cdef public _name, ndim, dtype, mask, scale, always_mask, chartostring, _isprimitive, \
3229+
_iscompound, _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\
32053230
__orthogonal_indexing__, _has_lsd, _no_get_vars
32063231
# Docstrings for class variables (used by pdoc).
32073232
__pdoc__['Variable.dimensions'] = \
@@ -3600,6 +3625,9 @@ behavior is similar to Fortran or Matlab, but different than numpy.
36003625
# add_offset, and converting to/from masked arrays is True.
36013626
self.scale = True
36023627
self.mask = True
3628+
# issue 809: by default always return masked arrays, i.e. arrays with no
3629+
# missing values are not converted to regular numpy arrays
3630+
self.always_mask = True
36033631
# default is to automatically convert to/from character
36043632
# to string arrays when _Encoding variable attribute is set.
36053633
self.chartostring = True
@@ -4295,11 +4323,11 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`."""
42954323
if fill_value is None:
42964324
fill_value = default_fillvals[self.dtype.str[1:]]
42974325
# create masked array with computed mask
4298-
if totalmask.any():
4326+
masked_values = bool(totalmask.any())
4327+
if masked_values:
42994328
data = ma.masked_array(data,mask=totalmask,fill_value=fill_value)
43004329
else:
43014330
# issue #785: always return masked array, if no values masked
4302-
# set mask=False.
43034331
data = ma.masked_array(data,mask=False,fill_value=fill_value)
43044332
# issue 515 scalar array with mask=True should be converted
43054333
# to numpy.ma.MaskedConstant to be consistent with slicing
@@ -4308,6 +4336,11 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`."""
43084336
# return a scalar numpy masked constant not a 0-d masked array,
43094337
# so that data == numpy.ma.masked.
43104338
data = data[()] # changed from [...] (issue #662)
4339+
elif not self.always_mask and not masked_values:
4340+
# issue #809: return a regular numpy array if requested
4341+
# and there are no missing values
4342+
data = numpy.array(data, copy=False)
4343+
43114344
return data
43124345

43134346
def _assign_vlen(self, elem, data):
@@ -4763,6 +4796,21 @@ The default value of `mask` is `True`
47634796
"""
47644797
self.mask = bool(mask)
47654798

4799+
def set_always_mask(self,always_mask):
4800+
"""
4801+
**`set_always_mask(self,always_mask)`**
4802+
4803+
turn on or off conversion of data without missing values to regular
4804+
numpy arrays.
4805+
4806+
If `always_mask` is set to `False` then a masked array with no missing
4807+
values is converted to a regular numpy array.
4808+
4809+
The default value of `always_mask` is `True` (conversions to regular
4810+
numpy arrays are not performed).
4811+
4812+
"""
4813+
self.always_mask = bool(always_mask)
47664814

47674815
def _put(self,ndarray data,start,count,stride):
47684816
"""Private method to put data into a netCDF variable"""

test/tst_masked6.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import unittest
2+
import os
3+
import tempfile
4+
5+
import numpy as np
6+
from numpy import ma
7+
from numpy.testing import assert_array_almost_equal
8+
from netCDF4 import Dataset
9+
10+
# Test automatic conversion of masked arrays (set_always_mask())
11+
12+
class SetAlwaysMaskTestBase(unittest.TestCase):
13+
14+
"""Base object for tests checking the functionality of set_always_mask()"""
15+
16+
def setUp(self):
17+
18+
self.testfile = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name
19+
20+
self.v = np.array([4, 3, 2, 1], dtype="i2")
21+
self.w = np.ma.array([-1, -2, -3, -4], mask=[False, True, False, False], dtype="i2")
22+
23+
f = Dataset(self.testfile, 'w')
24+
_ = f.createDimension('x', None)
25+
v = f.createVariable('v', "i2", 'x')
26+
w = f.createVariable('w', "i2", 'x')
27+
28+
v[...] = self.v
29+
w[...] = self.w
30+
31+
f.close()
32+
33+
def tearDown(self):
34+
35+
os.remove(self.testfile)
36+
37+
38+
class SetAlwaysMaskTrue(SetAlwaysMaskTestBase):
39+
40+
def test_always_mask(self):
41+
42+
"""Testing auto-conversion of masked arrays with no missing values to regular arrays."""
43+
f = Dataset(self.testfile)
44+
45+
f.variables["v"].set_always_mask(True) # The default anyway...
46+
47+
v = f.variables['v'][:]
48+
49+
self.assertTrue(isinstance(v, np.ndarray))
50+
self.assertTrue(isinstance(v, ma.core.MaskedArray))
51+
assert_array_almost_equal(v, self.v)
52+
53+
w = f.variables['w'][:]
54+
55+
self.assertTrue(isinstance(w, np.ndarray))
56+
self.assertTrue(isinstance(w, ma.core.MaskedArray))
57+
assert_array_almost_equal(w, self.w)
58+
59+
f.close()
60+
61+
class SetAlwyasMaskFalse(SetAlwaysMaskTestBase):
62+
63+
def test_always_mask(self):
64+
65+
"""Testing auto-conversion of masked arrays with no missing values to regular arrays."""
66+
f = Dataset(self.testfile)
67+
68+
f.variables["v"].set_always_mask(False)
69+
v = f.variables['v'][:]
70+
71+
self.assertTrue(isinstance(v, np.ndarray))
72+
self.assertFalse(isinstance(v, ma.core.MaskedArray))
73+
assert_array_almost_equal(v, self.v)
74+
75+
w = f.variables['w'][:]
76+
77+
self.assertTrue(isinstance(w, np.ndarray))
78+
self.assertTrue(isinstance(w, ma.core.MaskedArray))
79+
assert_array_almost_equal(w, self.w)
80+
81+
f.close()
82+
83+
class GlobalSetAlwaysMaskTest(unittest.TestCase):
84+
85+
def setUp(self):
86+
87+
self.testfile = tempfile.NamedTemporaryFile(suffix='.nc', delete=False).name
88+
89+
f = Dataset(self.testfile, 'w')
90+
91+
grp1 = f.createGroup('Group1')
92+
grp2 = f.createGroup('Group2')
93+
f.createGroup('Group3') # empty group
94+
95+
f.createVariable('var0', "i2", ())
96+
grp1.createVariable('var1', 'f8', ())
97+
grp2.createVariable('var2', 'f4', ())
98+
99+
f.close()
100+
101+
def tearDown(self):
102+
103+
os.remove(self.testfile)
104+
105+
def runTest(self):
106+
107+
# Note: The default behaviour is to always return masked
108+
# arrays, which is already tested elsewhere.
109+
110+
f = Dataset(self.testfile, "r")
111+
112+
# Without regular numpy arrays
113+
114+
f.set_always_mask(True)
115+
116+
v0 = f.variables['var0']
117+
v1 = f.groups['Group1'].variables['var1']
118+
v2 = f.groups['Group2'].variables['var2']
119+
120+
self.assertTrue(v0.always_mask)
121+
self.assertTrue(v1.always_mask)
122+
self.assertTrue(v2.always_mask)
123+
124+
# With regular numpy arrays
125+
126+
f.set_always_mask(False)
127+
128+
self.assertFalse(v0.always_mask)
129+
self.assertFalse(v1.always_mask)
130+
self.assertFalse(v2.always_mask)
131+
132+
f.close()
133+
134+
135+
if __name__ == '__main__':
136+
unittest.main()

0 commit comments

Comments
 (0)