diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7ed483b1a6d..b77019808cb 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,6 +21,22 @@ v0.9.0 (unreleased) Breaking changes ~~~~~~~~~~~~~~~~ +Deprecations +~~~~~~~~~~~~ + +- Renamed the ``Coordinate`` class from xarray's low-level API to + :py:class:`~xarray.IndexVariable`. ``Variable.to_variable`` and + ``Variable.to_coord`` have been renamed to + :py:meth:`~xarray.Variable.to_base_variable` and + :py:meth:`~xarray.Variable.to_index_variable`. +- Deprecated supplying ``coords`` as a dictionary to the ``DataArray`` + constructor without also supplying an explicit ``dims`` argument. The old + behavior encouraged relying on the iteration order of dictionaries, which is + a bad practice (:issue:`727`). +- Removed several methods and properties deprecated since v0.7.0 or earlier: + ``load_data``, ``vars``, ``drop_vars``, ``dump``, ``dumps`` and the + ``variables`` keyword argument to ``Dataset``. + Enhancements ~~~~~~~~~~~~ diff --git a/xarray/__init__.py b/xarray/__init__.py index 9e50b59a4aa..bd3c96d81f6 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -2,7 +2,7 @@ from .core.combine import concat, auto_combine from .core.extensions import (register_dataarray_accessor, register_dataset_accessor) -from .core.variable import Variable, Coordinate +from .core.variable import Variable, IndexVariable, Coordinate from .core.dataset import Dataset from .core.dataarray import DataArray from .core.merge import merge, MergeError diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 2cbe49d5de6..04805985e49 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -9,7 +9,7 @@ from .common import _maybe_promote from .pycompat import iteritems, OrderedDict from .utils import is_full_slice, is_dict_like -from .variable import Variable, Coordinate +from .variable import Variable, IndexVariable def _get_joiner(join): @@ -119,7 +119,7 @@ def reindex_variables(variables, indexes, 
indexers, method=None, variables : dict-like Dictionary of xarray.Variable objects. indexes : dict-like - Dictionary of xarray.Coordinate objects associated with variables. + Dictionary of xarray.IndexVariable objects associated with variables. indexers : dict Dictionary with keys given by dimension names and values given by arrays of coordinates tick labels. Any mis-matched coordinate values @@ -200,8 +200,8 @@ def var_indexers(var, indexers): for name, var in iteritems(variables): if name in indexers: # no need to copy, because index data is immutable - new_var = Coordinate(var.dims, indexers[name], var.attrs, - var.encoding) + new_var = IndexVariable(var.dims, indexers[name], var.attrs, + var.encoding) else: assign_to = var_indexers(var, to_indexers) assign_from = var_indexers(var, from_indexers) diff --git a/xarray/core/combine.py b/xarray/core/combine.py index df1f971d46a..bdd564e47dc 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -6,7 +6,7 @@ from .alignment import align from .merge import merge from .pycompat import iteritems, OrderedDict, basestring -from .variable import Variable, as_variable, Coordinate, concat as concat_vars +from .variable import Variable, as_variable, IndexVariable, concat as concat_vars def concat(objs, dim=None, data_vars='all', coords='different', @@ -125,14 +125,14 @@ def _calc_concat_dim_coord(dim): if isinstance(dim, basestring): coord = None elif not hasattr(dim, 'dims'): - # dim is not a DataArray or Coordinate + # dim is not a DataArray or IndexVariable dim_name = getattr(dim, 'name', None) if dim_name is None: dim_name = 'concat_dim' - coord = Coordinate(dim_name, dim) + coord = IndexVariable(dim_name, dim) dim = dim_name elif not hasattr(dim, 'name'): - coord = as_variable(dim).to_coord() + coord = as_variable(dim).to_index_variable() dim, = coord.dims else: coord = dim diff --git a/xarray/core/common.py b/xarray/core/common.py index 1a15c9b01c4..0c5739f3d94 100644 --- a/xarray/core/common.py +++ 
b/xarray/core/common.py @@ -326,7 +326,7 @@ def groupby(self, group, squeeze=True): Parameters ---------- - group : str, DataArray or Coordinate + group : str, DataArray or IndexVariable Array whose unique values should be used to group this array. If a string, must be the name of a variable contained in this dataset. squeeze : boolean, optional @@ -353,7 +353,7 @@ def groupby_bins(self, group, bins, right=True, labels=None, precision=3, Parameters ---------- - group : str, DataArray or Coordinate + group : str, DataArray or IndexVariable Array whose binned values should be used to group this array. If a string, must be the name of a variable contained in this dataset. bins : int or array of scalars diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 73c7a3a109c..5a5d4d519d0 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -177,8 +177,7 @@ class DataArrayCoordinates(AbstractCoordinates): """Dictionary like container for DataArray coordinates. Essentially an OrderedDict with keys given by the array's - dimensions and the values given by the corresponding xarray.Coordinate - objects. + dimensions and the values given by corresponding DataArray objects. 
""" def __init__(self, dataarray): self._data = dataarray diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 3ec059f007d..55b254fa358 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -17,7 +17,7 @@ from .coordinates import DataArrayCoordinates, Indexes from .dataset import Dataset from .pycompat import iteritems, basestring, OrderedDict, zip -from .variable import (as_variable, Variable, as_compatible_data, Coordinate, +from .variable import (as_variable, Variable, as_compatible_data, IndexVariable, default_index_coordinate) from .formatting import format_item @@ -39,11 +39,14 @@ def _infer_coords_and_dims(shape, coords, dims): if coords is not None and len(coords) == len(shape): # try to infer dimensions from coords if utils.is_dict_like(coords): - # TODO: deprecate this path + warnings.warn('inferring DataArray dimensions from dictionary ' + 'like ``coords`` has been deprecated. Use an ' + 'explicit list of ``dims`` instead.', + FutureWarning, stacklevel=3) dims = list(coords.keys()) else: for n, (dim, coord) in enumerate(zip(dims, coords)): - coord = as_variable(coord, name=dims[n]).to_coord() + coord = as_variable(coord, name=dims[n]).to_index_variable() dims[n] = coord.name dims = tuple(dims) else: @@ -142,7 +145,7 @@ class DataArray(AbstractArray, BaseDataObject): values : np.ndarray Access or modify DataArray values as a numpy array. coords : dict-like - Dictionary of Coordinate objects that label values along each dimension. + Dictionary of DataArray objects that label values along each dimension. name : str or None Name of this array. 
attrs : OrderedDict @@ -197,7 +200,7 @@ def __init__(self, data, coords=None, dims=None, name=None, coords = [data.index] elif isinstance(data, pd.DataFrame): coords = [data.index, data.columns] - elif isinstance(data, (pd.Index, Coordinate)): + elif isinstance(data, (pd.Index, IndexVariable)): coords = [data] elif isinstance(data, pd.Panel): coords = [data.items, data.major_axis, data.minor_axis] @@ -245,7 +248,7 @@ def _replace_indexes(self, indexes): return self coords = self._coords.copy() for name, idx in indexes.items(): - coords[name] = Coordinate(name, idx) + coords[name] = IndexVariable(name, idx) obj = self._replace(coords=coords) # switch from dimension to level names, if necessary @@ -535,12 +538,6 @@ def load(self): self._coords = new._coords return self - def load_data(self): # pragma: no cover - warnings.warn('the DataArray method `load_data` has been deprecated; ' - 'use `load` instead', - FutureWarning, stacklevel=2) - return self.load() - def copy(self, deep=True): """Returns a copy of this array. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c93c3c5515e..6f34107686c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -20,7 +20,7 @@ from .merge import (dataset_update_method, dataset_merge_method, merge_data_and_coords) from .utils import Frozen, SortedKeysDict, maybe_wrap_array, hashable -from .variable import (Variable, as_variable, Coordinate, broadcast_variables) +from .variable import (Variable, as_variable, IndexVariable, broadcast_variables) from .pycompat import (iteritems, basestring, OrderedDict, dask_array_type) from .combine import concat @@ -157,7 +157,7 @@ class Dataset(Mapping, ImplementsDatasetReduce, BaseDataObject, groupby_cls = groupby.DatasetGroupBy def __init__(self, data_vars=None, coords=None, attrs=None, - compat='broadcast_equals', **kwargs): + compat='broadcast_equals'): """To load data from a file or file-like object, use the `open_dataset` function. 
@@ -183,7 +183,7 @@ def __init__(self, data_vars=None, coords=None, attrs=None, Global attributes to save on this dataset. compat : {'broadcast_equals', 'equals', 'identical'}, optional String indicating how to compare variables of the same name for - potential conflicts: + potential conflicts when initializing this dataset: - 'broadcast_equals': all values must be equal when variables are broadcast against each other to ensure common dimensions. @@ -196,14 +196,6 @@ def __init__(self, data_vars=None, coords=None, attrs=None, self._dims = {} self._attrs = None self._file_obj = None - if kwargs: - if 'variables' in kwargs: - data_vars = kwargs.pop('variables') - warnings.warn('`variables` kwarg is deprecated. Use ' - '`data_vars` instead.', stacklevel=2) - if kwargs: - raise TypeError( - '{0} are not valid kwargs'.format(kwargs.keys())) if data_vars is None: data_vars = {} if coords is None: @@ -326,12 +318,6 @@ def load(self): return self - def load_data(self): # pragma: no cover - warnings.warn('the Dataset method `load_data` has been deprecated; ' - 'use `load` instead', - FutureWarning, stacklevel=2) - return self.load() - @classmethod def _construct_direct(cls, variables, coord_names, dims=None, attrs=None, file_obj=None): @@ -398,7 +384,7 @@ def _replace_indexes(self, indexes): return self variables = self._variables.copy() for name, idx in indexes.items(): - variables[name] = Coordinate(name, idx) + variables[name] = IndexVariable(name, idx) obj = self._replace_vars_and_dims(variables) # switch from dimension to level names, if necessary @@ -641,13 +627,6 @@ def data_vars(self): """ return DataVariables(self) - @property - def vars(self): # pragma: no cover - warnings.warn('the Dataset property `vars` has been deprecated; ' - 'use `data_vars` instead', - FutureWarning, stacklevel=2) - return self.data_vars - def set_coords(self, names, inplace=False): """Given names of one or more variables, set them as coordinates @@ -781,9 +760,6 @@ def to_netcdf(self, 
path=None, mode='w', format=None, group=None, return to_netcdf(self, path, mode, format=format, group=group, engine=engine, encoding=encoding) - dump = utils.function_alias(to_netcdf, 'dump') - dumps = utils.function_alias(to_netcdf, 'dumps') - def __unicode__(self): return formatting.dataset_repr(self) @@ -1303,7 +1279,10 @@ def swap_dims(self, dims_dict, inplace=False): for k, v in iteritems(self.variables): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) - var = v.to_coord() if k in result_dims else v.to_variable() + if k in result_dims: + var = v.to_index_variable() + else: + var = v.to_base_variable() var.dims = dims variables[k] = var @@ -1326,7 +1305,7 @@ def _stack_once(self, dims, new_dim): idx = utils.multiindex_from_product_levels( [self.indexes[d] for d in dims], names=dims) - variables[new_dim] = Coordinate(new_dim, idx) + variables[new_dim] = IndexVariable(new_dim, idx) coord_names = set(self._coord_names) - set(dims) | set([new_dim]) @@ -1404,7 +1383,7 @@ def unstack(self, dim): variables[name] = var for name, lev in zip(new_dim_names, index.levels): - variables[name] = Coordinate(name, lev) + variables[name] = IndexVariable(name, lev) coord_names = set(self._coord_names) - set([dim]) | set(new_dim_names) @@ -1533,12 +1512,6 @@ def _drop_vars(self, names): coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) - def drop_vars(self, *names): # pragma: no cover - warnings.warn('the Dataset method `drop_vars` has been deprecated; ' - 'use `drop` instead', - FutureWarning, stacklevel=2) - return self.drop(names) - def transpose(self, *dims): """Return a new Dataset object with all array dimensions transposed. 
diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index ed14eb16471..e8b45c97e0f 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -99,7 +99,7 @@ def format_timestamp(t): datetime_str = unicode_type(pd.Timestamp(t)) except OutOfBoundsDatetime: datetime_str = unicode_type(t) - + try: date_str, time_str = datetime_str.split() except ValueError: @@ -271,7 +271,7 @@ def indexes_repr(indexes): def array_repr(arr): - # used for DataArray, Variable and Coordinate + # used for DataArray, Variable and IndexVariable if hasattr(arr, 'name') and arr.name is not None: name_str = '%r ' % arr.name else: diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 27b0b089f41..a182cae92c4 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -10,7 +10,7 @@ ) from .pycompat import zip from .utils import peek_at, maybe_wrap_array, safe_cast_to_index -from .variable import as_variable, Variable, Coordinate +from .variable import as_variable, Variable, IndexVariable def unique_value_groups(ar, sort=True): @@ -63,6 +63,7 @@ def _dummy_copy(xarray_obj): dict((k, _get_fill_value(v.dtype)) for k, v in xarray_obj.coords.items() if k not in xarray_obj.dims), + dims=[], name=xarray_obj.name, attrs=xarray_obj.attrs) else: # pragma: no cover @@ -140,7 +141,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, ---------- obj : Dataset or DataArray Object to group. - group : DataArray or Coordinate + group : DataArray or IndexVariable 1-dimensional array with the group values. 
squeeze : boolean, optional If "group" is a coordinate of object, `squeeze` controls whether @@ -206,7 +207,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, sbins = first_items.values.astype(np.int64) group_indices = ([slice(i, j) for i, j in zip(sbins[:-1], sbins[1:])] + [slice(sbins[-1], None)]) - unique_coord = Coordinate(group.name, first_items.index) + unique_coord = IndexVariable(group.name, first_items.index) elif group.name in obj.dims and bins is None: # assume that group already has sorted, unique values # (if using bins, the group will have the same name as a dimension @@ -224,7 +225,7 @@ def __init__(self, obj, group, squeeze=False, grouper=None, bins=None, # look through group to find the unique values sort = bins is None unique_values, group_indices = unique_value_groups(group, sort=sort) - unique_coord = Coordinate(group.name, unique_values) + unique_coord = IndexVariable(group.name, unique_values) self.obj = obj self.group = group diff --git a/xarray/core/utils.py b/xarray/core/utils.py index d314b3053d4..c615008197c 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -15,29 +15,26 @@ from .pycompat import iteritems, OrderedDict, basestring, bytes_type -def alias_warning(old_name, new_name, stacklevel=3): # pragma: no cover - warnings.warn('%s has been deprecated and renamed to %s' - % (old_name, new_name), - FutureWarning, stacklevel=stacklevel) +def alias_message(old_name, new_name): + return '%s has been deprecated. Use %s instead.' 
 % (old_name, new_name) -def function_alias(obj, old_name): # pragma: no cover +def alias_warning(old_name, new_name, stacklevel=3): + warnings.warn(alias_message(old_name, new_name), FutureWarning, + stacklevel=stacklevel) + + +def alias(obj, old_name): + assert isinstance(old_name, basestring) + @functools.wraps(obj) def wrapper(*args, **kwargs): alias_warning(old_name, obj.__name__) return obj(*args, **kwargs) + wrapper.__doc__ = alias_message(old_name, obj.__name__) return wrapper -def class_alias(obj, old_name): # pragma: no cover - class Wrapper(obj): - def __new__(cls, *args, **kwargs): - alias_warning(old_name, obj.__name__) - return super(Wrapper, cls).__new__(cls, *args, **kwargs) - Wrapper.__name__ = obj.__name__ - return Wrapper - - def safe_cast_to_index(array): """Given an array, safely cast it to a pandas.Index. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8818e144fbf..d29137fb61b 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -77,7 +77,7 @@ def as_variable(obj, name=None, copy=False): 'dimensions %r. xarray disallows such variables because they ' 'conflict with the coordinates used to label dimensions.' % (name, obj.dims)) - obj = obj.to_coord() + obj = obj.to_index_variable() return obj @@ -87,7 +87,7 @@ def default_index_coordinate(dim, size): This is equivalent to np.arange(size), but waits to create the array until its actually accessed. 
""" - return Coordinate(dim, LazyIntegerRange(size)) + return IndexVariable(dim, LazyIntegerRange(size)) def _maybe_wrap_data(data): @@ -290,12 +290,6 @@ def load(self): self._data_cached() return self - def load_data(self): # pragma: no cover - warnings.warn('the Variable method `load_data` has been deprecated; ' - 'use `load` instead', - FutureWarning, stacklevel=2) - return self.load() - def __getstate__(self): """Always cache data as an in-memory array before pickling""" self._data_cached() @@ -312,19 +306,23 @@ def values(self): def values(self, values): self.data = values - def to_variable(self): + def to_base_variable(self): """Return this variable as a base xarray.Variable""" return Variable(self.dims, self._data, self._attrs, encoding=self._encoding, fastpath=True) - def to_coord(self): - """Return this variable as an xarray.Coordinate""" - return Coordinate(self.dims, self._data, self._attrs, - encoding=self._encoding, fastpath=True) + to_variable = utils.alias(to_base_variable, 'to_variable') + + def to_index_variable(self): + """Return this variable as an xarray.IndexVariable""" + return IndexVariable(self.dims, self._data, self._attrs, + encoding=self._encoding, fastpath=True) + + to_coord = utils.alias(to_index_variable, 'to_coord') def to_index(self): """Convert this variable to a pandas.Index""" - return self.to_coord().to_index() + return self.to_index_variable().to_index() @property def dims(self): @@ -1066,21 +1064,19 @@ def func(self, other): ops.inject_all_ops_and_reduce_methods(Variable) -class Coordinate(Variable): +class IndexVariable(Variable): + """Wrapper for accommodating a pandas.Index in an xarray.Variable. - """Wrapper around pandas.Index that adds xarray specific functionality. + IndexVariable objects preserve loaded values in the form of a pandas.Index + instead of a NumPy array. Hence, their values are immutable and must always + be one-dimensional. 
- The most important difference is that Coordinate objects must always have a - name, which is the dimension along which they index values. - - Coordinates must always be 1-dimensional. In addition to Variable methods - and properties (attributes, encoding, broadcasting), they support some - pandas.Index methods directly (e.g., get_indexer), even though pandas does - not (yet) support duck-typing for indexes. + They also have a name property, which is the name of their sole dimension. """ def __init__(self, name, data, attrs=None, encoding=None, fastpath=False): - super(Coordinate, self).__init__(name, data, attrs, encoding, fastpath) + super(IndexVariable, self).__init__( + name, data, attrs, encoding, fastpath) if self.ndim != 1: raise ValueError('%s objects must be 1-dimensional' % type(self).__name__) @@ -1105,7 +1101,7 @@ def __setitem__(self, key, value): @classmethod def concat(cls, variables, dim='concat_dim', positions=None, shortcut=False): - """Specialized version of Variable.concat for Coordinate variables. + """Specialized version of Variable.concat for IndexVariable objects. This exists because we want to avoid converting Index objects to NumPy arrays, if possible. 
@@ -1117,8 +1113,8 @@ def concat(cls, variables, dim='concat_dim', positions=None, first_var = variables[0] if any(not isinstance(v, cls) for v in variables): - raise TypeError('Coordinate.concat requires that all input ' - 'variables be Coordinate objects') + raise TypeError('IndexVariable.concat requires that all input ' + 'variables be IndexVariable objects') arrays = [v._data_cached().array for v in variables] @@ -1155,10 +1151,12 @@ def copy(self, deep=True): def _data_equals(self, other): return self.to_index().equals(other.to_index()) - def to_coord(self): - """Return this variable as an xarray.Coordinate""" + def to_index_variable(self): + """Return this variable as an xarray.IndexVariable""" return self + to_coord = utils.alias(to_index_variable, 'to_coord') + def to_index(self): """Convert this variable to a pandas.Index""" # n.b. creating a new pandas.Index from an old pandas.Index is @@ -1181,7 +1179,10 @@ def name(self): @name.setter def name(self, value): - raise AttributeError('cannot modify name of Coordinate in-place') + raise AttributeError('cannot modify name of IndexVariable in-place') + +# for backwards compatibility +Coordinate = utils.alias(IndexVariable, 'Coordinate') def _unified_dims(variables): @@ -1271,7 +1272,7 @@ def concat(variables, dim='concat_dim', positions=None, shortcut=False): along the given dimension. 
""" variables = list(variables) - if all(isinstance(v, Coordinate) for v in variables): - return Coordinate.concat(variables, dim, positions, shortcut) + if all(isinstance(v, IndexVariable) for v in variables): + return IndexVariable.concat(variables, dim, positions, shortcut) else: return Variable.concat(variables, dim, positions, shortcut) diff --git a/xarray/test/test_combine.py b/xarray/test/test_combine.py index e379ff023a8..216d3273ead 100644 --- a/xarray/test/test_combine.py +++ b/xarray/test/test_combine.py @@ -107,8 +107,8 @@ def test_concat_size0(self): self.assertDatasetIdentical(data, actual) def test_concat_autoalign(self): - ds1 = Dataset({'foo': DataArray([1, 2], coords={'x': [1, 2]})}) - ds2 = Dataset({'foo': DataArray([1, 2], coords={'x': [1, 3]})}) + ds1 = Dataset({'foo': DataArray([1, 2], coords=[('x', [1, 2])])}) + ds2 = Dataset({'foo': DataArray([1, 2], coords=[('x', [1, 3])])}) actual = concat([ds1, ds2], 'y') expected = Dataset({'foo': DataArray([[1, 2, np.nan], [1, np.nan, 2]], dims=['y', 'x'], coords={'y': [0, 1], 'x': [1, 2, 3]})}) diff --git a/xarray/test/test_dataarray.py b/xarray/test/test_dataarray.py index bcfa6816cc4..2e789516574 100644 --- a/xarray/test/test_dataarray.py +++ b/xarray/test/test_dataarray.py @@ -7,7 +7,7 @@ import xarray as xr from xarray import (align, broadcast, Dataset, DataArray, - Coordinate, Variable) + IndexVariable, Variable) from xarray.core.pycompat import iteritems, OrderedDict from xarray.core.common import _full_like @@ -133,9 +133,9 @@ def test_name(self): self.assertEqual(arr.name, 'bar') self.assertDataArrayEqual(copied, arr) - actual = DataArray(Coordinate('x', [3])) + actual = DataArray(IndexVariable('x', [3])) actual.name = 'y' - expected = DataArray([3], {'x': [3]}, name='y') + expected = DataArray([3], [('x', [3])], name='y') self.assertDataArrayIdentical(actual, expected) def test_dims(self): @@ -192,9 +192,6 @@ def test_constructor(self): actual = DataArray(data, coords) 
self.assertDataArrayIdentical(expected, actual) - actual = DataArray(data, OrderedDict(coords)) - self.assertDataArrayIdentical(expected, actual) - expected = Dataset({None: (['x', 'y'], data), 'x': ('x', ['a', 'b'])})[None] actual = DataArray(data, {'x': ['a', 'b']}, ['x', 'y']) @@ -281,7 +278,7 @@ def test_constructor_from_self_described(self): actual = DataArray(pd.Index(['a', 'b'], name='foo')) self.assertDataArrayIdentical(expected, actual) - actual = DataArray(Coordinate('foo', ['a', 'b'])) + actual = DataArray(IndexVariable('foo', ['a', 'b'])) self.assertDataArrayIdentical(expected, actual) def test_constructor_from_0d(self): @@ -596,8 +593,8 @@ def test_time_components(self): def test_coords(self): # use int64 to ensure repr() consistency on windows - coords = [Coordinate('x', np.array([-1, -2], 'int64')), - Coordinate('y', np.array([0, 1, 2], 'int64'))] + coords = [IndexVariable('x', np.array([-1, -2], 'int64')), + IndexVariable('y', np.array([0, 1, 2], 'int64'))] da = DataArray(np.random.randn(2, 3), coords, name='foo') self.assertEquals(2, len(da.coords)) @@ -1354,8 +1351,8 @@ def make_groupby_multidim_example_array(self): def test_groupby_multidim(self): array = self.make_groupby_multidim_example_array() for dim, expected_sum in [ - ('lon', DataArray([5, 28, 23], coords={'lon': [30., 40., 50.]})), - ('lat', DataArray([16, 40], coords={'lat': [10., 20.]}))]: + ('lon', DataArray([5, 28, 23], coords=[('lon', [30., 40., 50.])])), + ('lat', DataArray([16, 40], coords=[('lat', [10., 20.])]))]: actual_sum = array.groupby(dim).sum() self.assertDataArrayIdentical(expected_sum, actual_sum) @@ -1629,8 +1626,8 @@ def test_align_dtype(self): # regression test for #264 x1 = np.arange(30) x2 = np.arange(5, 35) - a = DataArray(np.random.random((30,)).astype(np.float32), {'x': x1}) - b = DataArray(np.random.random((30,)).astype(np.float32), {'x': x2}) + a = DataArray(np.random.random((30,)).astype(np.float32), [('x', x1)]) + b = 
DataArray(np.random.random((30,)).astype(np.float32), [('x', x2)]) c, d = align(a, b, join='outer') self.assertEqual(c.dtype, np.float32) @@ -1912,8 +1909,7 @@ def test_to_and_from_dict_with_time_dim(self): x = np.random.randn(10, 3) t = pd.date_range('20130101', periods=10) lat = [77.7, 83.2, 76] - da = DataArray(x, OrderedDict([('t', ('t', t)), - ('lat', ('lat', lat))])) + da = DataArray(x, {'t': t, 'lat': lat}, dims=['t', 'lat']) roundtripped = DataArray.from_dict(da.to_dict()) self.assertDataArrayIdentical(da, roundtripped) @@ -1923,8 +1919,7 @@ def test_to_and_from_dict_with_nan_nat(self): t = pd.Series(pd.date_range('20130101', periods=10)) t[2] = np.nan lat = [77.7, 83.2, 76] - da = DataArray(y, OrderedDict([('t', ('t', t)), - ('lat', ('lat', lat))])) + da = DataArray(y, {'t': t, 'lat': lat}, dims=['t', 'lat']) roundtripped = DataArray.from_dict(da.to_dict()) self.assertDataArrayIdentical(da, roundtripped) @@ -1980,8 +1975,8 @@ def test_to_and_from_cdms2(self): [('distance', [-2, 2], {'units': 'meters'}), ('time', pd.date_range('2000-01-01', periods=3))], name='foo', attrs={'baz': 123}) - expected_coords = [Coordinate('distance', [-2, 2]), - Coordinate('time', [0, 1, 2])] + expected_coords = [IndexVariable('distance', [-2, 2]), + IndexVariable('time', [0, 1, 2])] actual = original.to_cdms2() self.assertArrayEqual(actual, original) self.assertEqual(actual.id, original.name) diff --git a/xarray/test/test_dataset.py b/xarray/test/test_dataset.py index 9b2c6c1e22d..61170f85a71 100644 --- a/xarray/test/test_dataset.py +++ b/xarray/test/test_dataset.py @@ -15,7 +15,7 @@ import pytest from xarray import (align, broadcast, concat, merge, conventions, backends, - Dataset, DataArray, Variable, Coordinate, auto_combine, + Dataset, DataArray, Variable, IndexVariable, auto_combine, open_dataset, set_options, MergeError) from xarray.core import indexing, utils from xarray.core.pycompat import iteritems, OrderedDict, unicode_type @@ -153,19 +153,6 @@ def 
test_constructor(self): actual = Dataset({'z': expected['z']}) self.assertDatasetIdentical(expected, actual) - def test_constructor_kwargs(self): - x1 = ('x', 2 * np.arange(100)) - - with self.assertRaises(TypeError): - Dataset(data_vars={'x1': x1}, invalid_kwarg=42) - - import warnings - # this can be removed once the variables keyword is fully removed - with warnings.catch_warnings(record=False): - ds = Dataset(variables={'x1': x1}) - # but assert dataset is still created - self.assertDatasetEqual(ds, Dataset(data_vars={'x1': x1})) - def test_constructor_1d(self): expected = Dataset({'x': (['x'], 5.0 + np.arange(5))}) actual = Dataset({'x': 5.0 + np.arange(5)}) @@ -193,6 +180,10 @@ class Arbitrary(object): actual = Dataset({'x': arg}) self.assertDatasetIdentical(expected, actual) + def test_constructor_deprecated(self): + with self.assertWarns('deprecated'): + DataArray([1, 2, 3], coords={'x': [0, 1, 2]}) + def test_constructor_auto_align(self): a = DataArray([1, 2], [('x', [0, 1])]) b = DataArray([3, 4], [('x', [1, 2])]) @@ -204,7 +195,7 @@ def test_constructor_auto_align(self): self.assertDatasetIdentical(expected, actual) # regression test for GH346 - self.assertIsInstance(actual.variables['x'], Coordinate) + self.assertIsInstance(actual.variables['x'], IndexVariable) # variable with different dimensions c = ('y', [3, 4]) @@ -228,13 +219,13 @@ def test_constructor_pandas_sequence(self): pandas_objs = OrderedDict( (var_name, ds[var_name].to_pandas()) for var_name in ['foo','bar'] ) - ds_based_on_pandas = Dataset(data_vars=pandas_objs, coords=ds.coords, attrs=ds.attrs) + ds_based_on_pandas = Dataset(pandas_objs, ds.coords, attrs=ds.attrs) self.assertDatasetEqual(ds, ds_based_on_pandas) # reindex pandas obj, check align works rearranged_index = reversed(pandas_objs['foo'].index) pandas_objs['foo'] = pandas_objs['foo'].reindex(rearranged_index) - ds_based_on_pandas = Dataset(variables=pandas_objs, coords=ds.coords, attrs=ds.attrs) + ds_based_on_pandas = 
Dataset(pandas_objs, ds.coords, attrs=ds.attrs) self.assertDatasetEqual(ds, ds_based_on_pandas) def test_constructor_pandas_single(self): @@ -1066,10 +1057,10 @@ def test_align(self): align(left, right, foo='bar') def test_align_exclude(self): - x = Dataset({'foo': DataArray([[1, 2],[3, 4]], dims=['x', 'y'], - coords={'x': [1, 2], 'y': [3, 4]})}) - y = Dataset({'bar': DataArray([[1, 2],[3, 4]], dims=['x', 'y'], - coords={'x': [1, 3], 'y': [5, 6]})}) + x = Dataset({'foo': DataArray([[1, 2], [3, 4]], dims=['x', 'y'], + coords={'x': [1, 2], 'y': [3, 4]})}) + y = Dataset({'bar': DataArray([[1, 2], [3, 4]], dims=['x', 'y'], + coords={'x': [1, 3], 'y': [5, 6]})}) x2, y2 = align(x, y, exclude=['y'], join='outer') expected_x2 = Dataset( @@ -1084,11 +1075,11 @@ def test_align_exclude(self): self.assertDatasetIdentical(expected_y2, y2) def test_align_nocopy(self): - x = Dataset({'foo': DataArray([1, 2, 3], coords={'x': [1, 2, 3]})}) - y = Dataset({'foo': DataArray([1, 2], coords={'x': [1, 2]})}) + x = Dataset({'foo': DataArray([1, 2, 3], coords=[('x', [1, 2, 3])])}) + y = Dataset({'foo': DataArray([1, 2], coords=[('x', [1, 2])])}) expected_x2 = x expected_y2 = Dataset({'foo': DataArray([1, 2, np.nan], - coords={'x': [1, 2, 3]})}) + coords=[('x', [1, 2, 3])])}) x2, y2 = align(x, y, copy=False, join='outer') self.assertDatasetIdentical(expected_x2, x2) @@ -1101,7 +1092,7 @@ def test_align_nocopy(self): assert source_ndarray(x['foo'].data) is not source_ndarray(x2['foo'].data) def test_align_indexes(self): - x = Dataset({'foo': DataArray([1, 2, 3], coords={'x': [1, 2, 3]})}) + x = Dataset({'foo': DataArray([1, 2, 3], coords=[('x', [1, 2, 3])])}) x2, = align(x, indexes={'x': [2, 3, 1]}) expected_x2 = Dataset({'foo': DataArray([2, 3, 1], coords={'x': [2, 3, 1]})}) self.assertDatasetIdentical(expected_x2, x2) @@ -1173,11 +1164,18 @@ def test_broadcast_exclude(self): self.assertDatasetIdentical(expected_y2, y2) def test_broadcast_misaligned(self): - x = Dataset({'foo': 
DataArray([1, 2, 3], coords={'x': [-1, -2, -3]})}) - y = Dataset({'bar': DataArray([[1, 2], [3, 4]], dims=['y', 'x'], coords={'y': [1, 2], 'x': [10, -3]})}) + x = Dataset({'foo': DataArray([1, 2, 3], coords=[('x', [-1, -2, -3])])}) + y = Dataset({'bar': DataArray([[1, 2], [3, 4]], dims=['y', 'x'], + coords={'y': [1, 2], 'x': [10, -3]})}) x2, y2 = broadcast(x, y) - expected_x2 = Dataset({'foo': DataArray([[3, 3], [2, 2], [1, 1], [np.nan, np.nan]], dims=['x', 'y'], coords={'y': [1, 2], 'x': [-3, -2, -1, 10]})}) - expected_y2 = Dataset({'bar': DataArray([[2, 4], [np.nan, np.nan], [np.nan, np.nan], [1, 3]], dims=['x', 'y'], coords={'y': [1, 2], 'x': [-3, -2, -1, 10]})}) + expected_x2 = Dataset( + {'foo': DataArray([[3, 3], [2, 2], [1, 1], [np.nan, np.nan]], + dims=['x', 'y'], + coords={'y': [1, 2], 'x': [-3, -2, -1, 10]})}) + expected_y2 = Dataset( + {'bar': DataArray( + [[2, 4], [np.nan, np.nan], [np.nan, np.nan], [1, 3]], + dims=['x', 'y'], coords={'y': [1, 2], 'x': [-3, -2, -1, 10]})}) self.assertDatasetIdentical(expected_x2, x2) self.assertDatasetIdentical(expected_y2, y2) @@ -1311,7 +1309,7 @@ def test_swap_dims(self): expected = Dataset({'z': 42}, {'x': ('y', [1, 2, 3]), 'y': list('abc')}) actual = original.swap_dims({'x': 'y'}) self.assertDatasetIdentical(expected, actual) - self.assertIsInstance(actual.variables['y'], Coordinate) + self.assertIsInstance(actual.variables['y'], IndexVariable) self.assertIsInstance(actual.variables['x'], Variable) roundtripped = actual.swap_dims({'y': 'x'}) @@ -1472,7 +1470,7 @@ def test_virtual_variable_same_name(self): times = pd.date_range('2000-01-01', freq='H', periods=5) data = Dataset({'time': times}) actual = data['time.time'] - expected = DataArray(times.time, {'time': times}, name='time') + expected = DataArray(times.time, [('time', times)], name='time') self.assertDataArrayIdentical(actual, expected) def test_time_season(self): diff --git a/xarray/test/test_utils.py b/xarray/test/test_utils.py index 
83ce59c5c44..373940c97d4 100644 --- a/xarray/test/test_utils.py +++ b/xarray/test/test_utils.py @@ -6,6 +6,16 @@ from . import TestCase +class TestAlias(TestCase): + def test(self): + def new_method(): + pass + old_method = utils.alias(new_method, 'old_method') + assert 'deprecated' in old_method.__doc__ + with self.assertWarns('deprecated'): + old_method() + + class TestSafeCastToIndex(TestCase): def test(self): dates = pd.date_range('2000-01-01', periods=10) diff --git a/xarray/test/test_variable.py b/xarray/test/test_variable.py index c96f752d0c8..10e360f5322 100644 --- a/xarray/test/test_variable.py +++ b/xarray/test/test_variable.py @@ -8,9 +8,9 @@ import pytz import pandas as pd -from xarray import Variable, Dataset, DataArray +from xarray import Variable, IndexVariable, Coordinate, Dataset, DataArray from xarray.core import indexing -from xarray.core.variable import (Coordinate, as_variable, as_compatible_data) +from xarray.core.variable import as_variable, as_compatible_data from xarray.core.indexing import PandasIndexAdapter, LazilyIndexedArray from xarray.core.pycompat import PY3, OrderedDict @@ -243,9 +243,9 @@ def test_1d_math(self): self.assertEqual(float, (0 + v).values.dtype) # check types of returned data self.assertIsInstance(+v, Variable) - self.assertNotIsInstance(+v, Coordinate) + self.assertNotIsInstance(+v, IndexVariable) self.assertIsInstance(0 + v, Variable) - self.assertNotIsInstance(0 + v, Coordinate) + self.assertNotIsInstance(0 + v, IndexVariable) def test_1d_reduce(self): x = np.arange(5) @@ -266,7 +266,7 @@ def test_array_interface(self): # test ufuncs self.assertVariableIdentical(np.sin(v), self.cls(['x'], np.sin(x))) self.assertIsInstance(np.sin(v), Variable) - self.assertNotIsInstance(np.sin(v), Coordinate) + self.assertNotIsInstance(np.sin(v), IndexVariable) def example_1d_objects(self): for data in [range(3), @@ -469,7 +469,7 @@ def test_numpy_same_methods(self): self.assertEqual(v.item(), 0) self.assertIs(type(v.item()), float) - 
v = Coordinate('x', np.arange(5)) + v = IndexVariable('x', np.arange(5)) self.assertEqual(2, v.searchsorted(2)) def test_datetime64_conversion_scalar(self): @@ -586,7 +586,7 @@ def test_as_variable(self): actual = as_variable(data, name='x') self.assertVariableIdentical(expected, actual) - self.assertIsInstance(actual, Coordinate) + self.assertIsInstance(actual, IndexVariable) actual = as_variable(0) expected = Variable([], 0) @@ -1002,37 +1002,37 @@ def test_count(self): self.assertVariableIdentical(expected, actual) -class TestCoordinate(TestCase, VariableSubclassTestCases): - cls = staticmethod(Coordinate) +class TestIndexVariable(TestCase, VariableSubclassTestCases): + cls = staticmethod(IndexVariable) def test_init(self): with self.assertRaisesRegexp(ValueError, 'must be 1-dimensional'): - Coordinate((), 0) + IndexVariable((), 0) def test_to_index(self): data = 0.5 * np.arange(10) - v = Coordinate(['time'], data, {'foo': 'bar'}) + v = IndexVariable(['time'], data, {'foo': 'bar'}) self.assertTrue(pd.Index(data, name='time').identical(v.to_index())) def test_multiindex_default_level_names(self): midx = pd.MultiIndex.from_product([['a', 'b'], [1, 2]]) - v = Coordinate(['x'], midx, {'foo': 'bar'}) + v = IndexVariable(['x'], midx, {'foo': 'bar'}) self.assertEqual(v.to_index().names, ('x_level_0', 'x_level_1')) def test_data(self): - x = Coordinate('x', np.arange(3.0)) + x = IndexVariable('x', np.arange(3.0)) # data should be initially saved as an ndarray self.assertIs(type(x._data), np.ndarray) self.assertEqual(float, x.dtype) self.assertArrayEqual(np.arange(3), x) self.assertEqual(float, x.values.dtype) - # after inspecting x.values, the Coordinate value will be saved as an Index + # after inspecting x.values, the IndexVariable value will be saved as an Index self.assertIsInstance(x._data, PandasIndexAdapter) with self.assertRaisesRegexp(TypeError, 'cannot be modified'): x[:] = 0 def test_name(self): - coord = Coordinate('x', [10.0]) + coord = IndexVariable('x', 
[10.0]) self.assertEqual(coord.name, 'x') with self.assertRaises(AttributeError): @@ -1040,25 +1040,31 @@ def test_name(self): def test_concat_periods(self): periods = pd.period_range('2000-01-01', periods=10) - coords = [Coordinate('t', periods[:5]), Coordinate('t', periods[5:])] - expected = Coordinate('t', periods) - actual = Coordinate.concat(coords, dim='t') + coords = [IndexVariable('t', periods[:5]), IndexVariable('t', periods[5:])] + expected = IndexVariable('t', periods) + actual = IndexVariable.concat(coords, dim='t') assert actual.identical(expected) assert isinstance(actual.to_index(), pd.PeriodIndex) positions = [list(range(5)), list(range(5, 10))] - actual = Coordinate.concat(coords, dim='t', positions=positions) + actual = IndexVariable.concat(coords, dim='t', positions=positions) assert actual.identical(expected) assert isinstance(actual.to_index(), pd.PeriodIndex) def test_concat_multiindex(self): idx = pd.MultiIndex.from_product([[0, 1, 2], ['a', 'b']]) - coords = [Coordinate('x', idx[:2]), Coordinate('x', idx[2:])] - expected = Coordinate('x', idx) - actual = Coordinate.concat(coords, dim='x') + coords = [IndexVariable('x', idx[:2]), IndexVariable('x', idx[2:])] + expected = IndexVariable('x', idx) + actual = IndexVariable.concat(coords, dim='x') assert actual.identical(expected) assert isinstance(actual.to_index(), pd.MultiIndex) + def test_coordinate_alias(self): + with self.assertWarns('deprecated'): + x = Coordinate('x', [1, 2, 3]) + self.assertIsInstance(x, IndexVariable) + + class TestAsCompatibleData(TestCase): def test_unchanged_types(self):