diff --git a/.github/stale.yml b/.github/stale.yml new file mode 100644 index 00000000000..5a8f6596d69 --- /dev/null +++ b/.github/stale.yml @@ -0,0 +1,58 @@ +# Configuration for probot-stale - https://github.com/probot/stale + +# Number of days of inactivity before an Issue or Pull Request becomes stale +daysUntilStale: 700 # start with a large number and reduce shortly + +# Number of days of inactivity before an Issue or Pull Request with the stale label is closed. +# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. +daysUntilClose: 30 + +# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable +exemptLabels: + - pinned + - security + - "[Status] Maybe Later" + +# Set to true to ignore issues in a project (defaults to false) +exemptProjects: false + +# Set to true to ignore issues in a milestone (defaults to false) +exemptMilestones: false + +# Set to true to ignore issues with an assignee (defaults to false) +exemptAssignees: true + +# Label to use when marking as stale +staleLabel: stale + +# Comment to post when marking as stale. Set to `false` to disable +markComment: | + In order to maintain a list of currently relevant issues, we mark issues as stale after a period of inactivity. + If this issue remains relevant, please comment here; otherwise it will be closed automatically. + +# Comment to post when removing the stale label. +# unmarkComment: > +# Your comment here. + +# Comment to post when closing a stale Issue or Pull Request. +# closeComment: > +# Your comment here. + +# Limit the number of actions per hour, from 1-30. Default is 30 +limitPerRun: 1 # start with a small number + + +# Limit to only `issues` or `pulls` +# only: issues + +# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': +# pulls: +# daysUntilStale: 30 +# markComment: > +# This pull request has been automatically marked as stale because it has not had +# recent activity. It will be closed if no further activity occurs. Thank you +# for your contributions. + +# issues: +# exemptLabels: +# - confirmed \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index a21d4d94413..fbc01b4815d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -60,8 +60,8 @@ script: - python --version - python -OO -c "import xarray" - if [[ "$CONDA_ENV" == "docs" ]]; then - conda install -c conda-forge --override-channels sphinx sphinx_rtd_theme sphinx-gallery numpydoc "gdal>2.2.4"; - sphinx-build -n -j auto -b html -d _build/doctrees doc _build/html; + cd doc; + sphinx-build -n -j auto -b html -d _build/doctrees . _build/html; elif [[ "$CONDA_ENV" == "lint" ]]; then pycodestyle xarray ; elif [[ "$CONDA_ENV" == "py36-hypothesis" ]]; then diff --git a/ci/requirements-py36.yml b/ci/requirements-py36.yml index 0ed6dd78c3a..7523b14608b 100644 --- a/ci/requirements-py36.yml +++ b/ci/requirements-py36.yml @@ -26,8 +26,8 @@ dependencies: - pseudonetcdf>=3.0.1 - eccodes - cdms2 - # - pynio # xref #2683 - # - iris>=1.10 # xref #2683 +# - pynio # xref #2683 +# - iris>=1.10 # xref #2683 - pydap - lxml - pip: diff --git a/doc/conf.py b/doc/conf.py index 897c0443054..322741556b6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -13,9 +13,10 @@ # serve to show the default.
from __future__ import absolute_import, division, print_function +from contextlib import suppress import datetime -import importlib import os +import subprocess import sys import xarray @@ -24,29 +25,33 @@ print("python exec:", sys.executable) print("sys.path:", sys.path) -for name in ('numpy scipy pandas matplotlib dask IPython seaborn ' - 'cartopy netCDF4 rasterio zarr iris flake8 ' - 'sphinx_gallery cftime').split(): - try: - module = importlib.import_module(name) - if name == 'matplotlib': - module.use('Agg') - fname = module.__file__.rstrip('__init__.py') - print("%s: %s, %s" % (name, module.__version__, fname)) - except ImportError: - print("no %s" % name) - # neither rasterio nor cartopy should be hard requirements for - # the doc build. - if name == 'rasterio': - allowed_failures.update(['gallery/plot_rasterio_rgb.py', - 'gallery/plot_rasterio.py']) - elif name == 'cartopy': - allowed_failures.update(['gallery/plot_cartopy_facetgrid.py', - 'gallery/plot_rasterio_rgb.py', - 'gallery/plot_rasterio.py']) + +if 'conda' in sys.executable: + print('conda environment:') + subprocess.run(['conda', 'list']) +else: + print('pip environment:') + subprocess.run(['pip', 'list']) print("xarray: %s, %s" % (xarray.__version__, xarray.__file__)) +with suppress(ImportError): + import matplotlib + matplotlib.use('Agg') + +try: + import rasterio +except ImportError: + allowed_failures.update(['gallery/plot_rasterio_rgb.py', + 'gallery/plot_rasterio.py']) + +try: + import cartopy +except ImportError: + allowed_failures.update(['gallery/plot_cartopy_facetgrid.py', + 'gallery/plot_rasterio_rgb.py', + 'gallery/plot_rasterio.py']) + # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. 
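The conf.py changes above replace the importlib-based loop with explicit optional imports: contextlib.suppress for matplotlib, and plain try/except blocks that add gallery scripts to allowed_failures when rasterio or cartopy are missing. A minimal, self-contained sketch of that pattern, assuming allowed_failures is a set initialized earlier in conf.py as in the real file:

    from contextlib import suppress

    allowed_failures = set()  # initialized earlier in the real conf.py

    # matplotlib is optional for the build itself, but if it is present a
    # non-interactive backend must be selected before any figures are drawn
    with suppress(ImportError):
        import matplotlib
        matplotlib.use('Agg')

    # rasterio is not a hard requirement for the doc build; gallery examples
    # that need it are simply allowed to fail
    try:
        import rasterio  # noqa: F401
    except ImportError:
        allowed_failures.update(['gallery/plot_rasterio_rgb.py',
                                 'gallery/plot_rasterio.py'])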
diff --git a/doc/environment.yml b/doc/environment.yml index ca4f622cd38..f4d1f4e9008 100644 --- a/doc/environment.yml +++ b/doc/environment.yml @@ -2,22 +2,25 @@ name: xarray-docs channels: - conda-forge dependencies: - - python=3.6 - - numpy=1.14.5 + - python=3.7 + - numpy=1.16.0 - pandas=0.23.3 - - scipy=1.1.0 - - matplotlib=2.2.2 + - scipy=1.2.0 + - matplotlib=3.0.2 - seaborn=0.9.0 - - dask=0.18.2 - - ipython=6.4.0 - - netCDF4=1.4.0 - - cartopy=0.16.0 - - rasterio=1.0.1 + - dask=1.1.0 + - ipython=7.2.0 + - netCDF4=1.4.2 + - cartopy=0.17.0 + - rasterio=1.0.13 - zarr=2.2.0 - - iris=2.1.0 - - flake8=3.5.0 + - iris=2.2.0 + - flake8=3.6.0 - cftime=1.0.3.4 - - bottleneck=1.2 - - sphinx=1.7.6 + - bottleneck=1.2.1 + - sphinx=1.8.2 - numpydoc=0.8.0 - sphinx-gallery=0.2.0 + - pillow=5.4.1 + - sphinx_rtd_theme=0.4.2 + - mock=2.0.0 diff --git a/doc/examples/multidimensional-coords.rst b/doc/examples/multidimensional-coords.rst index 7c86f897a24..a5084043977 100644 --- a/doc/examples/multidimensional-coords.rst +++ b/doc/examples/multidimensional-coords.rst @@ -78,9 +78,8 @@ grid, we can take advantage of xarray's ability to apply ax = plt.axes(projection=ccrs.PlateCarree()); ds.Tair[0].plot.pcolormesh(ax=ax, transform=ccrs.PlateCarree(), x='xc', y='yc', add_colorbar=False); - ax.coastlines(); @savefig xarray_multidimensional_coords_12_0.png width=100% - plt.tight_layout(); + ax.coastlines(); Multidimensional Groupby ------------------------ diff --git a/doc/indexing.rst b/doc/indexing.rst index 3878d983cf6..77ec7428991 100644 --- a/doc/indexing.rst +++ b/doc/indexing.rst @@ -371,7 +371,7 @@ Vectorized indexing also works with ``isel``, ``loc``, and ``sel``: ind = xr.DataArray([['a', 'b'], ['b', 'a']], dims=['a', 'b']) da.loc[:, ind] # same as da.sel(y=ind) -These methods may and also be applied to ``Dataset`` objects +These methods may also be applied to ``Dataset`` objects .. ipython:: python diff --git a/doc/io.rst b/doc/io.rst index 151f5eb740f..0dc5181f9b8 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -81,6 +81,16 @@ require external libraries and dicts can easily be pickled, or converted to json, or geojson. All the values are converted to lists, so dicts might be quite large. +To export just the dataset schema, without the data itself, use the +``data=False`` option: + +.. ipython:: python + + ds.to_dict(data=False) + +This can be useful for generating indices of dataset contents to expose to +search indices or other automated data discovery tools. + .. _io.netcdf: netCDF @@ -665,7 +675,7 @@ To read a consolidated store, pass the ``consolidated=True`` option to :py:func:`~xarray.open_zarr`:: ds = xr.open_zarr('foo.zarr', consolidated=True) - + Xarray can't perform consolidation on pre-existing zarr datasets. This should be done directly from zarr, as described in the `zarr docs `_. diff --git a/doc/pandas.rst b/doc/pandas.rst index fc20d161e05..1538fced648 100644 --- a/doc/pandas.rst +++ b/doc/pandas.rst @@ -14,7 +14,7 @@ __ http://pandas.pydata.org/pandas-docs/stable/visualization.html __ http://stanford.edu/~mwaskom/software/seaborn/ .. ipython:: python - :suppress: + :suppress: import numpy as np import pandas as pd @@ -93,7 +93,6 @@ DataFrames: s = ds['foo'].to_series() s - # or equivalently, with Series.to_xarray() xr.DataArray.from_series(s) @@ -173,11 +172,10 @@ So you can represent a Panel, in two ways: Let's take a look: .. 
ipython:: python - :okwarning: + :okwarning: panel = pd.Panel(np.random.rand(2, 3, 4), items=list('ab'), major_axis=list('mno'), minor_axis=pd.date_range(start='2000', periods=4, name='date')) - panel As a DataArray: diff --git a/doc/time-series.rst b/doc/time-series.rst index 9c53c56679e..49d23634694 100644 --- a/doc/time-series.rst +++ b/doc/time-series.rst @@ -15,7 +15,6 @@ core functionality. import numpy as np import pandas as pd import xarray as xr - np.random.seed(123456) Creating datetime64 data @@ -241,7 +240,6 @@ coordinate with dates from a no-leap calendar and a from itertools import product from cftime import DatetimeNoLeap - dates = [DatetimeNoLeap(year, month, 1) for year, month in product(range(1, 3), range(1, 13))] da = xr.DataArray(np.arange(24), coords=[dates], dims=['time'], name='foo') diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d2fd7188da3..2e6a20fc715 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -13,9 +13,9 @@ What's New import xarray as xr np.random.seed(123456) -.. _whats-new.0.11.3: +.. _whats-new.0.12.0: -v0.11.3 (unreleased) +v0.12.0 (unreleased) -------------------- Breaking changes @@ -28,6 +28,8 @@ Breaking changes Enhancements ~~~~~~~~~~~~ +- Add ``data=False`` option to ``to_dict()`` methods. (:issue:`2656`) + By `Ryan Abernathey `_ - :py:meth:`~xarray.DataArray.coarsen` and :py:meth:`~xarray.Dataset.coarsen` are newly added. See :ref:`comput.coarsen` for details. @@ -45,18 +47,39 @@ Enhancements :py:class:`~xarray.CFTimeIndex` is now possible. (:issue:`2191`). By `Jwen Fai Low `_ and `Spencer Clark `_. +- :py:func:`xarray.testing.assert_equal` and + :py:func:`xarray.testing.assert_identical` now provide a more detailed + report showing what exactly differs between the two objects (dimensions / + coordinates / variables / attributes) (:issue:`1507`). + By `Benoit Bovy `_. Bug fixes ~~~~~~~~~ +- Silenced warnings that appear when using pandas 0.24. + By `Stephan Hoyer `_ - Interpolating via resample now internally specifies ``bounds_error=False`` as an argument to ``scipy.interpolate.interp1d``, allowing for interpolation from higher frequencies to lower frequencies. Datapoints outside the bounds of the original time coordinate are now filled with NaN (:issue:`2197`). By `Spencer Clark `_. + +.. _whats-new.0.11.3: + +v0.11.3 (26 January 2019) +------------------------- + +Bug fixes +~~~~~~~~~ + - Saving files with times encoded with reference dates with timezones (e.g. '2000-01-01T00:00:00-05:00') no longer raises an error (:issue:`2649`). By `Spencer Clark `_. +- Fixed performance regression with ``open_mfdataset`` (:issue:`2662`). + By `Tom Nicholas `_. +- Fixed supplying an explicit dimension in the ``concat_dim`` argument + to ``open_mfdataset`` (:issue:`2647`). + By `Ben Root `_. ..
_whats-new.0.11.2: diff --git a/setup.py b/setup.py index ff667d7a113..3921d0c3472 100644 --- a/setup.py +++ b/setup.py @@ -15,8 +15,6 @@ 'Operating System :: OS Independent', 'Intended Audience :: Science/Research', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', diff --git a/xarray/__init__.py b/xarray/__init__.py index 59a961c6b56..773dfe19d01 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -1,7 +1,4 @@ # flake8: noqa -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from ._version import get_versions __version__ = get_versions()['version'] diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5f88783bb2e..e52f47a0841 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import, division, print_function - import os.path import warnings from glob import glob from io import BytesIO from numbers import Number +from pathlib import Path import numpy as np @@ -12,7 +11,6 @@ from ..core import indexing from ..core.combine import ( _CONCAT_DIM_DEFAULT, _auto_combine, _infer_concat_order_from_positions) -from ..core.pycompat import basestring, path_type from ..core.utils import close_on_error, is_grib_path, is_remote_uri from .common import ArrayWriter from .locks import _get_scheduler @@ -99,7 +97,7 @@ def _normalize_path(path): def _validate_dataset_names(dataset): """DataArray.name and Dataset keys must be a string or None""" def check_name(name): - if isinstance(name, basestring): + if isinstance(name, str): if not name: raise ValueError('Invalid name for DataArray or Dataset key: ' 'string must be length 1 or greater for ' @@ -117,7 +115,7 @@ def _validate_attrs(dataset): a string, an ndarray or a list/tuple of numbers/strings. 
""" def check_attr(name, value): - if isinstance(name, basestring): + if isinstance(name, str): if not name: raise ValueError('Invalid name for attr: string must be ' 'length 1 or greater for serialization to ' @@ -126,7 +124,7 @@ def check_attr(name, value): raise TypeError("Invalid name for attr: {} must be a string for " "serialization to netCDF files".format(name)) - if not isinstance(value, (basestring, Number, np.ndarray, np.number, + if not isinstance(value, (str, Number, np.ndarray, np.number, list, tuple)): raise TypeError('Invalid value for attr: {} must be a number, ' 'a string, an ndarray or a list/tuple of ' @@ -279,7 +277,7 @@ def maybe_decode_store(store, lock=False): from dask.base import tokenize # if passed an actual file path, augment the token with # the file modification time - if (isinstance(filename_or_obj, basestring) and + if (isinstance(filename_or_obj, str) and not is_remote_uri(filename_or_obj)): mtime = os.path.getmtime(filename_or_obj) else: @@ -295,13 +293,13 @@ def maybe_decode_store(store, lock=False): return ds2 - if isinstance(filename_or_obj, path_type): + if isinstance(filename_or_obj, Path): filename_or_obj = str(filename_or_obj) if isinstance(filename_or_obj, backends.AbstractDataStore): store = filename_or_obj ds = maybe_decode_store(store) - elif isinstance(filename_or_obj, basestring): + elif isinstance(filename_or_obj, str): if (isinstance(filename_or_obj, bytes) and filename_or_obj.startswith(b'\x89HDF')): @@ -310,7 +308,7 @@ def maybe_decode_store(store, lock=False): filename_or_obj.startswith(b'CDF')): # netCDF3 file images are handled by scipy pass - elif isinstance(filename_or_obj, basestring): + elif isinstance(filename_or_obj, str): filename_or_obj = _normalize_path(filename_or_obj) if engine is None: @@ -352,7 +350,7 @@ def maybe_decode_store(store, lock=False): # Ensure source filename always stored in dataset object (GH issue #2550) if 'source' not in ds.encoding: - if isinstance(filename_or_obj, basestring): + if isinstance(filename_or_obj, str): ds.encoding['source'] = filename_or_obj return ds @@ -588,7 +586,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, .. [1] http://xarray.pydata.org/en/stable/dask.html .. [2] http://xarray.pydata.org/en/stable/dask.html#chunking-and-performance """ # noqa - if isinstance(paths, basestring): + if isinstance(paths, str): if is_remote_uri(paths): raise ValueError( 'cannot do wild-card matching for paths that are remote URLs: ' @@ -596,7 +594,7 @@ def open_mfdataset(paths, chunks=None, concat_dim=_CONCAT_DIM_DEFAULT, .format(paths)) paths = sorted(glob(paths)) else: - paths = [str(p) if isinstance(p, path_type) else p for p in paths] + paths = [str(p) if isinstance(p, Path) else p for p in paths] if not paths: raise IOError('no files to open') @@ -681,7 +679,7 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None, The ``multifile`` argument is only for the private use of save_mfdataset. 
""" - if isinstance(path_or_file, path_type): + if isinstance(path_or_file, Path): path_or_file = str(path_or_file) if encoding is None: @@ -698,7 +696,7 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None, raise NotImplementedError( 'to_netcdf() with compute=False is not yet implemented when ' 'returning bytes') - elif isinstance(path_or_file, basestring): + elif isinstance(path_or_file, str): if engine is None: engine = _get_default_engine(path_or_file) path_or_file = _normalize_path(path_or_file) @@ -733,7 +731,7 @@ def to_netcdf(dataset, path_or_file=None, mode='w', format=None, group=None, if unlimited_dims is None: unlimited_dims = dataset.encoding.get('unlimited_dims', None) - if isinstance(unlimited_dims, basestring): + if isinstance(unlimited_dims, str): unlimited_dims = [unlimited_dims] writer = ArrayWriter() @@ -896,7 +894,7 @@ def to_zarr(dataset, store=None, mode='w-', synchronizer=None, group=None, See `Dataset.to_zarr` for full API docs. """ - if isinstance(store, path_type): + if isinstance(store, Path): store = str(store) if encoding is None: encoding = {} diff --git a/xarray/backends/cfgrib_.py b/xarray/backends/cfgrib_.py index 96095b7b858..51c3318e794 100644 --- a/xarray/backends/cfgrib_.py +++ b/xarray/backends/cfgrib_.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np from .. import Variable diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 405d989f4af..a52daaaa65c 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -1,16 +1,15 @@ -from __future__ import absolute_import, division, print_function - import logging import time import traceback import warnings -from collections import Mapping, OrderedDict +from collections import OrderedDict +from collections.abc import Mapping import numpy as np from ..conventions import cf_encoder from ..core import indexing -from ..core.pycompat import dask_array_type, iteritems +from ..core.pycompat import dask_array_type from ..core.utils import FrozenOrderedDict, NdimSizeLenMixin # Create a logger object, but don't add any handlers. Leave that to user code. @@ -109,9 +108,9 @@ class SuffixAppendingDataStore(AbstractDataStore): def load(self): variables, attributes = AbstractDataStore.load(self) variables = {'%s_suffix' % k: v - for k, v in iteritems(variables)} + for k, v in variables.items()} attributes = {'%s_suffix' % k: v - for k, v in iteritems(attributes)} + for k, v in attributes.items()} return variables, attributes This function will be called anytime variables or attributes @@ -275,7 +274,7 @@ def set_attributes(self, attributes): attributes : dict-like Dictionary of key/value (attribute name / attribute) pairs """ - for k, v in iteritems(attributes): + for k, v in attributes.items(): self.set_attribute(k, v) def set_variables(self, variables, check_encoding_set, writer, @@ -297,7 +296,7 @@ def set_variables(self, variables, check_encoding_set, writer, dimensions. """ - for vn, v in iteritems(variables): + for vn, v in variables.items(): name = _encode_variable_name(vn) check = vn in check_encoding_set target, source = self.prepare_variable( diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 0564df5b167..b3c4d088913 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -1,12 +1,10 @@ -from __future__ import absolute_import, division, print_function - import functools +from collections import OrderedDict import numpy as np from .. 
import Variable from ..core import indexing -from ..core.pycompat import OrderedDict, bytes_type, iteritems, unicode_type from ..core.utils import FrozenOrderedDict, close_on_error from .common import WritableCFDataStore from .file_manager import CachingFileManager @@ -32,7 +30,7 @@ def _getitem(self, key): def maybe_decode_bytes(txt): - if isinstance(txt, bytes_type): + if isinstance(txt, bytes): return txt.decode('utf-8') else: return txt @@ -124,7 +122,7 @@ def open_store_variable(self, name, var): encoding['original_shape'] = var.shape vlen_dtype = h5py.check_dtype(vlen=var.dtype) - if vlen_dtype is unicode_type: + if vlen_dtype is str: encoding['dtype'] = str elif vlen_dtype is not None: # pragma: no cover # xarray doesn't support writing arbitrary vlen dtypes yet. @@ -136,7 +134,7 @@ def open_store_variable(self, name, var): def get_variables(self): return FrozenOrderedDict((k, self.open_store_variable(k, v)) - for k, v in iteritems(self.ds.variables)) + for k, v in self.ds.variables.items()) def get_attrs(self): return FrozenOrderedDict(_read_attributes(self.ds)) @@ -182,7 +180,7 @@ def prepare_variable(self, name, variable, check_encoding=False, 'NC_CHAR type.' % name) if dtype is str: - dtype = h5py.special_dtype(vlen=unicode_type) + dtype = h5py.special_dtype(vlen=str) encoding = _extract_h5nc_encoding(variable, raise_on_invalid=check_encoding) @@ -221,7 +219,7 @@ def prepare_variable(self, name, variable, check_encoding=False, else: nc4_var = self.ds[name] - for k, v in iteritems(attrs): + for k, v in attrs.items(): nc4_var.attrs[k] = v target = H5NetCDFArrayWrapper(name, self) diff --git a/xarray/backends/lru_cache.py b/xarray/backends/lru_cache.py index 321a1ca4da4..e407c384aaf 100644 --- a/xarray/backends/lru_cache.py +++ b/xarray/backends/lru_cache.py @@ -1,8 +1,6 @@ import collections import threading -from ..core.pycompat import move_to_end - class LRUCache(collections.MutableMapping): """Thread-safe LRUCache based on an OrderedDict. @@ -41,7 +39,7 @@ def __getitem__(self, key): # record recent use of the key by moving it to the front of the list with self._lock: value = self._cache[key] - move_to_end(self._cache, key) + self._cache.move_to_end(key) return value def _enforce_size_limit(self, capacity): diff --git a/xarray/backends/memory.py b/xarray/backends/memory.py index 195d4647534..b7161065da8 100644 --- a/xarray/backends/memory.py +++ b/xarray/backends/memory.py @@ -1,10 +1,8 @@ -from __future__ import absolute_import, division, print_function - import copy +from collections import OrderedDict import numpy as np -from ..core.pycompat import OrderedDict from ..core.variable import Variable from .common import AbstractWritableDataStore diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 9306b24a2fc..92e990f76d5 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -1,8 +1,8 @@ -from __future__ import absolute_import, division, print_function - import functools import operator import warnings +from collections import OrderedDict +from contextlib import suppress from distutils.version import LooseVersion import numpy as np @@ -10,7 +10,6 @@ from .. 
import Variable, coding from ..coding.variables import pop_to from ..core import indexing -from ..core.pycompat import PY3, OrderedDict, basestring, iteritems, suppress from ..core.utils import FrozenOrderedDict, close_on_error, is_remote_uri from .common import ( BackendArray, WritableCFDataStore, find_root, robust_getitem) @@ -81,9 +80,6 @@ def _getitem(self, key): msg = ('The indexing operation you are attempting to perform ' 'is not valid on netCDF4.Variable object. Try loading ' 'your data into memory first by calling .load().') - if not PY3: - import traceback - msg += '\n\nOriginal traceback:\n' + traceback.format_exc() raise IndexError(msg) return array @@ -141,7 +137,7 @@ def _nc4_require_group(ds, group, mode, create_group=_netcdf4_create_group): return ds else: # make sure it's a string - if not isinstance(group, basestring): + if not isinstance(group, str): raise ValueError('group must be a string or None') # support path-like syntax path = group.strip('/').split('/') @@ -392,7 +388,7 @@ def open_store_variable(self, name, var): def get_variables(self): dsvars = FrozenOrderedDict((k, self.open_store_variable(k, v)) for k, v in - iteritems(self.ds.variables)) + self.ds.variables.items()) return dsvars def get_attrs(self): @@ -402,7 +398,7 @@ def get_attrs(self): def get_dimensions(self): dims = FrozenOrderedDict((k, len(v)) - for k, v in iteritems(self.ds.dimensions)) + for k, v in self.ds.dimensions.items()) return dims def get_encoding(self): @@ -467,7 +463,7 @@ def prepare_variable(self, name, variable, check_encoding=False, fill_value=fill_value) _disable_auto_decode_variable(nc4_var) - for k, v in iteritems(attrs): + for k, v in attrs.items(): # set attributes one-by-one since netCDF4<1.0.10 can't handle # OrderedDict as the input to setncatts _set_nc_attribute(nc4_var, k, v) diff --git a/xarray/backends/netcdf3.py b/xarray/backends/netcdf3.py index a6084649442..7f5c8d4b1a7 100644 --- a/xarray/backends/netcdf3.py +++ b/xarray/backends/netcdf3.py @@ -1,11 +1,9 @@ -from __future__ import absolute_import, division, print_function - import unicodedata +from collections import OrderedDict import numpy as np from .. import Variable, coding -from ..core.pycompat import OrderedDict, basestring, unicode_type # Special characters that are permitted in netCDF names except in the # 0th position of the string @@ -50,7 +48,7 @@ def coerce_nc3_dtype(arr): def encode_nc3_attr_value(value): if isinstance(value, bytes): pass - elif isinstance(value, unicode_type): + elif isinstance(value, str): value = value.encode(STRING_ENCODING) else: value = coerce_nc3_dtype(np.atleast_1d(value)) @@ -99,9 +97,9 @@ def is_valid_nc3_name(s): names. Names that have trailing space characters are also not permitted. """ - if not isinstance(s, basestring): + if not isinstance(s, str): return False - if not isinstance(s, unicode_type): + if not isinstance(s, str): s = s.decode('utf-8') num_bytes = len(s.encode('utf-8')) return ((unicodedata.normalize('NFC', s) == s) and diff --git a/xarray/backends/pseudonetcdf_.py b/xarray/backends/pseudonetcdf_.py index 81b5722db78..35b14b10672 100644 --- a/xarray/backends/pseudonetcdf_.py +++ b/xarray/backends/pseudonetcdf_.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import, division, print_function +from collections import OrderedDict import numpy as np from .. 
import Variable from ..core import indexing -from ..core.pycompat import OrderedDict from ..core.utils import Frozen, FrozenOrderedDict from .common import AbstractDataStore, BackendArray from .file_manager import CachingFileManager diff --git a/xarray/backends/pydap_.py b/xarray/backends/pydap_.py index 71ea4841b71..b555c35c859 100644 --- a/xarray/backends/pydap_.py +++ b/xarray/backends/pydap_.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np from .. import Variable diff --git a/xarray/backends/pynio_.py b/xarray/backends/pynio_.py index 03507ab6c2c..0995a39019d 100644 --- a/xarray/backends/pynio_.py +++ b/xarray/backends/pynio_.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np from .. import Variable diff --git a/xarray/backends/rasterio_.py b/xarray/backends/rasterio_.py index 26d408d50f6..b7726611597 100644 --- a/xarray/backends/rasterio_.py +++ b/xarray/backends/rasterio_.py @@ -2,6 +2,7 @@ import warnings from collections import OrderedDict from distutils.version import LooseVersion + import numpy as np from .. import DataArray diff --git a/xarray/backends/scipy_.py b/xarray/backends/scipy_.py index 5739c1a8617..1111f30c139 100644 --- a/xarray/backends/scipy_.py +++ b/xarray/backends/scipy_.py @@ -1,6 +1,5 @@ -from __future__ import absolute_import, division, print_function - import warnings +from collections import OrderedDict from distutils.version import LooseVersion from io import BytesIO @@ -8,7 +7,6 @@ from .. import Variable from ..core.indexing import NumpyIndexingAdapter -from ..core.pycompat import OrderedDict, basestring, iteritems from ..core.utils import Frozen, FrozenOrderedDict from .common import BackendArray, WritableCFDataStore from .file_manager import CachingFileManager, DummyFileManager @@ -27,7 +25,7 @@ def _decode_attrs(d): # don't decode _FillValue from bytes -> unicode, because we want to ensure # that its type matches the data exactly return OrderedDict((k, v if k == '_FillValue' else _decode_string(v)) - for (k, v) in iteritems(d)) + for (k, v) in d.items()) class ScipyArrayWrapper(BackendArray): @@ -70,7 +68,7 @@ def _open_scipy_netcdf(filename, mode, mmap, version): import gzip # if the string ends with .gz, then gunzip and open as netcdf file - if isinstance(filename, basestring) and filename.endswith('.gz'): + if isinstance(filename, str) and filename.endswith('.gz'): try: return scipy.io.netcdf_file(gzip.open(filename), mode=mode, mmap=mmap, version=version) @@ -139,12 +137,12 @@ def __init__(self, filename_or_obj, mode='r', format=None, group=None, % format) if (lock is None and mode != 'r' and - isinstance(filename_or_obj, basestring)): + isinstance(filename_or_obj, str)): lock = get_write_lock(filename_or_obj) self.lock = ensure_lock(lock) - if isinstance(filename_or_obj, basestring): + if isinstance(filename_or_obj, str): manager = CachingFileManager( _open_scipy_netcdf, filename_or_obj, mode=mode, lock=lock, kwargs=dict(mmap=mmap, version=version)) @@ -165,7 +163,7 @@ def open_store_variable(self, name, var): def get_variables(self): return FrozenOrderedDict((k, self.open_store_variable(k, v)) - for k, v in iteritems(self.ds.variables)) + for k, v in self.ds.variables.items()) def get_attrs(self): return Frozen(_decode_attrs(self.ds._attributes)) @@ -213,7 +211,7 @@ def prepare_variable(self, name, variable, check_encoding=False, if name not in self.ds.variables: self.ds.createVariable(name, data.dtype, variable.dims) scipy_var = 
self.ds.variables[name] - for k, v in iteritems(variable.attrs): + for k, v in variable.attrs.items(): self._validate_attr_key(k) setattr(scipy_var, k, v) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index feefaf1735f..ee77e0833c4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1,12 +1,11 @@ -from __future__ import absolute_import, division, print_function - +from collections import OrderedDict from distutils.version import LooseVersion import numpy as np from .. import Variable, coding, conventions from ..core import indexing -from ..core.pycompat import OrderedDict, integer_types, iteritems +from ..core.pycompat import integer_types from ..core.utils import FrozenOrderedDict, HiddenKeyDict from .common import AbstractWritableDataStore, BackendArray @@ -331,7 +330,7 @@ def prepare_variable(self, name, variable, check_encoding=False, encoded_attrs = OrderedDict() # the magic for storing the hidden dimension data encoded_attrs[_DIMENSION_KEY] = dims - for k, v in iteritems(attrs): + for k, v in attrs.items(): encoded_attrs[k] = self.encode_attribute(v) zarr_array = self.ds.create(name, shape=shape, dtype=dtype, diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index df22aaac813..d21139995dd 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -47,7 +47,6 @@ import numpy as np -from ..core.pycompat import basestring from .cftimeindex import CFTimeIndex, _parse_iso8601_with_reso from .times import format_cftime_datetime @@ -469,7 +468,7 @@ def to_offset(freq): def to_cftime_datetime(date_str_or_date, calendar=None): import cftime - if isinstance(date_str_or_date, basestring): + if isinstance(date_str_or_date, str): if calendar is None: raise ValueError( 'If converting a string to a cftime.datetime object, ' diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index af22a3219ad..1861d49a1d4 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -39,8 +39,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
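The import changes that follow repeat a cleanup applied throughout this diff: with Python 2 support dropped, the xarray.core.pycompat shims give way to builtins, so basestring and unicode_type become str, bytes_type becomes bytes, and iteritems(d) becomes d.items(). A small before/after sketch with made-up values:

    name = 'air_temperature'  # illustrative value only
    attrs = {'units': 'K'}    # illustrative value only

    # was: isinstance(name, basestring), with basestring from pycompat
    if isinstance(name, str):
        print('valid name:', name)

    # was: for k, v in iteritems(attrs)
    for k, v in attrs.items():
        print(k, '=', v)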
-from __future__ import absolute_import - import re import warnings from datetime import timedelta @@ -49,7 +47,6 @@ import numpy as np import pandas as pd -from xarray.core import pycompat from xarray.core.utils import is_scalar from .times import _STANDARD_CALENDARS, cftime_to_nptime, infer_calendar_name @@ -314,7 +311,7 @@ def _get_string_slice(self, key): def get_loc(self, key, method=None, tolerance=None): """Adapted from pandas.tseries.index.DatetimeIndex.get_loc""" - if isinstance(key, pycompat.basestring): + if isinstance(key, str): return self._get_string_slice(key) else: return pd.Index.get_loc(self, key, method=method, @@ -323,7 +320,7 @@ def get_loc(self, key, method=None, tolerance=None): def _maybe_cast_slice_bound(self, label, side, kind): """Adapted from pandas.tseries.index.DatetimeIndex._maybe_cast_slice_bound""" - if isinstance(label, pycompat.basestring): + if isinstance(label, str): parsed, resolution = _parse_iso8601_with_reso(self.date_type, label) start, end = _parsed_string_to_bounds(self.date_type, resolution, @@ -393,7 +390,7 @@ def shift(self, n, freq): raise TypeError("'n' must be an int, got {}.".format(n)) if isinstance(freq, timedelta): return self + n * freq - elif isinstance(freq, pycompat.basestring): + elif isinstance(freq, str): return self + n * to_offset(freq) else: raise TypeError( diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 3502fd773d7..205d285cd81 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -1,12 +1,10 @@ """Coders for strings.""" -from __future__ import absolute_import, division, print_function - from functools import partial import numpy as np from ..core import indexing -from ..core.pycompat import bytes_type, dask_array_type, unicode_type +from ..core.pycompat import dask_array_type from ..core.variable import Variable from .variables import ( VariableCoder, lazy_elemwise_func, pop_to, safe_setitem, @@ -26,11 +24,11 @@ def check_vlen_dtype(dtype): def is_unicode_dtype(dtype): - return dtype.kind == 'U' or check_vlen_dtype(dtype) == unicode_type + return dtype.kind == 'U' or check_vlen_dtype(dtype) == str def is_bytes_dtype(dtype): - return dtype.kind == 'S' or check_vlen_dtype(dtype) == bytes_type + return dtype.kind == 'S' or check_vlen_dtype(dtype) == bytes class EncodedStringCoder(VariableCoder): @@ -90,7 +88,7 @@ def encode_string_array(string_array, encoding='utf-8'): def ensure_fixed_length_bytes(var): """Ensure that a variable with vlen bytes is converted to fixed width.""" dims, data, attrs, encoding = unpack_for_encoding(var) - if check_vlen_dtype(data.dtype) == bytes_type: + if check_vlen_dtype(data.dtype) == bytes: # TODO: figure out how to handle this with dask data = np.asarray(data, dtype=np.string_) return Variable(dims, data, attrs, encoding) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c337a42e3b4..459e9e0956d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import, division, print_function - import re -import traceback import warnings from datetime import datetime from functools import partial @@ -12,7 +9,6 @@ from ..core import indexing from ..core.common import contains_cftime_datetimes from ..core.formatting import first_n_items, format_timestamp, last_item -from ..core.pycompat import PY3 from ..core.variable import Variable from .variables import ( SerializationWarning, VariableCoder, lazy_elemwise_func, pop_to, @@ -126,8 +122,6 @@ def _decode_cf_datetime_dtype(data, units, 
calendar): msg = ('unable to decode time units %r with %s. Try ' 'opening your dataset with decode_times=False.' % (units, calendar_msg)) - if not PY3: - msg += ' Full traceback:\n' + traceback.format_exc() raise ValueError(msg) else: dtype = getattr(result, 'dtype', np.dtype('object')) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d8453a95fad..1f74181f3b3 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -1,6 +1,4 @@ """Coders for individual Variable objects.""" -from __future__ import absolute_import, division, print_function - from typing import Any import warnings from functools import partial diff --git a/xarray/conventions.py b/xarray/conventions.py index ea85a6d5b74..c1c95a6b60e 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, print_function - import warnings -from collections import defaultdict +from collections import OrderedDict, defaultdict import numpy as np import pandas as pd @@ -9,9 +7,7 @@ from .coding import strings, times, variables from .coding.variables import SerializationWarning from .core import duck_array_ops, indexing -from .core.pycompat import ( - OrderedDict, basestring, bytes_type, dask_array_type, iteritems, - unicode_type) +from .core.pycompat import dask_array_type from .core.variable import IndexVariable, Variable, as_variable @@ -127,7 +123,7 @@ def _infer_dtype(array, name=None): return np.dtype(float) element = array[(0,) * array.ndim] - if isinstance(element, (bytes_type, unicode_type)): + if isinstance(element, (bytes, str)): return strings.create_vlen_dtype(type(element)) dtype = np.array(element).dtype @@ -372,7 +368,7 @@ def stackable(dim): coord_names = set() - if isinstance(drop_variables, basestring): + if isinstance(drop_variables, str): drop_variables = [drop_variables] elif drop_variables is None: drop_variables = [] @@ -383,7 +379,7 @@ def stackable(dim): _update_bounds_attributes(variables) new_vars = OrderedDict() - for k, v in iteritems(variables): + for k, v in variables.items(): if k in drop_variables: continue stack_char_dim = (concat_characters and v.dtype == 'S1' and @@ -507,7 +503,7 @@ def _encode_coordinates(variables, attributes, non_dim_coord_names): non_dim_coord_names = set(non_dim_coord_names) for name in list(non_dim_coord_names): - if isinstance(name, basestring) and ' ' in name: + if isinstance(name, str) and ' ' in name: warnings.warn( 'coordinate {!r} has a space in its name, which means it ' 'cannot be marked as a coordinate on disk and will be ' @@ -602,5 +598,5 @@ def cf_encoder(variables, attributes): See also: encode_cf_variable """ new_vars = OrderedDict((k, encode_cf_variable(v, name=k)) - for k, v in iteritems(variables)) + for k, v in variables.items()) return new_vars, attributes diff --git a/xarray/convert.py b/xarray/convert.py index 6cff72103ff..efcdd079a9f 100644 --- a/xarray/convert.py +++ b/xarray/convert.py @@ -1,8 +1,6 @@ """Functions for converting to and from xarray objects """ -from __future__ import absolute_import, division, print_function - -from collections import Counter +from collections import Counter, OrderedDict import numpy as np import pandas as pd @@ -12,7 +10,6 @@ from .core import duck_array_ops from .core.dataarray import DataArray from .core.dtypes import get_fill_value -from .core.pycompat import OrderedDict, range cdms2_ignored_attrs = {'name', 'tileIndex'} iris_forbidden_keys = {'standard_name', 'long_name', 'units', 'bounds', 'axis', diff --git 
a/xarray/core/accessors.py b/xarray/core/accessors.py index 72791ed73ec..10c900c4ad1 100644 --- a/xarray/core/accessors.py +++ b/xarray/core/accessors.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np import pandas as pd diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 789bea90b55..278548cca8c 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -1,15 +1,13 @@ -from __future__ import absolute_import, division, print_function - import functools import operator import warnings -from collections import defaultdict +from collections import OrderedDict, defaultdict +from contextlib import suppress import numpy as np from . import utils from .indexing import get_indexer_nd -from .pycompat import OrderedDict, iteritems, suppress from .utils import is_dict_like, is_full_slice from .variable import IndexVariable @@ -116,7 +114,7 @@ def align(*objects, **kwargs): # pandas). This is useful, e.g., for overwriting such duplicate indexes. joiner = _get_joiner(join) joined_indexes = {} - for dim, matching_indexes in iteritems(all_indexes): + for dim, matching_indexes in all_indexes.items(): if dim in indexes: index = utils.safe_cast_to_index(indexes[dim]) if (any(not index.equals(other) for other in matching_indexes) or @@ -315,7 +313,7 @@ def reindex_variables(variables, sizes, indexes, indexers, method=None, # size of reindexed dimensions new_sizes = {} - for name, index in iteritems(indexes): + for name, index in indexes.items(): if name in indexers: if not index.is_unique: raise ValueError( @@ -366,7 +364,7 @@ def reindex_variables(variables, sizes, indexes, indexers, method=None, args = () reindexed[dim] = IndexVariable((dim,), indexers[dim], *args) - for name, var in iteritems(variables): + for name, var in variables.items(): if name not in indexers: key = tuple(slice(None) if d in unchanged_dims diff --git a/xarray/core/arithmetic.py b/xarray/core/arithmetic.py index a3bb135af24..39901f0befd 100644 --- a/xarray/core/arithmetic.py +++ b/xarray/core/arithmetic.py @@ -1,12 +1,10 @@ """Base classes implementing arithmetic for xarray objects.""" -from __future__ import absolute_import, division, print_function - import numbers import numpy as np from .options import OPTIONS -from .pycompat import bytes_type, dask_array_type, unicode_type +from .pycompat import dask_array_type from .utils import not_implemented @@ -21,8 +19,8 @@ class SupportsArithmetic(object): # numpy.lib.mixins.NDArrayOperatorsMixin. # TODO: allow extending this with some sort of registration system - _HANDLED_TYPES = (np.ndarray, np.generic, numbers.Number, bytes_type, - unicode_type) + dask_array_type + _HANDLED_TYPES = (np.ndarray, np.generic, numbers.Number, bytes, + str) + dask_array_type def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): from .computation import apply_ufunc diff --git a/xarray/core/combine.py b/xarray/core/combine.py index 0327a65ab1b..11961dff520 100644 --- a/xarray/core/combine.py +++ b/xarray/core/combine.py @@ -1,15 +1,12 @@ -from __future__ import absolute_import, division, print_function - import itertools import warnings -from collections import Counter +from collections import Counter, OrderedDict import pandas as pd from . 
import utils from .alignment import align from .merge import merge -from .pycompat import OrderedDict, basestring, iteritems from .variable import IndexVariable, Variable, as_variable from .variable import concat as concat_vars @@ -129,7 +126,7 @@ def _calc_concat_dim_coord(dim): """ from .dataarray import DataArray - if isinstance(dim, basestring): + if isinstance(dim, str): coord = None elif not isinstance(dim, (DataArray, Variable)): dim_name = getattr(dim, 'name', None) @@ -162,7 +159,7 @@ def _calc_concat_over(datasets, dim, data_vars, coords): if dim in v.dims) def process_subset_opt(opt, subset): - if isinstance(opt, basestring): + if isinstance(opt, str): if opt == 'different': # all nonindexes that are not the same in each dataset for k in getattr(datasets[0], subset): @@ -253,7 +250,7 @@ def insert_result_variable(k, v): if (compat == 'identical' and not utils.dict_equiv(ds.attrs, result_attrs)): raise ValueError('dataset global attributes not equal') - for k, v in iteritems(ds.variables): + for k, v in ds.variables.items(): if k not in result_vars and k not in concat_over: raise ValueError('encountered unexpected variable %r' % k) elif (k in result_coord_names) != (k in ds.coords): diff --git a/xarray/core/common.py b/xarray/core/common.py index db6c675e6b5..d9d86f64da0 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,5 +1,5 @@ -from __future__ import absolute_import, division, print_function - +from collections import OrderedDict +from contextlib import suppress from textwrap import dedent import numpy as np @@ -8,7 +8,7 @@ from . import dtypes, duck_array_ops, formatting, ops from .arithmetic import SupportsArithmetic from .options import _get_keep_attrs -from .pycompat import OrderedDict, basestring, dask_array_type, suppress +from .pycompat import dask_array_type from .utils import Frozen, ReprObject, SortedKeysDict, either_dict_or_kwargs # Used as a sentinel value to indicate a all dimensions @@ -75,7 +75,7 @@ def wrapped_func(self, dim=None, **kwargs): # type: ignore and 'axis' arguments can be supplied.""" -class AbstractArray(ImplementsArrayReduce, formatting.ReprMixin): +class AbstractArray(ImplementsArrayReduce): """Shared base class for DataArray and Variable.""" def __bool__(self): @@ -128,7 +128,7 @@ def get_axis_num(self, dim): int or tuple of int Axis number or numbers corresponding to the given dimensions. 
""" - if isinstance(dim, basestring): + if isinstance(dim, str): return self._get_axis_num(dim) else: return tuple(self._get_axis_num(d) for d in dim) @@ -199,7 +199,7 @@ def __dir__(self): extra_attrs = [item for sublist in self._attr_sources for item in sublist - if isinstance(item, basestring)] + if isinstance(item, str)] return sorted(set(dir(type(self)) + extra_attrs)) def _ipython_key_completions_(self): @@ -210,7 +210,7 @@ def _ipython_key_completions_(self): item_lists = [item for sublist in self._item_sources for item in sublist - if isinstance(item, basestring)] + if isinstance(item, str)] return list(set(item_lists)) @@ -223,7 +223,7 @@ def get_squeeze_dims(xarray_obj, dim, axis=None): if dim is None and axis is None: dim = [d for d, s in xarray_obj.sizes.items() if s == 1] else: - if isinstance(dim, basestring): + if isinstance(dim, str): dim = [dim] if isinstance(axis, int): axis = (axis, ) diff --git a/xarray/core/computation.py b/xarray/core/computation.py index bf9ab56bbb4..b9303a5681d 100644 --- a/xarray/core/computation.py +++ b/xarray/core/computation.py @@ -1,12 +1,10 @@ """ Functions for applying functions that act on arrays to xarray's labeled data. """ -from __future__ import absolute_import, division, print_function - import functools import itertools import operator -from collections import Counter +from collections import Counter, OrderedDict from distutils.version import LooseVersion from typing import ( AbstractSet, Any, Dict, Iterable, List, Mapping, Union, Tuple, @@ -18,7 +16,7 @@ from . import duck_array_ops, utils from .alignment import deep_align from .merge import expand_and_merge_variables -from .pycompat import OrderedDict, basestring, dask_array_type +from .pycompat import dask_array_type from .utils import is_dict_like from .variable import Variable if TYPE_CHECKING: @@ -1043,7 +1041,7 @@ def dot(*arrays, **kwargs): if len(arrays) == 0: raise TypeError('At least one array should be given.') - if isinstance(dims, basestring): + if isinstance(dims, str): dims = (dims, ) common_dims = set.intersection(*[set(arr.dims) for arr in arrays]) diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py index 820937dae6a..9347ba6b6db 100644 --- a/xarray/core/coordinates.py +++ b/xarray/core/coordinates.py @@ -1,6 +1,5 @@ -from __future__ import absolute_import, division, print_function - -from collections import Mapping +import collections.abc +from collections import OrderedDict from contextlib import contextmanager import pandas as pd @@ -8,7 +7,6 @@ from . 
import formatting, indexing from .merge import ( expand_and_merge_variables, merge_coords, merge_coords_for_inplace_math) -from .pycompat import OrderedDict from .utils import Frozen, ReprObject, either_dict_or_kwargs from .variable import Variable @@ -17,7 +15,7 @@ _THIS_ARRAY = ReprObject('') -class AbstractCoordinates(Mapping, formatting.ReprMixin): +class AbstractCoordinates(collections.abc.Mapping): def __getitem__(self, key): raise NotImplementedError @@ -47,7 +45,7 @@ def __len__(self): def __contains__(self, key): return key in self._names - def __unicode__(self): + def __repr__(self): return formatting.coords_repr(self) @property @@ -237,6 +235,7 @@ def _update_coords(self, coords): raise ValueError('cannot add coordinates with new dimensions to ' 'a DataArray') self._data._coords = coords + self._data._indexes = None @property def variables(self): diff --git a/xarray/core/dask_array_compat.py b/xarray/core/dask_array_compat.py index 6b53dcffe6e..d8f6c0d8a11 100644 --- a/xarray/core/dask_array_compat.py +++ b/xarray/core/dask_array_compat.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - from distutils.version import LooseVersion import dask.array as da @@ -35,7 +33,7 @@ def isin(element, test_elements, assume_unique=False, invert=False): return result -if LooseVersion(dask_version) > LooseVersion('1.19.2'): +if LooseVersion(dask_version) > LooseVersion('0.19.2'): gradient = da.gradient else: # pragma: no cover diff --git a/xarray/core/dask_array_ops.py b/xarray/core/dask_array_ops.py index 25c572edd54..7e72c93da27 100644 --- a/xarray/core/dask_array_ops.py +++ b/xarray/core/dask_array_ops.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - from distutils.version import LooseVersion import numpy as np diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index f27958b1c77..3c8344f4514 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1,7 +1,6 @@ -from __future__ import absolute_import, division, print_function - import functools import warnings +from collections import OrderedDict import numpy as np import pandas as pd @@ -19,7 +18,6 @@ from .formatting import format_item from .indexes import default_indexes, Indexes from .options import OPTIONS -from .pycompat import OrderedDict, basestring, iteritems, range, zip from .utils import ( _check_inplace, decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution) @@ -37,7 +35,7 @@ def _infer_coords_and_dims(shape, coords, dims): 'which does not match the %s dimensions of the ' 'data' % (len(coords), len(shape))) - if isinstance(dims, basestring): + if isinstance(dims, str): dims = (dims,) if dims is None: @@ -57,7 +55,7 @@ def _infer_coords_and_dims(shape, coords, dims): dims = tuple(dims) else: for d in dims: - if not isinstance(d, basestring): + if not isinstance(d, str): raise TypeError('dimension %s is not a string' % d) new_coords = OrderedDict() @@ -475,14 +473,14 @@ def _getitem_coord(self, key): return self._replace_maybe_drop_dims(var, name=key) def __getitem__(self, key): - if isinstance(key, basestring): + if isinstance(key, str): return self._getitem_coord(key) else: # xarray-style array indexing return self.isel(indexers=self._item_key_to_dict(key)) def __setitem__(self, key, value): - if isinstance(key, basestring): + if isinstance(key, str): self.coords[key] = value else: # Coordinates in key, value and self[key] should be consistent. 
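The __getitem__/__setitem__ hunks just above preserve DataArray's dual indexing behavior: a plain str key is resolved as a coordinate name, while any other key is treated as array-style indexing. A short usage sketch with illustrative data:

    import numpy as np
    import xarray as xr

    da = xr.DataArray(np.arange(6).reshape(2, 3), dims=('x', 'y'),
                      coords={'x': ['a', 'b']})

    da['x']   # str key -> coordinate lookup (_getitem_coord)
    da[0, :]  # non-str key -> positional indexing via isel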
@@ -1313,9 +1311,9 @@ * y (y) int64 0 1 2 >>> stacked = arr.stack(z=('x', 'y')) >>> stacked.indexes['z'] - MultiIndex(levels=[[u'a', u'b'], [0, 1, 2]], + MultiIndex(levels=[['a', 'b'], [0, 1, 2]], labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], - names=[u'x', u'y']) + names=['x', 'y']) See also -------- @@ -1356,9 +1354,9 @@ def unstack(self, dim=None): * y (y) int64 0 1 2 >>> stacked = arr.stack(z=('x', 'y')) >>> stacked.indexes['z'] - MultiIndex(levels=[[u'a', u'b'], [0, 1, 2]], + MultiIndex(levels=[['a', 'b'], [0, 1, 2]], labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], - names=[u'x', u'y']) + names=['x', 'y']) >>> roundtripped = stacked.unstack() >>> arr.identical(roundtripped) True @@ -1760,7 +1758,7 @@ def to_netcdf(self, *args, **kwargs): return dataset.to_netcdf(*args, **kwargs) - def to_dict(self): + def to_dict(self, data=True): """ Convert this xarray.DataArray into a dictionary following xarray naming conventions. @@ -1769,22 +1767,20 @@ Useful for converting to json. To avoid datetime incompatibility, use the decode_times=False kwarg in xarray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. + See also -------- DataArray.from_dict """ - d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), - 'dims': self.dims} - + d = self.variable.to_dict(data=data) + d.update({'coords': {}, 'name': self.name}) for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) - - d.update({'data': ensure_us_time_resolution(self.values).tolist(), - 'name': self.name}) + d['coords'][k] = self.coords[k].variable.to_dict(data=data) return d @classmethod @@ -2043,7 +2039,7 @@ def _title_for_slice(self, truncate=50): """ one_dims = [] - for dim, coord in iteritems(self.coords): + for dim, coord in self.coords.items(): if coord.size == 1: one_dims.append('{dim} = {v}'.format( dim=dim, v=format_item(coord.values))) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2caf45ce954..0d1b9ebd55b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1,9 +1,8 @@ -from __future__ import absolute_import, division, print_function - import functools import sys import warnings -from collections import Mapping, defaultdict +from collections import OrderedDict, defaultdict +from collections.abc import Mapping from distutils.version import LooseVersion from numbers import Number from typing import Any, Dict, List, Set, Tuple, Union @@ -29,8 +28,7 @@ dataset_merge_method, dataset_update_method, merge_data_and_coords, merge_variables) from .options import OPTIONS, _get_keep_attrs -from .pycompat import ( - OrderedDict, basestring, dask_array_type, iteritems, range) +from .pycompat import dask_array_type from .utils import ( Frozen, SortedKeysDict, _check_inplace, datetime_to_numeric, decode_numpy_dict_values, either_dict_or_kwargs, ensure_us_time_resolution, @@ -58,7 +56,7 @@ def _get_virtual_variable(variables, key, level_vars=None, dim_sizes=None): variable = IndexVariable((key,), data) return key, key, variable - if not isinstance(key, basestring): + if not isinstance(key, str): raise KeyError(key) split_key = key.split('.', 1) @@ -97,8 +95,8 @@ def calculate_dimensions(variables): """ dims = OrderedDict() last_used = {} - scalar_vars = set(k for k, v in
iteritems(variables) if not v.dims) - for k, var in iteritems(variables): + scalar_vars = set(k for k, v in variables.items() if not v.dims) + for k, var in variables.items(): for dim, size in zip(var.dims, var.shape): if dim in scalar_vars: raise ValueError('dimension %r already exists as a scalar ' @@ -129,10 +127,10 @@ def merge_indexes( vars_to_remove = [] # type: list for dim, var_names in indexes.items(): - if isinstance(var_names, basestring): + if isinstance(var_names, str): var_names = [var_names] - names, labels, levels = [], [], [] # type: (list, list, list) + names, codes, levels = [], [], [] # type: (list, list, list) current_index_variable = variables.get(dim) for n in var_names: @@ -146,13 +144,18 @@ if current_index_variable is not None and append: current_index = current_index_variable.to_index() if isinstance(current_index, pd.MultiIndex): + try: + current_codes = current_index.codes + except AttributeError: + # for pandas<0.24 + current_codes = current_index.labels names.extend(current_index.names) - labels.extend(current_index.labels) + codes.extend(current_codes) levels.extend(current_index.levels) else: names.append('%s_level_0' % dim) cat = pd.Categorical(current_index.values, ordered=True) - labels.append(cat.codes) + codes.append(cat.codes) levels.append(cat.categories) if not len(names) and len(var_names) == 1: @@ -163,15 +166,15 @@ names.append(n) var = variables[n] cat = pd.Categorical(var.values, ordered=True) - labels.append(cat.codes) + codes.append(cat.codes) levels.append(cat.categories) - idx = pd.MultiIndex(labels=labels, levels=levels, names=names) + idx = pd.MultiIndex(levels, codes, names=names) vars_to_replace[dim] = IndexVariable(dim, idx) vars_to_remove.extend(var_names) - new_variables = OrderedDict([(k, v) for k, v in iteritems(variables) + new_variables = OrderedDict([(k, v) for k, v in variables.items() if k not in vars_to_remove]) new_variables.update(vars_to_replace) new_coord_names = coord_names | set(vars_to_replace) @@ -193,7 +196,7 @@ split_indexes( Not public API. Used in Dataset and DataArray reset_index methods. """ - if isinstance(dims_or_levels, basestring): + if isinstance(dims_or_levels, str): dims_or_levels = [dims_or_levels] dim_levels = defaultdict(list) # type: Dict[Any, list] @@ -257,7 +260,7 @@ as_dataset(obj): return obj -class DataVariables(Mapping, formatting.ReprMixin): +class DataVariables(Mapping): def __init__(self, dataset): self._dataset = dataset @@ -278,7 +281,7 @@ def __getitem__(self, key): else: raise KeyError(key) - def __unicode__(self): + def __repr__(self): return formatting.data_vars_repr(self) @property @@ -302,8 +305,7 @@ def __getitem__(self, key): return self.dataset.sel(**key) -class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords, - formatting.ReprMixin): +class Dataset(Mapping, ImplementsDatasetReduce, DataWithCoords): """A multi-dimensional, in memory, array database. A dataset resembles an in-memory representation of a NetCDF file, and @@ -825,7 +827,7 @@ def copy(self, deep=False, data=None): """ # noqa if data is None: variables = OrderedDict((k, v.copy(deep=deep)) - for k, v in iteritems(self._variables)) + for k, v in self._variables.items()) elif not utils.is_dict_like(data): raise ValueError('Data must be dict-like') else: @@ -844,7 +846,7 @@ def copy(self, deep=False, data=None): 'dataset.
Data is missing {}' .format(keys_missing_from_data)) variables = OrderedDict((k, v.copy(deep=deep, data=data.get(k))) - for k, v in iteritems(self._variables)) + for k, v in self._variables.items()) # skip __init__ to avoid costly validation return self._construct_direct(variables, self._coord_names.copy(), @@ -1119,7 +1121,7 @@ def set_coords(self, names, inplace=None): # nb. check in self._variables, not self.data_vars to insure that the # operation is idempotent inplace = _check_inplace(inplace) - if isinstance(names, basestring): + if isinstance(names, str): names = [names] self._assert_all_in_dataset(names) obj = self if inplace else self.copy() @@ -1149,7 +1151,7 @@ def reset_coords(self, names=None, drop=False, inplace=None): if names is None: names = self._coord_names - set(self.dims) else: - if isinstance(names, basestring): + if isinstance(names, str): names = [names] self._assert_all_in_dataset(names) bad_coords = set(names) & set(self.dims) @@ -1290,7 +1292,7 @@ def to_zarr(self, store=None, mode='w-', synchronizer=None, group=None, group=group, encoding=encoding, compute=compute, consolidated=consolidated) - def __unicode__(self): + def __repr__(self): return formatting.dataset_repr(self) def info(self, buf=None): @@ -1311,24 +1313,24 @@ def info(self, buf=None): buf = sys.stdout lines = [] - lines.append(u'xarray.Dataset {') - lines.append(u'dimensions:') + lines.append('xarray.Dataset {') + lines.append('dimensions:') for name, size in self.dims.items(): - lines.append(u'\t{name} = {size} ;'.format(name=name, size=size)) - lines.append(u'\nvariables:') + lines.append('\t{name} = {size} ;'.format(name=name, size=size)) + lines.append('\nvariables:') for name, da in self.variables.items(): - dims = u', '.join(da.dims) - lines.append(u'\t{type} {name}({dims}) ;'.format( + dims = ', '.join(da.dims) + lines.append('\t{type} {name}({dims}) ;'.format( type=da.dtype, name=name, dims=dims)) for k, v in da.attrs.items(): - lines.append(u'\t\t{name}:{k} = {v} ;'.format(name=name, k=k, - v=v)) - lines.append(u'\n// global attributes:') + lines.append('\t\t{name}:{k} = {v} ;'.format(name=name, k=k, + v=v)) + lines.append('\n// global attributes:') for k, v in self.attrs.items(): - lines.append(u'\t:{k} = {v} ;'.format(k=k, v=v)) - lines.append(u'}') + lines.append('\t:{k} = {v} ;'.format(k=k, v=v)) + lines.append('}') - buf.write(u'\n'.join(lines)) + buf.write('\n'.join(lines)) @property def chunks(self): @@ -1424,7 +1426,7 @@ def _validate_indexers(self, indexers): # all indexers should be int, slice, np.ndarrays, or Variable indexers_list = [] - for k, v in iteritems(indexers): + for k, v in indexers.items(): if isinstance(v, (slice, Variable)): pass elif isinstance(v, DataArray): @@ -1537,7 +1539,7 @@ def isel(self, indexers=None, drop=False, **indexers_kwargs): indexers_list = self._validate_indexers(indexers) variables = OrderedDict() - for name, var in iteritems(self._variables): + for name, var in self._variables.items(): var_indexers = {k: v for k, v in indexers_list if k in var.dims} new_var = var.isel(indexers=var_indexers) if not (drop and name in var_indexers): @@ -1686,7 +1688,7 @@ def relevant_keys(mapping): coords = relevant_keys(self.coords) indexers = [(k, np.asarray(v)) # type: ignore - for k, v in iteritems(indexers)] + for k, v in indexers.items()] indexers_dict = dict(indexers) non_indexed_dims = set(self.dims) - indexer_dims non_indexed_coords = set(self.coords) - set(coords) @@ -1707,7 +1709,7 @@ def relevant_keys(mapping): raise ValueError('All indexers must be 
the same length') # Existing dimensions are not valid choices for the dim argument - if isinstance(dim, basestring): + if isinstance(dim, str): if dim in self.dims: # dim is an invalid string raise ValueError('Existing dimension names are not valid ' @@ -1999,7 +2001,7 @@ def _validate_interp_indexer(x, new_x): return (x, new_x) variables = OrderedDict() - for name, var in iteritems(obj._variables): + for name, var in obj._variables.items(): if name not in indexers: if var.dtype.kind in 'uifc': var_indexers = {k: _validate_interp_indexer( @@ -2115,7 +2117,7 @@ def rename(self, name_dict=None, inplace=None, **names): variables = OrderedDict() coord_names = set() - for k, v in iteritems(self._variables): + for k, v in self._variables.items(): name = name_dict.get(k, k) dims = tuple(name_dict.get(dim, dim) for dim in v.dims) var = v.copy(deep=False) @@ -2173,7 +2175,7 @@ def swap_dims(self, dims_dict, inplace=None): coord_names = self._coord_names.copy() coord_names.update(dims_dict.values()) - for k, v in iteritems(self.variables): + for k, v in self.variables.items(): dims = tuple(dims_dict.get(dim, dim) for dim in v.dims) if k in result_dims: var = v.to_index_variable() @@ -2212,7 +2214,7 @@ def expand_dims(self, dim, axis=None): if isinstance(dim, int): raise ValueError('dim should be str or sequence of strs or dict') - if isinstance(dim, basestring): + if isinstance(dim, str): dim = [dim] if axis is not None and not isinstance(axis, (list, tuple)): axis = [axis] @@ -2236,7 +2238,7 @@ def expand_dims(self, dim, axis=None): raise ValueError('dims should not contain duplicate values.') variables = OrderedDict() - for k, v in iteritems(self._variables): + for k, v in self._variables.items(): if k not in dim: if k in self._coord_names: # Do not change coordinates variables[k] = v @@ -2501,7 +2503,7 @@ def unstack(self, dim=None): dims = [d for d in self.dims if isinstance(self.get_index(d), pd.MultiIndex)] else: - dims = [dim] if isinstance(dim, basestring) else dim + dims = [dim] if isinstance(dim, str) else dim missing_dims = [d for d in dims if d not in self.dims] if missing_dims: @@ -2644,7 +2646,7 @@ def drop(self, labels, dim=None): def _drop_vars(self, names): self._assert_all_in_dataset(names) drop = set(names) - variables = OrderedDict((k, v) for k, v in iteritems(self._variables) + variables = OrderedDict((k, v) for k, v in self._variables.items() if k not in drop) coord_names = set(k for k in self._coord_names if k in variables) return self._replace_vars_and_dims(variables, coord_names) @@ -2683,7 +2685,7 @@ def transpose(self, *dims): 'permuted dataset dimensions (%s)' % (dims, tuple(self.dims))) ds = self.copy() - for name, var in iteritems(self._variables): + for name, var in self._variables.items(): var_dims = tuple(dim for dim in dims if dim in var.dims) ds._variables[name] = var.transpose(*var_dims) return ds @@ -2920,7 +2922,7 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, """ if dim is ALL_DIMS: dim = None - if isinstance(dim, basestring): + if isinstance(dim, str): dims = set([dim]) elif dim is None: dims = set(self.dims) @@ -2936,7 +2938,7 @@ def reduce(self, func, dim=None, keep_attrs=None, numeric_only=False, keep_attrs = _get_keep_attrs(default=False) variables = OrderedDict() - for name, var in iteritems(self._variables): + for name, var in self._variables.items(): reduce_dims = [d for d in var.dims if d in dims] if name in self.coords: if not reduce_dims: @@ -3007,7 +3009,7 @@ def apply(self, func, keep_attrs=None, args=(), **kwargs): """ # 
noqa variables = OrderedDict( (k, maybe_wrap_array(v, func(v, *args, **kwargs))) - for k, v in iteritems(self.data_vars)) + for k, v in self.data_vars.items()) if keep_attrs is None: keep_attrs = _get_keep_attrs(default=False) attrs = self.attrs if keep_attrs else None @@ -3140,7 +3142,7 @@ def from_dataframe(cls, dataframe): obj[dims[0]] = (dims, idx) shape = -1 - for name, series in iteritems(dataframe): + for name, series in dataframe.items(): data = np.asarray(series).reshape(shape) obj[name] = (dims, data) return obj @@ -3221,7 +3223,7 @@ def to_dask_dataframe(self, dim_order=None, set_index=False): return df - def to_dict(self): + def to_dict(self, data=True): """ Convert this dataset to a dictionary following xarray naming conventions. @@ -3230,25 +3232,22 @@ def to_dict(self): Useful for coverting to json. To avoid datetime incompatibility use decode_times=False kwarg in xarrray.open_dataset. + Parameters + ---------- + data : bool, optional + Whether to include the actual data in the dictionary. When set to + False, returns just the schema. + See also -------- Dataset.from_dict """ d = {'coords': {}, 'attrs': decode_numpy_dict_values(self.attrs), 'dims': dict(self.dims), 'data_vars': {}} - for k in self.coords: - data = ensure_us_time_resolution(self[k].values).tolist() - d['coords'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['coords'].update({k: self[k].variable.to_dict(data=data)}) for k in self.data_vars: - data = ensure_us_time_resolution(self[k].values).tolist() - d['data_vars'].update({ - k: {'data': data, - 'dims': self[k].dims, - 'attrs': decode_numpy_dict_values(self[k].attrs)}}) + d['data_vars'].update({k: self[k].variable.to_dict(data=data)}) return d @classmethod @@ -3474,7 +3473,7 @@ def diff(self, dim, n=1, label='upper'): variables = OrderedDict() - for name, var in iteritems(self.variables): + for name, var in self.variables.items(): if dim in var.dims: if name in self.data_vars: variables[name] = (var.isel(**kwargs_end) - @@ -3537,7 +3536,7 @@ def shift(self, shifts=None, fill_value=dtypes.NA, **shifts_kwargs): raise ValueError("dimensions %r do not exist" % invalid) variables = OrderedDict() - for name, var in iteritems(self.variables): + for name, var in self.variables.items(): if name in self.data_vars: var_shifts = {k: v for k, v in shifts.items() if k in var.dims} @@ -3606,7 +3605,7 @@ def roll(self, shifts=None, roll_coords=None, **shifts_kwargs): unrolled_vars = () if roll_coords else self.coords variables = OrderedDict() - for k, v in iteritems(self.variables): + for k, v in self.variables.items(): if k not in unrolled_vars: variables[k] = v.roll(**{k: s for k, s in shifts.items() if k in v.dims}) @@ -3722,7 +3721,7 @@ def quantile(self, q, dim=None, interpolation='linear', numpy.nanpercentile, pandas.Series.quantile, DataArray.quantile """ - if isinstance(dim, basestring): + if isinstance(dim, str): dims = set([dim]) elif dim is None: dims = set(self.dims) @@ -3735,7 +3734,7 @@ def quantile(self, q, dim=None, interpolation='linear', q = np.asarray(q, dtype=np.float64) variables = OrderedDict() - for name, var in iteritems(self.variables): + for name, var in self.variables.items(): reduce_dims = [d for d in var.dims if d in dims] if reduce_dims or not var.dims: if name not in self.coords: @@ -3798,7 +3797,7 @@ def rank(self, dim, pct=False, keep_attrs=None): 'Dataset does not contain the dimension: %s' % dim) variables = OrderedDict() - for name, var in iteritems(self.variables): + for 
name, var in self.variables.items(): if name in self.data_vars: if dim in var.dims: variables[name] = var.rank(dim, pct=pct) diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 54fd8881a56..eb1e928b58e 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -3,8 +3,6 @@ Currently, this means Dask or NumPy arrays. None of these functions should accept or return xarray objects. """ -from __future__ import absolute_import, division, print_function - import contextlib from functools import partial import inspect diff --git a/xarray/core/extensions.py b/xarray/core/extensions.py index 8070e07a5ef..574c05f1a6b 100644 --- a/xarray/core/extensions.py +++ b/xarray/core/extensions.py @@ -1,11 +1,7 @@ -from __future__ import absolute_import, division, print_function - -import traceback import warnings from .dataarray import DataArray from .dataset import Dataset -from .pycompat import PY2 class AccessorRegistrationWarning(Warning): @@ -29,10 +25,7 @@ def __get__(self, obj, cls): # __getattr__ on data object will swallow any AttributeErrors # raised when initializing the accessor, so we need to raise as # something else (GH933): - msg = 'error initializing %r accessor.' % self._name - if PY2: - msg += ' Full traceback:\n' + traceback.format_exc() - raise RuntimeError(msg) + raise RuntimeError('error initializing %r accessor.' % self._name) # Replace the property with the accessor object. Inspired by: # http://www.pydanny.com/cached-property.html # We need to use object.__setattr__ because we overwrite __setattr__ on diff --git a/xarray/core/formatting.py b/xarray/core/formatting.py index 50fa64c9987..f3fcc1ecb37 100644 --- a/xarray/core/formatting.py +++ b/xarray/core/formatting.py @@ -1,22 +1,16 @@ """String formatting routines for __repr__. - -For the sake of sanity, we only do internal formatting with unicode, which can -be returned by the __unicode__ special method. We use ReprMixin to provide the -__repr__ method so that things can work on Python 2. """ -from __future__ import absolute_import, division, print_function - import contextlib import functools from datetime import datetime, timedelta +from itertools import zip_longest import numpy as np import pandas as pd from .duck_array_ops import array_equiv from .options import OPTIONS -from .pycompat import ( - PY2, bytes_type, dask_array_type, unicode_type, zip_longest) +from .pycompat import dask_array_type try: from pandas.errors import OutOfBoundsDatetime @@ -35,9 +29,9 @@ def pretty_print(x, numchars): def maybe_truncate(obj, maxlen=500): - s = unicode_type(obj) + s = str(obj) if len(s) > maxlen: - s = s[:(maxlen - 3)] + u'...' + s = s[:(maxlen - 3)] + '...' return s @@ -48,24 +42,6 @@ def wrap_indent(text, start='', length=None): return start + indent.join(x for x in text.splitlines()) -def ensure_valid_repr(string): - """Ensure that the given value is valid for the result of __repr__. - - On Python 2, this means we need to convert unicode to bytes. We won't need - this function once we drop Python 2.7 support. 
- """ - if PY2 and isinstance(string, unicode_type): - string = string.encode('utf-8') - return string - - -class ReprMixin(object): - """Mixin that defines __repr__ for a class that already has __unicode__.""" - - def __repr__(self): - return ensure_valid_repr(self.__unicode__()) - - def _get_indexer_at_least_n_items(shape, n_desired, from_end): assert 0 < n_desired <= np.prod(shape) cum_items = np.cumprod(shape[::-1]) @@ -127,9 +103,9 @@ def format_timestamp(t): """Cast given object to a Timestamp and return a nicely formatted string""" # Timestamp is only valid for 1678 to 2262 try: - datetime_str = unicode_type(pd.Timestamp(t)) + datetime_str = str(pd.Timestamp(t)) except OutOfBoundsDatetime: - datetime_str = unicode_type(t) + datetime_str = str(t) try: date_str, time_str = datetime_str.split() @@ -145,7 +121,7 @@ def format_timestamp(t): def format_timedelta(t, timedelta_format=None): """Cast given object to a Timestamp and return a nicely formatted string""" - timedelta_str = unicode_type(pd.Timedelta(t)) + timedelta_str = str(pd.Timedelta(t)) try: days_str, time_str = timedelta_str.split(' days ') except ValueError: @@ -166,12 +142,12 @@ def format_item(x, timedelta_format=None, quote_strings=True): return format_timestamp(x) if isinstance(x, (np.timedelta64, timedelta)): return format_timedelta(x, timedelta_format=timedelta_format) - elif isinstance(x, (unicode_type, bytes_type)): + elif isinstance(x, (str, bytes)): return repr(x) if quote_strings else x elif isinstance(x, (float, np.float)): - return u'{0:.4}'.format(x) + return '{0:.4}'.format(x) else: - return unicode_type(x) + return str(x) def format_items(x): @@ -215,20 +191,20 @@ def format_array_flat(array, max_width): cum_len = np.cumsum([len(s) + 1 for s in relevant_items]) - 1 if (array.size > 2) and ((max_possibly_relevant < array.size) or (cum_len > max_width).any()): - padding = u' ... ' + padding = ' ... ' count = min(array.size, max(np.argmax(cum_len + len(padding) - 1 > max_width), 2)) else: count = array.size - padding = u'' if (count <= 1) else u' ' + padding = '' if (count <= 1) else ' ' num_front = (count + 1) // 2 num_back = count - num_front # note that num_back is 0 <--> array.size is 0 or 1 # <--> relevant_back_items is [] - pprint_str = (u' '.join(relevant_front_items[:num_front]) + + pprint_str = (' '.join(relevant_front_items[:num_front]) + padding + - u' '.join(relevant_back_items[-num_back:])) + ' '.join(relevant_back_items[-num_back:])) return pprint_str @@ -236,30 +212,30 @@ def summarize_variable(name, var, col_width, show_values=True, marker=' ', max_width=None): if max_width is None: max_width = OPTIONS['display_width'] - first_col = pretty_print(u' %s %s ' % (marker, name), col_width) + first_col = pretty_print(' %s %s ' % (marker, name), col_width) if var.dims: - dims_str = u'(%s) ' % u', '.join(map(unicode_type, var.dims)) + dims_str = '(%s) ' % ', '.join(map(str, var.dims)) else: - dims_str = u'' - front_str = u'%s%s%s ' % (first_col, dims_str, var.dtype) + dims_str = '' + front_str = '%s%s%s ' % (first_col, dims_str, var.dtype) if show_values: values_str = format_array_flat(var, max_width - len(front_str)) elif isinstance(var._data, dask_array_type): values_str = short_dask_repr(var, show_dtype=False) else: - values_str = u'...' + values_str = '...' 
return front_str + values_str def _summarize_coord_multiindex(coord, col_width, marker): - first_col = pretty_print(u' %s %s ' % (marker, coord.name), col_width) - return u'%s(%s) MultiIndex' % (first_col, unicode_type(coord.dims[0])) + first_col = pretty_print(' %s %s ' % (marker, coord.name), col_width) + return '%s(%s) MultiIndex' % (first_col, str(coord.dims[0])) -def _summarize_coord_levels(coord, col_width, marker=u'-'): +def _summarize_coord_levels(coord, col_width, marker='-'): relevant_coord = coord[:30] - return u'\n'.join( + return '\n'.join( [summarize_variable(lname, relevant_coord.get_level_variable(lname), col_width, marker=marker) @@ -274,11 +250,11 @@ def summarize_datavar(name, var, col_width): def summarize_coord(name, var, col_width): is_index = name in var.dims show_values = var._in_memory - marker = u'*' if is_index else u' ' + marker = '*' if is_index else ' ' if is_index: coord = var.variable.to_index_variable() if coord.level_names is not None: - return u'\n'.join( + return '\n'.join( [_summarize_coord_multiindex(coord, col_width, marker), _summarize_coord_levels(coord, col_width)]) return summarize_variable( @@ -288,16 +264,16 @@ def summarize_coord(name, var, col_width): def summarize_attr(key, value, col_width=None): """Summary for __repr__ - use ``X.attrs[key]`` for full value.""" # Indent key and add ':', then right-pad if col_width is not None - k_str = u' %s:' % key + k_str = ' %s:' % key if col_width is not None: k_str = pretty_print(k_str, col_width) # Replace tabs and newlines, so we print on one line in known width - v_str = unicode_type(value).replace(u'\t', u'\\t').replace(u'\n', u'\\n') + v_str = str(value).replace('\t', '\\t').replace('\n', '\\n') # Finally, truncate to the desired display width - return maybe_truncate(u'%s %s' % (k_str, v_str), OPTIONS['display_width']) + return maybe_truncate('%s %s' % (k_str, v_str), OPTIONS['display_width']) -EMPTY_REPR = u' *empty*' +EMPTY_REPR = ' *empty*' def _get_col_items(mapping): @@ -318,7 +294,7 @@ def _get_col_items(mapping): def _calculate_col_width(col_items): - max_name_length = (max(len(unicode_type(s)) for s in col_items) + max_name_length = (max(len(str(s)) for s in col_items) if col_items else 0) col_width = max(max_name_length, 7) + 6 return col_width @@ -327,26 +303,26 @@ def _calculate_col_width(col_items): def _mapping_repr(mapping, title, summarizer, col_width=None): if col_width is None: col_width = _calculate_col_width(mapping) - summary = [u'%s:' % title] + summary = ['%s:' % title] if mapping: summary += [summarizer(k, v, col_width) for k, v in mapping.items()] else: summary += [EMPTY_REPR] - return u'\n'.join(summary) + return '\n'.join(summary) -data_vars_repr = functools.partial(_mapping_repr, title=u'Data variables', +data_vars_repr = functools.partial(_mapping_repr, title='Data variables', summarizer=summarize_datavar) -attrs_repr = functools.partial(_mapping_repr, title=u'Attributes', +attrs_repr = functools.partial(_mapping_repr, title='Attributes', summarizer=summarize_attr) def coords_repr(coords, col_width=None): if col_width is None: col_width = _calculate_col_width(_get_col_items(coords)) - return _mapping_repr(coords, title=u'Coordinates', + return _mapping_repr(coords, title='Coordinates', summarizer=summarize_coord, col_width=col_width) @@ -354,19 +330,19 @@ def indexes_repr(indexes): summary = [] for k, v in indexes.items(): summary.append(wrap_indent(repr(v), '%s: ' % k)) - return u'\n'.join(summary) + return '\n'.join(summary) def dim_summary(obj): - elements = [u'%s: 
%s' % (k, v) for k, v in obj.sizes.items()] - return u', '.join(elements) + elements = ['%s: %s' % (k, v) for k, v in obj.sizes.items()] + return ', '.join(elements) def unindexed_dims_repr(dims, coords): unindexed_dims = [d for d in dims if d not in coords] if unindexed_dims: - dims_str = u', '.join(u'%s' % d for d in unindexed_dims) - return u'Dimensions without coordinates: ' + dims_str + dims_str = ', '.join('%s' % d for d in unindexed_dims) + return 'Dimensions without coordinates: ' + dims_str else: return None @@ -426,9 +402,9 @@ def array_repr(arr): if hasattr(arr, 'name') and arr.name is not None: name_str = '%r ' % arr.name else: - name_str = u'' + name_str = '' - summary = [u'<xarray.%s %s(%s)>' + summary = ['<xarray.%s %s(%s)>' % (type(arr).__name__, name_str, dim_summary(arr))] summary.append(short_data_repr(arr)) @@ -444,16 +420,16 @@ if arr.attrs: summary.append(attrs_repr(arr.attrs)) - return u'\n'.join(summary) + return '\n'.join(summary) def dataset_repr(ds): - summary = [u'<xarray.%s>' % type(ds).__name__] + summary = ['<xarray.%s>' % type(ds).__name__] col_width = _calculate_col_width(_get_col_items(ds.variables)) - dims_start = pretty_print(u'Dimensions:', col_width) - summary.append(u'%s(%s)' % (dims_start, dim_summary(ds))) + dims_start = pretty_print('Dimensions:', col_width) + summary.append('%s(%s)' % (dims_start, dim_summary(ds))) if ds.coords: summary.append(coords_repr(ds.coords, col_width=col_width)) @@ -467,7 +443,7 @@ if ds.attrs: summary.append(attrs_repr(ds.attrs)) - return u'\n'.join(summary) + return '\n'.join(summary) def diff_dim_summary(a, b): diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 46ac515005f..63041b887ed 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import datetime import functools import warnings @@ -12,7 +10,7 @@ from .combine import concat from .common import ALL_DIMS, ImplementsArrayReduce, ImplementsDatasetReduce from .options import _get_keep_attrs -from .pycompat import integer_types, range, zip +from .pycompat import integer_types from .utils import hashable, maybe_wrap_array, peek_at, safe_cast_to_index from .variable import IndexVariable, Variable, as_variable diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py index ffa483fc370..c360a209c46 100644 --- a/xarray/core/indexes.py +++ b/xarray/core/indexes.py @@ -1,14 +1,10 @@ -from __future__ import absolute_import, division, print_function -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping +from collections.abc import Mapping from collections import OrderedDict from . import formatting -class Indexes(Mapping, formatting.ReprMixin): +class Indexes(Mapping): """Immutable proxy for Dataset or DataArrary indexes.""" def __init__(self, indexes): """Not for public consumption. @@ -32,7 +28,7 @@ def __contains__(self, key): def __getitem__(self, key): return self._indexes[key] - def __unicode__(self): + def __repr__(self): return formatting.indexes_repr(self) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index dab23540178..65a123c3319 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -1,23 +1,17 @@ -from __future__ import absolute_import, division, print_function - import functools import operator from collections import defaultdict +from collections.abc import Hashable +from contextlib import suppress from datetime import timedelta import numpy as np import pandas as pd from . 
import duck_array_ops, nputils, utils -from .pycompat import ( - dask_array_type, integer_types, iteritems, range, suppress) +from .pycompat import dask_array_type, integer_types from .utils import is_dict_like -try: - from collections.abc import Hashable -except ImportError: # Py2 - from collections import Hashable - def expanded_indexer(key, ndim): """Given a key for indexing an ndarray, return an equivalent key which is a @@ -214,7 +208,7 @@ def get_dim_indexers(data_obj, indexers): level_indexers = defaultdict(dict) dim_indexers = {} - for key, label in iteritems(indexers): + for key, label in indexers.items(): dim, = data_obj[key].dims if key != dim: # assume here multi-index level indexer @@ -222,7 +216,7 @@ def get_dim_indexers(data_obj, indexers): else: dim_indexers[key] = label - for dim, level_labels in iteritems(level_indexers): + for dim, level_labels in level_indexers.items(): if dim_indexers.get(dim, False): raise ValueError("cannot combine multi-index level indexers " "with an indexer for dimension %s" % dim) @@ -243,7 +237,7 @@ def remap_label_indexers(data_obj, indexers, method=None, tolerance=None): new_indexes = {} dim_indexers = get_dim_indexers(data_obj, indexers) - for dim, label in iteritems(dim_indexers): + for dim, label in dim_indexers.items(): try: index = data_obj.indexes[dim] except KeyError: diff --git a/xarray/core/merge.py b/xarray/core/merge.py index 637a9cbda7f..7bbd14470f2 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, division, print_function +from collections import OrderedDict from typing import ( Any, Dict, List, Mapping, Optional, Set, Tuple, TYPE_CHECKING, Union, @@ -7,7 +7,6 @@ import pandas as pd from .alignment import deep_align -from .pycompat import OrderedDict, basestring from .utils import Frozen from .variable import ( Variable, as_variable, assert_unique_multiindex_level_names) @@ -539,7 +538,7 @@ def dataset_merge_method(dataset, other, overwrite_vars, compat, join): # method due for backwards compatibility # TODO: consider deprecating it? - if isinstance(overwrite_vars, basestring): + if isinstance(overwrite_vars, str): overwrite_vars = set([overwrite_vars]) overwrite_vars = set(overwrite_vars) diff --git a/xarray/core/missing.py b/xarray/core/missing.py index ff0e63801bc..4c9435e0bf4 100644 --- a/xarray/core/missing.py +++ b/xarray/core/missing.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, print_function - import warnings -from collections import Iterable +from collections.abc import Iterable from functools import partial from typing import Any, Dict @@ -12,7 +10,6 @@ from .common import _contains_datetime_like_objects from .computation import apply_ufunc from .duck_array_ops import dask_array_type -from .pycompat import iteritems from .utils import OrderedSet, datetime_to_numeric, is_scalar from .variable import Variable, broadcast_variables @@ -147,7 +144,7 @@ def _apply_over_vars_with_dim(func, self, dim=None, **kwargs): ds = type(self)(coords=self.coords, attrs=self.attrs) - for name, var in iteritems(self.data_vars): + for name, var in self.data_vars.items(): if dim in var.dims: ds[name] = func(var, dim=dim, **kwargs) else: diff --git a/xarray/core/nanops.py b/xarray/core/nanops.py index 4d3f03c899e..babc1dd97e6 100644 --- a/xarray/core/nanops.py +++ b/xarray/core/nanops.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np from . 
import dtypes, nputils diff --git a/xarray/core/npcompat.py b/xarray/core/npcompat.py index efa68c8bad5..0adda4557dc 100644 --- a/xarray/core/npcompat.py +++ b/xarray/core/npcompat.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - from distutils.version import LooseVersion import numpy as np diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index a8d596abd86..14fbec72341 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import warnings import numpy as np diff --git a/xarray/core/ops.py b/xarray/core/ops.py index 272a4eaf2f1..97e240c5126 100644 --- a/xarray/core/ops.py +++ b/xarray/core/ops.py @@ -5,15 +5,12 @@ functions. """ -from __future__ import absolute_import, division, print_function - import operator import numpy as np from . import dtypes, duck_array_ops from .nputils import array_eq, array_ne -from .pycompat import PY3 try: import bottleneck as bn @@ -28,8 +25,6 @@ CMP_BINARY_OPS = ['lt', 'le', 'ge', 'gt'] NUM_BINARY_OPS = ['add', 'sub', 'mul', 'truediv', 'floordiv', 'mod', 'pow', 'and', 'xor', 'or'] -if not PY3: - NUM_BINARY_OPS.append('div') # methods which pass on the numpy return value unchanged # be careful not to list methods that we would want to wrap later diff --git a/xarray/core/options.py b/xarray/core/options.py index 510d2930752..c9d26c3e577 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import warnings DISPLAY_WIDTH = 'display_width' @@ -66,8 +64,9 @@ def _get_keep_attrs(default): elif global_choice in [True, False]: return global_choice else: - raise ValueError("The global option keep_attrs must be one of" - " True, False or 'default'.") + raise ValueError( + "The global option keep_attrs must be one of" + " True, False or 'default'.") class set_options(object): diff --git a/xarray/core/pycompat.py b/xarray/core/pycompat.py index 67921b5d145..bd2075fa300 100644 --- a/xarray/core/pycompat.py +++ b/xarray/core/pycompat.py @@ -1,72 +1,8 @@ # flake8: noqa -from __future__ import absolute_import, division, print_function - -import sys - import numpy as np -PY2 = sys.version_info[0] < 3 -PY3 = sys.version_info[0] >= 3 - -if PY3: # pragma: no cover - basestring = str - unicode_type = str - bytes_type = bytes - native_int_types = (int,) - - def iteritems(d): - return iter(d.items()) - - def itervalues(d): - return iter(d.values()) - - range = range - zip = zip - from itertools import zip_longest - from functools import reduce - import builtins - from urllib.request import urlretrieve - from inspect import getfullargspec as getargspec - - def move_to_end(ordered_dict, key): - ordered_dict.move_to_end(key) -else: # pragma: no cover - # Python 2 - basestring = basestring # noqa - unicode_type = unicode # noqa - bytes_type = str - native_int_types = (int, long) # noqa - - def iteritems(d): - return d.iteritems() - - def itervalues(d): - return d.itervalues() - - range = xrange - from itertools import ( - izip as zip, imap as map, izip_longest as zip_longest, - ) - reduce = reduce - import __builtin__ as builtins - from urllib import urlretrieve - from inspect import getargspec - - def move_to_end(ordered_dict, key): - value = ordered_dict[key] - del ordered_dict[key] - ordered_dict[key] = value - -integer_types = native_int_types + (np.integer,) - -try: - from cyordereddict import OrderedDict -except ImportError: # pragma: no cover - 
try: - from collections import OrderedDict - except ImportError: - from ordereddict import OrderedDict +integer_types = (int, np.integer, ) try: # solely for isinstance checks @@ -74,177 +10,3 @@ def move_to_end(ordered_dict, key): dask_array_type = (dask.array.Array,) except ImportError: # pragma: no cover dask_array_type = () - -try: - try: - from pathlib import Path - except ImportError as e: - from pathlib2 import Path - path_type = (Path, ) -except ImportError as e: - path_type = () - -try: - from contextlib import suppress -except ImportError: - # Backport from CPython 3.5: - # Used under the terms of Python's license, see licenses/PYTHON_LICENSE. - - class suppress: - """Context manager to suppress specified exceptions - - After the exception is suppressed, execution proceeds with the next - statement following the with statement. - - with suppress(FileNotFoundError): - os.remove(somefile) - # Execution still resumes here if the file was already removed - """ - - def __init__(self, *exceptions): - self._exceptions = exceptions - - def __enter__(self): - pass - - def __exit__(self, exctype, excinst, exctb): - # Unlike isinstance and issubclass, CPython exception handling - # currently only looks at the concrete type hierarchy (ignoring - # the instance and subclass checking hooks). While Guido considers - # that a bug rather than a feature, it's a fairly hard one to fix - # due to various internal implementation details. suppress provides - # the simpler issubclass based semantics, rather than trying to - # exactly reproduce the limitations of the CPython interpreter. - # - # See http://bugs.python.org/issue12029 for more details - return exctype is not None and issubclass( - exctype, self._exceptions) -try: - from contextlib import ExitStack -except ImportError: - # backport from Python 3.5: - from collections import deque - - # Inspired by discussions on http://bugs.python.org/issue13585 - class ExitStack(object): - """Context manager for dynamic management of a stack of exit callbacks - """ - - def __init__(self): - self._exit_callbacks = deque() - - def pop_all(self): - new_stack = type(self)() - new_stack._exit_callbacks = self._exit_callbacks - self._exit_callbacks = deque() - return new_stack - - def _push_cm_exit(self, cm, cm_exit): - """Helper to correctly register callbacks to __exit__ methods""" - def _exit_wrapper(*exc_details): - return cm_exit(cm, *exc_details) - _exit_wrapper.__self__ = cm - self.push(_exit_wrapper) - - def push(self, exit): - """Registers a callback with the standard __exit__ method signature - - Can suppress exceptions the same way __exit__ methods can. - - Also accepts any object with an __exit__ method (registering a call - to the method instead of the object itself) - """ - # We use an unbound method rather than a bound method to follow - # the standard lookup behaviour for special methods - _cb_type = type(exit) - try: - exit_method = _cb_type.__exit__ - except AttributeError: - # Not a context manager, so assume its a callable - self._exit_callbacks.append(exit) - else: - self._push_cm_exit(exit, exit_method) - return exit # Allow use as a decorator - - def callback(self, callback, *args, **kwds): - """Registers an arbitrary callback and arguments. - - Cannot suppress exceptions. 
- """ - def _exit_wrapper(exc_type, exc, tb): - callback(*args, **kwds) - # We changed the signature, so using @wraps is not appropriate, but - # setting __wrapped__ may still help with introspection - _exit_wrapper.__wrapped__ = callback - self.push(_exit_wrapper) - return callback # Allow use as a decorator - - def enter_context(self, cm): - """Enters the supplied context manager - - If successful, also pushes its __exit__ method as a callback and - returns the result of the __enter__ method. - """ - # We look up the special methods on the type to match the with - # statement - _cm_type = type(cm) - _exit = _cm_type.__exit__ - result = _cm_type.__enter__(cm) - self._push_cm_exit(cm, _exit) - return result - - def close(self): - """Immediately unwind the context stack""" - self.__exit__(None, None, None) - - def __enter__(self): - return self - - def __exit__(self, *exc_details): - received_exc = exc_details[0] is not None - - # We manipulate the exception state so it behaves as though - # we were actually nesting multiple with statements - frame_exc = sys.exc_info()[1] - - def _fix_exception_context(new_exc, old_exc): - # Context may not be correct, so find the end of the chain - while True: - exc_context = new_exc.__context__ - if exc_context is old_exc: - # Context is already set correctly (see issue 20317) - return - if exc_context is None or exc_context is frame_exc: - break - new_exc = exc_context - # Change the end of the chain to point to the exception - # we expect it to reference - new_exc.__context__ = old_exc - - # Callbacks are invoked in LIFO order to match the behaviour of - # nested context managers - suppressed_exc = False - pending_raise = False - while self._exit_callbacks: - cb = self._exit_callbacks.pop() - try: - if cb(*exc_details): - suppressed_exc = True - pending_raise = False - exc_details = (None, None, None) - except BaseException: - new_exc_details = sys.exc_info() - # simulate the stack of exceptions by setting the context - _fix_exception_context(new_exc_details[1], exc_details[1]) - pending_raise = True - exc_details = new_exc_details - if pending_raise: - try: - # bare "raise exc_details[1]" replaces our carefully - # set-up context - fixed_ctx = exc_details[1].__context__ - raise exc_details[1] - except BaseException: - exc_details[1].__context__ = fixed_ctx - raise - return received_exc and suppressed_exc diff --git a/xarray/core/resample.py b/xarray/core/resample.py index 886303db345..3c39d2299e4 100644 --- a/xarray/core/resample.py +++ b/xarray/core/resample.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - from . 
import ops from .groupby import DEFAULT_DIMS, DataArrayGroupBy, DatasetGroupBy diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py index 57463ef5987..8a974e2da72 100644 --- a/xarray/core/rolling.py +++ b/xarray/core/rolling.py @@ -1,6 +1,5 @@ -from __future__ import absolute_import, division, print_function - import warnings +from collections import OrderedDict from distutils.version import LooseVersion import numpy as np @@ -10,7 +9,7 @@ from .ops import ( bn, has_bottleneck, inject_coarsen_methods, inject_bottleneck_rolling_methods, inject_datasetrolling_methods) -from .pycompat import OrderedDict, dask_array_type, zip +from .pycompat import dask_array_type class Rolling(object): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index 085eaaa5ed1..b8e818693c4 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -1,20 +1,18 @@ """Internal utilties; not for external use """ -from __future__ import absolute_import, division, print_function - import contextlib import functools import itertools import os.path import re import warnings -from collections import Iterable, Mapping, MutableMapping, MutableSet +from collections import OrderedDict +from collections.abc import Iterable, Mapping, MutableMapping, MutableSet import numpy as np import pandas as pd -from .pycompat import ( - OrderedDict, basestring, bytes_type, dask_array_type, iteritems) +from .pycompat import dask_array_type def _check_inplace(inplace, default=False): @@ -37,7 +35,7 @@ def alias_warning(old_name, new_name, stacklevel=3): def alias(obj, old_name): - assert isinstance(old_name, basestring) + assert isinstance(old_name, str) @functools.wraps(obj) def wrapper(*args, **kwargs): @@ -157,7 +155,7 @@ def update_safety_check(first_dict, second_dict, compat=equivalent): Binary operator to determine if two values are compatible. By default, checks for equivalence. 
""" - for k, v in iteritems(second_dict): + for k, v in second_dict.items(): if k in first_dict and not compat(v, first_dict[k]): raise ValueError('unsafe to merge dictionaries without ' 'overriding values; conflicting key %r' % k) @@ -212,7 +210,7 @@ def is_scalar(value): """ return ( getattr(value, 'ndim', None) == 0 or - isinstance(value, (basestring, bytes_type)) or not + isinstance(value, (str, bytes)) or not isinstance(value, (Iterable, ) + dask_array_type)) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 8bd7225efc3..23ee9f24871 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1,8 +1,6 @@ -from __future__ import absolute_import, division, print_function - import functools import itertools -from collections import defaultdict +from collections import OrderedDict, defaultdict from datetime import timedelta from typing import Tuple, Type @@ -17,9 +15,9 @@ BasicIndexer, OuterIndexer, PandasIndexAdapter, VectorizedIndexer, as_indexable) from .options import _get_keep_attrs -from .pycompat import ( - OrderedDict, basestring, dask_array_type, integer_types, zip) -from .utils import OrderedSet, either_dict_or_kwargs +from .pycompat import dask_array_type, integer_types +from .utils import (OrderedSet, either_dict_or_kwargs, + decode_numpy_dict_values, ensure_us_time_resolution) try: import dask.array as da @@ -410,6 +408,16 @@ def to_index(self): """Convert this variable to a pandas.Index""" return self.to_index_variable().to_index() + def to_dict(self, data=True): + """Dictionary representation of variable.""" + item = {'dims': self.dims, + 'attrs': decode_numpy_dict_values(self.attrs)} + if data: + item['data'] = ensure_us_time_resolution(self.values).tolist() + else: + item.update({'dtype': str(self.dtype), 'shape': self.shape}) + return item + @property def dims(self): """Tuple of dimension names with which this variable is associated. @@ -421,7 +429,7 @@ def dims(self, value): self._dims = self._parse_dimensions(value) def _parse_dimensions(self, dims): - if isinstance(dims, basestring): + if isinstance(dims, str): dims = (dims,) dims = tuple(dims) if len(dims) != self.ndim: @@ -1166,7 +1174,7 @@ def set_dims(self, dims, shape=None): ------- Variable """ - if isinstance(dims, basestring): + if isinstance(dims, str): dims = [dims] if shape is None and utils.is_dict_like(dims): @@ -1402,7 +1410,7 @@ def concat(cls, variables, dim='concat_dim', positions=None, Concatenated Variable formed by stacking all the supplied variables along the given dimension. """ - if not isinstance(dim, basestring): + if not isinstance(dim, str): dim, = dim.dims # can't do this lazily: we need to loop through variables at least @@ -1653,7 +1661,7 @@ def coarsen(self, windows, func, boundary='exact', side='left'): return self.copy() reshaped, axes = self._coarsen_reshape(windows, boundary, side) - if isinstance(func, basestring): + if isinstance(func, str): name = func func = getattr(duck_array_ops, name, None) if func is None: @@ -1834,7 +1842,7 @@ def concat(cls, variables, dim='concat_dim', positions=None, This exists because we want to avoid converting Index objects to NumPy arrays, if possible. 
""" - if not isinstance(dim, basestring): + if not isinstance(dim, str): dim, = dim.dims variables = list(variables) diff --git a/xarray/plot/__init__.py b/xarray/plot/__init__.py index 4b53b22243c..51712e78bf8 100644 --- a/xarray/plot/__init__.py +++ b/xarray/plot/__init__.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function from .plot import (plot, line, step, contourf, contour, hist, imshow, pcolormesh) diff --git a/xarray/plot/facetgrid.py b/xarray/plot/facetgrid.py index f133e7806a3..2a4c67036d6 100644 --- a/xarray/plot/facetgrid.py +++ b/xarray/plot/facetgrid.py @@ -1,13 +1,11 @@ -from __future__ import absolute_import, division, print_function - import functools import itertools import warnings +from inspect import getfullargspec import numpy as np from ..core.formatting import format_item -from ..core.pycompat import getargspec from .utils import ( _determine_cmap_params, _infer_xy_labels, import_matplotlib_pyplot, label_from_attrs) @@ -240,7 +238,7 @@ def map_dataarray(self, func, x, y, **kwargs): 'filled': func.__name__ != 'contour', } - cmap_args = getargspec(_determine_cmap_params).args + cmap_args = getfullargspec(_determine_cmap_params).args cmap_kwargs.update((a, kwargs[a]) for a in cmap_args if a in kwargs) cmap_params = _determine_cmap_params(**cmap_kwargs) diff --git a/xarray/plot/plot.py b/xarray/plot/plot.py index 1f7b8d8587a..13d6ec31104 100644 --- a/xarray/plot/plot.py +++ b/xarray/plot/plot.py @@ -5,8 +5,6 @@ Or use the methods on a DataArray: DataArray.plot._____ """ -from __future__ import absolute_import, division, print_function - import functools import warnings from datetime import datetime @@ -15,7 +13,6 @@ import pandas as pd from xarray.core.common import contains_cftime_datetimes -from xarray.core.pycompat import basestring from .facetgrid import FacetGrid from .utils import ( @@ -834,14 +831,14 @@ def newplotfunc(darray, x=None, y=None, figsize=None, size=None, kwargs['levels'] = cmap_params['levels'] # if colors == a single color, matplotlib draws dashed negative # contours. we lose this feature if we pass cmap and not colors - if isinstance(colors, basestring): + if isinstance(colors, str): cmap_params['cmap'] = None kwargs['colors'] = colors if 'pcolormesh' == plotfunc.__name__: kwargs['infer_intervals'] = infer_intervals - if 'imshow' == plotfunc.__name__ and isinstance(aspect, basestring): + if 'imshow' == plotfunc.__name__ and isinstance(aspect, str): # forbid usage of mpl strings raise ValueError("plt.imshow's `aspect` kwarg is not available " "in xarray") diff --git a/xarray/plot/utils.py b/xarray/plot/utils.py index 41f61554739..a42fbc7aba6 100644 --- a/xarray/plot/utils.py +++ b/xarray/plot/utils.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import itertools import textwrap import warnings @@ -8,7 +6,6 @@ import pandas as pd from ..core.options import OPTIONS -from ..core.pycompat import basestring from ..core.utils import is_scalar ROBUST_PERCENTILE = 2.0 @@ -104,7 +101,7 @@ def _color_palette(cmap, n_colors): # we have a list of colors cmap = ListedColormap(cmap, N=n_colors) pal = cmap(colors_i) - elif isinstance(cmap, basestring): + elif isinstance(cmap, str): # we have some sort of named palette try: # is this a matplotlib cmap? 
diff --git a/xarray/testing.py b/xarray/testing.py index 418f1a08668..794c0614925 100644 --- a/xarray/testing.py +++ b/xarray/testing.py @@ -1,6 +1,4 @@ """Testing functions exposed to the user API""" -from __future__ import absolute_import, division, print_function - import numpy as np from xarray.core import duck_array_ops diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 58f76596822..a7eafa92bd7 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function import warnings from contextlib import contextmanager from distutils import version diff --git a/xarray/tests/test_accessors.py b/xarray/tests/test_accessors.py index 5d088e8cd48..ae95bae3a93 100644 --- a/xarray/tests/test_accessors.py +++ b/xarray/tests/test_accessors.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import numpy as np import pandas as pd import pytest diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index d3c8599b21b..55e4eb7c8db 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import contextlib import itertools import math @@ -11,6 +9,7 @@ import tempfile from typing import Optional import warnings +from contextlib import ExitStack from io import BytesIO import numpy as np @@ -26,8 +25,7 @@ from xarray.backends.pydap_ import PydapDataStore from xarray.core import indexing from xarray.core.options import set_options -from xarray.core.pycompat import ( - ExitStack, basestring, dask_array_type, iteritems) +from xarray.core.pycompat import dask_array_type from xarray.tests import mock from . import ( @@ -206,7 +204,7 @@ def test_zero_dimensional_variable(self): expected = create_test_data() expected['float_var'] = ([], 1.0e9, {'units': 'units of awesome'}) expected['bytes_var'] = ([], b'foobar') - expected['string_var'] = ([], u'foobar') + expected['string_var'] = ([], 'foobar') with self.roundtrip(expected) as actual: assert_identical(expected, actual) @@ -338,8 +336,8 @@ def test_roundtrip_object_dtype(self): floats_nans = np.array([np.nan, np.nan, 1.0, 2.0, 3.0], dtype=object) bytes_ = np.array([b'ab', b'cdef', b'g'], dtype=object) bytes_nans = np.array([b'ab', b'cdef', np.nan], dtype=object) - strings = np.array([u'ab', u'cdef', u'g'], dtype=object) - strings_nans = np.array([u'ab', u'cdef', np.nan], dtype=object) + strings = np.array(['ab', 'cdef', 'g'], dtype=object) + strings_nans = np.array(['ab', 'cdef', np.nan], dtype=object) all_nans = np.array([np.nan, np.nan], dtype=object) original = Dataset({'floats': ('a', floats), 'floats_nans': ('a', floats_nans), @@ -361,7 +359,7 @@ def test_roundtrip_object_dtype(self): # explicitly set. 
# https://github.com/pydata/xarray/issues/1647 expected['bytes_nans'][-1] = b'' - expected['strings_nans'][-1] = u'' + expected['strings_nans'][-1] = '' assert_identical(expected, actual) def test_roundtrip_string_data(self): @@ -370,7 +368,7 @@ def test_roundtrip_string_data(self): assert_identical(expected, actual) def test_roundtrip_string_encoded_characters(self): - expected = Dataset({'x': ('t', [u'ab', u'cdef'])}) + expected = Dataset({'x': ('t', ['ab', 'cdef'])}) expected['x'].encoding['dtype'] = 'S1' with self.roundtrip(expected) as actual: assert_identical(expected, actual) @@ -641,7 +639,7 @@ def test_roundtrip_bytes_with_fill_value(self): assert_identical(expected, actual) def test_roundtrip_string_with_fill_value_nchar(self): - values = np.array([u'ab', u'cdef', np.nan], dtype=object) + values = np.array(['ab', 'cdef', np.nan], dtype=object) expected = Dataset({'x': ('t', values)}) encoding = {'dtype': 'S1', '_FillValue': b'X'} @@ -790,7 +788,7 @@ def test_encoding_kwarg_fixed_width_string(self): # regression test for GH2149 for strings in [ [b'foo', b'bar', b'baz'], - [u'foo', u'bar', u'baz'], + ['foo', 'bar', 'baz'], ]: ds = Dataset({'x': strings}) kwargs = dict(encoding={'x': {'dtype': 'S1'}}) @@ -982,29 +980,29 @@ def test_write_groups(self): def test_encoding_kwarg_vlen_string(self): for input_strings in [ [b'foo', b'bar', b'baz'], - [u'foo', u'bar', u'baz'], + ['foo', 'bar', 'baz'], ]: original = Dataset({'x': input_strings}) - expected = Dataset({'x': [u'foo', u'bar', u'baz']}) + expected = Dataset({'x': ['foo', 'bar', 'baz']}) kwargs = dict(encoding={'x': {'dtype': str}}) with self.roundtrip(original, save_kwargs=kwargs) as actual: assert actual['x'].encoding['dtype'] is str assert_identical(actual, expected) def test_roundtrip_string_with_fill_value_vlen(self): - values = np.array([u'ab', u'cdef', np.nan], dtype=object) + values = np.array(['ab', 'cdef', np.nan], dtype=object) expected = Dataset({'x': ('t', values)}) # netCDF4-based backends don't support an explicit fillvalue # for variable length strings yet. 
# https://github.com/Unidata/netcdf4-python/issues/730 # https://github.com/shoyer/h5netcdf/issues/37 - original = Dataset({'x': ('t', values, {}, {'_FillValue': u'XXX'})}) + original = Dataset({'x': ('t', values, {}, {'_FillValue': 'XXX'})}) with pytest.raises(NotImplementedError): with self.roundtrip(original) as actual: assert_identical(expected, actual) - original = Dataset({'x': ('t', values, {}, {'_FillValue': u''})}) + original = Dataset({'x': ('t', values, {}, {'_FillValue': ''})}) with pytest.raises(NotImplementedError): with self.roundtrip(original) as actual: assert_identical(expected, actual) @@ -1054,7 +1052,7 @@ def test_open_encodings(self): with open_dataset(tmp_file) as actual: assert_equal(actual['time'], expected['time']) actual_encoding = dict((k, v) for k, v in - iteritems(actual['time'].encoding) + actual['time'].encoding.items() if k in expected['time'].encoding) assert actual_encoding == \ expected['time'].encoding @@ -1094,7 +1092,7 @@ def test_compression_encoding(self): 'shuffle': True, 'original_shape': data.var2.shape}) with self.roundtrip(data) as actual: - for k, v in iteritems(data['var2'].encoding): + for k, v in data['var2'].encoding.items(): assert v == actual['var2'].encoding[k] # regression test for #156 @@ -1688,7 +1686,7 @@ def create_store(self): yield store def test_encoding_kwarg_vlen_string(self): - original = Dataset({'x': [u'foo', u'bar', u'baz']}) + original = Dataset({'x': ['foo', 'bar', 'baz']}) kwargs = dict(encoding={'x': {'dtype': str}}) with raises_regex(ValueError, 'encoding dtype=str for vlen'): with self.roundtrip(original, save_kwargs=kwargs): @@ -2860,7 +2858,7 @@ def test_utm(self): with create_tmp_geotiff() as (tmp_file, expected): with xr.open_rasterio(tmp_file) as rioda: assert_allclose(rioda, expected) - assert isinstance(rioda.attrs['crs'], basestring) + assert isinstance(rioda.attrs['crs'], str) assert isinstance(rioda.attrs['res'], tuple) assert isinstance(rioda.attrs['is_tiled'], np.uint8) assert isinstance(rioda.attrs['transform'], tuple) @@ -2903,7 +2901,7 @@ def test_platecarree(self): as (tmp_file, expected): with xr.open_rasterio(tmp_file) as rioda: assert_allclose(rioda, expected) - assert isinstance(rioda.attrs['crs'], basestring) + assert isinstance(rioda.attrs['crs'], str) assert isinstance(rioda.attrs['res'], tuple) assert isinstance(rioda.attrs['is_tiled'], np.uint8) assert isinstance(rioda.attrs['transform'], tuple) @@ -3141,15 +3139,15 @@ def test_ENVI_tags(self): with xr.open_rasterio(tmp_file) as rioda: assert_allclose(rioda, expected) - assert isinstance(rioda.attrs['crs'], basestring) + assert isinstance(rioda.attrs['crs'], str) assert isinstance(rioda.attrs['res'], tuple) assert isinstance(rioda.attrs['is_tiled'], np.uint8) assert isinstance(rioda.attrs['transform'], tuple) assert len(rioda.attrs['transform']) == 6 # from ENVI tags - assert isinstance(rioda.attrs['description'], basestring) - assert isinstance(rioda.attrs['map_info'], basestring) - assert isinstance(rioda.attrs['samples'], basestring) + assert isinstance(rioda.attrs['description'], str) + assert isinstance(rioda.attrs['map_info'], str) + assert isinstance(rioda.attrs['samples'], str) def test_no_mftime(self): # rasterio can accept "filename" urguments that are actually urls, diff --git a/xarray/tests/test_backends_api.py b/xarray/tests/test_backends_api.py index 2b025db8cab..04ad473cd26 100644 --- a/xarray/tests/test_backends_api.py +++ b/xarray/tests/test_backends_api.py @@ -1,4 +1,3 @@ - import pytest from xarray.backends.api import 
_get_default_engine diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index f251d4e9710..645a16bccc3 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - from datetime import timedelta import numpy as np @@ -10,10 +8,11 @@ from xarray.coding.cftimeindex import ( CFTimeIndex, _parse_array_of_cftime_strings, _parse_iso8601_with_reso, _parsed_string_to_bounds, assert_all_valid_date_type, parse_iso8601) -from xarray.tests import assert_array_equal, assert_identical +from xarray.tests import assert_array_equal, assert_allclose, assert_identical -from . import (has_cftime, has_cftime_1_0_2_1, has_cftime_or_netCDF4, - raises_regex, requires_cftime) +from . import ( + has_cftime, has_cftime_1_0_2_1, has_cftime_or_netCDF4, raises_regex, + requires_cftime) from .test_coding_times import ( _ALL_CALENDARS, _NON_STANDARD_CALENDARS, _all_cftime_date_types) diff --git a/xarray/tests/test_cftimeindex_resample.py b/xarray/tests/test_cftimeindex_resample.py index ed55ff6b2d1..0b56f1d1fc6 100644 --- a/xarray/tests/test_cftimeindex_resample.py +++ b/xarray/tests/test_cftimeindex_resample.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - import pytest import datetime @@ -8,7 +6,7 @@ import xarray as xr pytest.importorskip('cftime') -pytest.importorskip('pandas', minversion='0.23.99') +pytest.importorskip('pandas', minversion='0.24') @pytest.fixture( diff --git a/xarray/tests/test_coding.py b/xarray/tests/test_coding.py index 6300a1957f8..95c8ebc0b42 100644 --- a/xarray/tests/test_coding.py +++ b/xarray/tests/test_coding.py @@ -1,9 +1,10 @@ +from contextlib import suppress + import numpy as np import pytest import xarray as xr from xarray.coding import variables -from xarray.core.pycompat import suppress from . import assert_identical, requires_dask diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index ca138ca8362..c50376a5841 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import absolute_import, division, print_function +from contextlib import suppress import numpy as np import pytest @@ -7,7 +7,6 @@ from xarray import Variable from xarray.coding import strings from xarray.core import indexing -from xarray.core.pycompat import bytes_type, suppress, unicode_type from . 
import ( IndexerMaker, assert_array_equal, assert_identical, raises_regex, @@ -18,17 +17,17 @@ def test_vlen_dtype(): - dtype = strings.create_vlen_dtype(unicode_type) - assert dtype.metadata['element_type'] == unicode_type + dtype = strings.create_vlen_dtype(str) + assert dtype.metadata['element_type'] == str assert strings.is_unicode_dtype(dtype) assert not strings.is_bytes_dtype(dtype) - assert strings.check_vlen_dtype(dtype) is unicode_type + assert strings.check_vlen_dtype(dtype) is str - dtype = strings.create_vlen_dtype(bytes_type) - assert dtype.metadata['element_type'] == bytes_type + dtype = strings.create_vlen_dtype(bytes) + assert dtype.metadata['element_type'] == bytes assert not strings.is_unicode_dtype(dtype) assert strings.is_bytes_dtype(dtype) - assert strings.check_vlen_dtype(dtype) is bytes_type + assert strings.check_vlen_dtype(dtype) is bytes assert strings.check_vlen_dtype(np.dtype(object)) is None @@ -36,12 +35,12 @@ def test_vlen_dtype(): def test_EncodedStringCoder_decode(): coder = strings.EncodedStringCoder() - raw_data = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')]) + raw_data = np.array([b'abc', 'ß∂µ∆'.encode('utf-8')]) raw = Variable(('x',), raw_data, {'_Encoding': 'utf-8'}) actual = coder.decode(raw) expected = Variable( - ('x',), np.array([u'abc', u'ß∂µ∆'], dtype=object)) + ('x',), np.array(['abc', 'ß∂µ∆'], dtype=object)) assert_identical(actual, expected) assert_identical(coder.decode(actual[0]), expected[0]) @@ -51,12 +50,12 @@ def test_EncodedStringCoder_decode(): def test_EncodedStringCoder_decode_dask(): coder = strings.EncodedStringCoder() - raw_data = np.array([b'abc', u'ß∂µ∆'.encode('utf-8')]) + raw_data = np.array([b'abc', 'ß∂µ∆'.encode('utf-8')]) raw = Variable(('x',), raw_data, {'_Encoding': 'utf-8'}).chunk() actual = coder.decode(raw) assert isinstance(actual.data, da.Array) - expected = Variable(('x',), np.array([u'abc', u'ß∂µ∆'], dtype=object)) + expected = Variable(('x',), np.array(['abc', 'ß∂µ∆'], dtype=object)) assert_identical(actual, expected) actual_indexed = coder.decode(actual[0]) @@ -65,8 +64,8 @@ def test_EncodedStringCoder_decode_dask(): def test_EncodedStringCoder_encode(): - dtype = strings.create_vlen_dtype(unicode_type) - raw_data = np.array([u'abc', u'ß∂µ∆'], dtype=dtype) + dtype = strings.create_vlen_dtype(str) + raw_data = np.array(['abc', 'ß∂µ∆'], dtype=dtype) expected_data = np.array([r.encode('utf-8') for r in raw_data], dtype=object) @@ -97,7 +96,7 @@ def test_CharacterArrayCoder_roundtrip(original): @pytest.mark.parametrize('data', [ np.array([b'a', b'bc']), - np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes_type)), + np.array([b'a', b'bc'], dtype=strings.create_vlen_dtype(bytes)), ]) def test_CharacterArrayCoder_encode(data): coder = strings.CharacterArrayCoder() diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 756d51e7997..863c0378835 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, print_function - import warnings from itertools import product @@ -33,7 +31,7 @@ # here we add a couple minor formatting errors to test # the robustness of the parsing algorithm. 
diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py
index 756d51e7997..863c0378835 100644
--- a/xarray/tests/test_coding_times.py
+++ b/xarray/tests/test_coding_times.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, print_function
-
 import warnings
 from itertools import product
 
@@ -33,7 +31,7 @@
     # here we add a couple minor formatting errors to test
     # the robustness of the parsing algorithm.
     (12300 + np.arange(5), 'hour since 1680-01-01 00:00:00'),
-    (12300 + np.arange(5), u'Hour since 1680-01-01 00:00:00'),
+    (12300 + np.arange(5), 'Hour since 1680-01-01 00:00:00'),
     (12300 + np.arange(5), ' Hour since 1680-01-01 00:00:00 '),
     (10, 'days since 2000-01-01'),
     ([10], 'daYs since 2000-01-01'),
@@ -535,17 +533,22 @@ def test_infer_cftime_datetime_units(calendar, date_args, expected):
 
 @pytest.mark.parametrize(
     ['timedeltas', 'units', 'numbers'],
-    [('1D', 'days', np.int64(1)),
-     (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')),
-     ('1h', 'hours', np.int64(1)),
-     ('1ms', 'milliseconds', np.int64(1)),
-     ('1us', 'microseconds', np.int64(1)),
-     (['NaT', '0s', '1s'], None, [np.nan, 0, 1]),
-     (['30m', '60m'], 'hours', [0.5, 1.0]),
-     (np.timedelta64('NaT', 'ns'), 'days', np.nan),
-     (['NaT', 'NaT'], 'days', [np.nan, np.nan])])
+    [
+        ('1D', 'days', np.int64(1)),
+        (['1D', '2D', '3D'], 'days', np.array([1, 2, 3], 'int64')),
+        ('1h', 'hours', np.int64(1)),
+        ('1ms', 'milliseconds', np.int64(1)),
+        ('1us', 'microseconds', np.int64(1)),
+        (['NaT', '0s', '1s'], None, [np.nan, 0, 1]),
+        (['30m', '60m'], 'hours', [0.5, 1.0]),
+        ('NaT', 'days', np.nan),
+        (['NaT', 'NaT'], 'days', [np.nan, np.nan]),
+    ])
 def test_cf_timedelta(timedeltas, units, numbers):
-    timedeltas = pd.to_timedelta(timedeltas, box=False)
+    if timedeltas == 'NaT':
+        timedeltas = np.timedelta64('NaT', 'ns')
+    else:
+        timedeltas = pd.to_timedelta(timedeltas, box=False)
     numbers = np.array(numbers)
 
     expected = numbers
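In the `test_cf_timedelta` change above, the scalar-NaT case is now parametrized as the string 'NaT' and converted inside the test body; a plausible motivation (not stated in the diff) is that handing a bare `np.timedelta64('NaT')` straight to `pd.to_timedelta` does not behave uniformly across the pandas versions being supported. A standalone sketch of the new branch:

    import numpy as np
    import pandas as pd

    # Scalar NaT is built by hand; every other input still goes through
    # pd.to_timedelta (box=False returns raw numpy values, not pandas objects).
    timedeltas = 'NaT'
    if timedeltas == 'NaT':
        timedeltas = np.timedelta64('NaT', 'ns')
    else:
        timedeltas = pd.to_timedelta(timedeltas, box=False)
    assert np.isnat(timedeltas)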
diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
index e978350d322..c37abc98f07 100644
--- a/xarray/tests/test_combine.py
+++ b/xarray/tests/test_combine.py
@@ -1,5 +1,4 @@
-from __future__ import absolute_import, division, print_function
-
+from collections import OrderedDict
 from copy import deepcopy
 from itertools import product
 
@@ -12,7 +11,6 @@
     _auto_combine, _auto_combine_1d, _auto_combine_all_along_first_dim,
     _check_shape_tile_ids, _combine_nd, _infer_concat_order_from_positions,
     _infer_tile_ids_from_nested_list, _new_tile_id)
-from xarray.core.pycompat import OrderedDict, iteritems
 
 from . import (
     InaccessibleArray, assert_array_equal, assert_combined_tile_ids_equal,
@@ -38,7 +36,7 @@ def rectify_dim_order(dataset):
         # return a new dataset with all variable dimensions transposed into
         # the order in which they are found in `data`
         return Dataset(dict((k, v.transpose(*data[k].dims))
-                            for k, v in iteritems(dataset.data_vars)),
+                            for k, v in dataset.data_vars.items()),
                        dataset.coords, attrs=dataset.attrs)
 
     for dim in ['dim1', 'dim2']:
@@ -52,7 +50,7 @@ def rectify_dim_order(dataset):
             data, concat(datasets, data[dim], coords='minimal'))
 
         datasets = [g for _, g in data.groupby(dim, squeeze=True)]
-        concat_over = [k for k, v in iteritems(data.coords)
+        concat_over = [k for k, v in data.coords.items()
                        if dim in v.dims and k != dim]
         actual = concat(datasets, data[dim], coords=concat_over)
         assert_identical(data, rectify_dim_order(actual))
diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py
index 5fa518f5112..27f5e7ec079 100644
--- a/xarray/tests/test_conventions.py
+++ b/xarray/tests/test_conventions.py
@@ -1,6 +1,4 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import, division, print_function
-
 import contextlib
 import warnings
 
@@ -126,7 +124,7 @@ def test_multidimensional_coordinates(self):
     @requires_dask
     def test_string_object_warning(self):
         original = Variable(
-            ('x',), np.array([u'foo', u'bar'], dtype=object)).chunk()
+            ('x',), np.array(['foo', 'bar'], dtype=object)).chunk()
         with pytest.warns(SerializationWarning,
                           match='dask array with dtype=object'):
             encoded = conventions.encode_cf_variable(original)
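The recurring `iteritems(mapping)` → `mapping.items()` rewrite in these test files is a pure Python 3 idiom change: dict views already iterate lazily, so no helper is needed, and `OrderedDict` now comes straight from `collections`. A small sketch of the rename pattern used in `rectify_dim_order` here and in `test_rename` below; the names are illustrative, not from the test suite:

    from collections import OrderedDict

    # .items() is a drop-in replacement for the removed iteritems() helper.
    newnames = {'dim1': 'x', 'dim2': 'y'}
    variables = OrderedDict([('dim1', 1), ('dim2', 2), ('other', 3)])
    for k, v in newnames.items():
        variables[v] = variables.pop(k)
    assert list(variables) == ['other', 'x', 'y']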
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index c77384c5733..b6a70794c23 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -1,6 +1,6 @@
-from __future__ import absolute_import, division, print_function
-
 import pickle
+from collections import OrderedDict
+from contextlib import suppress
 from distutils.version import LooseVersion
 from textwrap import dedent
 
@@ -11,7 +11,6 @@
 import xarray as xr
 import xarray.ufuncs as xu
 from xarray import DataArray, Dataset, Variable
-from xarray.core.pycompat import OrderedDict, suppress
 from xarray.tests import mock
 
 from . import (
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 1d2b938c759..42c23bd7ade 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -1,7 +1,6 @@
-from __future__ import absolute_import, division, print_function
-
 import pickle
 import warnings
+from collections import OrderedDict
 from copy import deepcopy
 from textwrap import dedent
 
@@ -16,7 +15,6 @@
 from xarray.convert import from_cdms2
 from xarray.core import dtypes
 from xarray.core.common import ALL_DIMS, full_like
-from xarray.core.pycompat import OrderedDict, iteritems
 from xarray.tests import (
     LooseVersion, ReturnItem, assert_allclose, assert_array_equal,
     assert_equal, assert_identical, raises_regex, requires_bottleneck,
@@ -74,7 +72,7 @@ def test_properties(self):
         assert len(self.dv) == len(self.v)
         assert_equal(self.dv.variable, self.v)
         assert set(self.dv.coords) == set(self.ds.coords)
-        for k, v in iteritems(self.dv.coords):
+        for k, v in self.dv.coords.items():
            assert_array_equal(v, self.ds.coords[k])
         with pytest.raises(AttributeError):
             self.dv.dataset
@@ -124,21 +122,14 @@ def test_struct_array_dims(self):
         """
         # GH837, GH861
         # checking array subraction when dims are the same
-        p_data = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
+        # note: names need to be in sorted order to align consistently with
+        # pandas < 0.24 and >= 0.24.
+        p_data = np.array([('Abe', 180), ('Stacy', 150), ('Dick', 200)],
                           dtype=[('name', '|S256'), ('height', object)])
-
-        p_data_1 = np.array([('John', 180), ('Stacy', 150), ('Dick', 200)],
-                            dtype=[('name', '|S256'), ('height', object)])
-
-        p_data_2 = np.array([('John', 180), ('Dick', 200)],
-                            dtype=[('name', '|S256'), ('height', object)])
-
         weights_0 = DataArray([80, 56, 120], dims=['participant'],
                               coords={'participant': p_data})
-
         weights_1 = DataArray([81, 52, 115], dims=['participant'],
-                              coords={'participant': p_data_1})
-
+                              coords={'participant': p_data})
         actual = weights_1 - weights_0
 
         expected = DataArray([1, -4, -5], dims=['participant'],
@@ -147,31 +138,27 @@ def test_struct_array_dims(self):
         assert_identical(actual, expected)
 
         # checking array subraction when dims are not the same
-        p_data_1 = np.array([('John', 180), ('Stacy', 151), ('Dick', 200)],
-                            dtype=[('name', '|S256'), ('height', object)])
-
+        p_data_alt = np.array([('Abe', 180), ('Stacy', 151), ('Dick', 200)],
+                              dtype=[('name', '|S256'), ('height', object)])
         weights_1 = DataArray([81, 52, 115], dims=['participant'],
-                              coords={'participant': p_data_1})
-
+                              coords={'participant': p_data_alt})
         actual = weights_1 - weights_0
 
         expected = DataArray([1, -5], dims=['participant'],
-                             coords={'participant': p_data_2})
+                             coords={'participant': p_data[[0, 2]]})
 
         assert_identical(actual, expected)
 
         # checking array subraction when dims are not the same and one
         # is np.nan
-        p_data_1 = np.array([('John', 180), ('Stacy', np.nan), ('Dick', 200)],
-                            dtype=[('name', '|S256'), ('height', object)])
-
+        p_data_nan = np.array([('Abe', 180), ('Stacy', np.nan), ('Dick', 200)],
+                              dtype=[('name', '|S256'), ('height', object)])
         weights_1 = DataArray([81, 52, 115], dims=['participant'],
-                              coords={'participant': p_data_1})
-
+                              coords={'participant': p_data_nan})
         actual = weights_1 - weights_0
 
         expected = DataArray([1, -5], dims=['participant'],
-                             coords={'participant': p_data_2})
+                             coords={'participant': p_data[[0, 2]]})
 
         assert_identical(actual, expected)
@@ -1222,6 +1209,11 @@ def test_coords_alignment(self):
                             dims='x')
         assert_identical(lhs, expected)
 
+    def test_set_coords_update_index(self):
+        actual = DataArray([1, 2, 3], [('x', [1, 2, 3])])
+        actual.coords['x'] = ['a', 'b', 'c']
+        assert actual.indexes['x'].equals(pd.Index(['a', 'b', 'c']))
+
     def test_coords_replacement_alignment(self):
         # regression test for GH725
         arr = DataArray([0, 1, 2], dims=['abc'])
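The new `test_set_coords_update_index` above pins down public behaviour that can be demonstrated standalone: assigning fresh values to an existing coordinate must also rebuild the pandas index backing it, rather than leaving a stale one behind:

    import pandas as pd
    import xarray as xr

    # After overwriting 'x', the backing index reflects the new labels.
    arr = xr.DataArray([1, 2, 3], [('x', [1, 2, 3])])
    arr.coords['x'] = ['a', 'b', 'c']
    assert arr.indexes['x'].equals(pd.Index(['a', 'b', 'c']))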
@@ -2908,6 +2900,15 @@ def test_to_and_from_dict(self):
                 ValueError, "cannot convert dict without the key 'data'"):
             DataArray.from_dict(d)
 
+        # check the data=False option
+        expected_no_data = expected.copy()
+        del expected_no_data['data']
+        del expected_no_data['coords']['x']['data']
+        expected_no_data['coords']['x'].update({'dtype': '
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -190,20 +188,20 @@ def test_unicode_data(self):
         Data variables:
             *empty*
         Attributes:
-            å: ∑""" % (byteorder, u'ba®'))
-        actual = unicode_type(data)
+            å: ∑""" % (byteorder, 'ba®'))
+        actual = str(data)
         assert expected == actual
 
     def test_info(self):
         ds = create_test_data(seed=123)
         ds = ds.drop('dim3')
         # string type prints differently in PY2 vs PY3
-        ds.attrs['unicode_attr'] = u'ba®'
+        ds.attrs['unicode_attr'] = 'ba®'
         ds.attrs['string_attr'] = 'bar'
 
         buf = StringIO()
         ds.info(buf=buf)
-        expected = dedent(u'''\
+        expected = dedent('''\
         xarray.Dataset {
         dimensions:
         \tdim1 = 8 ;
@@ -273,7 +271,7 @@ class Arbitrary(object):
             pass
 
         d = pd.Timestamp('2000-01-01T12')
-        args = [True, None, 3.4, np.nan, 'hello', u'uni', b'raw',
+        args = [True, None, 3.4, np.nan, 'hello', b'raw',
                 np.datetime64('2000-01-01'), d, d.to_pydatetime(),
                 Arbitrary()]
         for arg in args:
@@ -589,6 +587,11 @@ def test_coords_modify(self):
         expected = data.merge({'c': 11}).set_coords('c')
         assert_identical(expected, actual)
 
+    def test_update_index(self):
+        actual = Dataset(coords={'x': [1, 2, 3]})
+        actual['x'] = ['a', 'b', 'c']
+        assert actual.indexes['x'].equals(pd.Index(['a', 'b', 'c']))
+
     def test_coords_setitem_with_new_dimension(self):
         actual = Dataset()
         actual.coords['foo'] = ('x', [1, 2, 3])
@@ -836,7 +839,7 @@ def test_isel(self):
             assert data[v].dims == ret[v].dims
             assert data[v].attrs == ret[v].attrs
             slice_list = [slice(None)] * data[v].values.ndim
-            for d, s in iteritems(slicers):
+            for d, s in slicers.items():
                 if d in data[v].dims:
                     inds = np.nonzero(np.array(data[v].dims) == d)[0]
                     for ind in inds:
@@ -1889,7 +1892,7 @@ def test_copy(self):
     def test_copy_with_data(self):
         orig = create_test_data()
         new_data = {k: np.random.randn(*v.shape)
-                    for k, v in iteritems(orig.data_vars)}
+                    for k, v in orig.data_vars.items()}
         actual = orig.copy(data=new_data)
 
         expected = orig.copy()
@@ -1913,12 +1916,12 @@ def test_rename(self):
         renamed = data.rename(newnames)
 
         variables = OrderedDict(data.variables)
-        for k, v in iteritems(newnames):
+        for k, v in newnames.items():
             variables[v] = variables.pop(k)
 
-        for k, v in iteritems(variables):
+        for k, v in variables.items():
             dims = list(v.dims)
-            for name, newname in iteritems(newnames):
+            for name, newname in newnames.items():
                 if name in dims:
                     dims[dims.index(name)] = newname
 
@@ -2557,7 +2560,7 @@ def test_squeeze(self):
             def get_args(v):
                 return [set(args[0]) & set(v.dims)] if args else []
             expected = Dataset(dict((k, v.squeeze(*get_args(v)))
-                                    for k, v in iteritems(data.variables)))
+                                    for k, v in data.variables.items()))
             expected = expected.set_coords(data.coords)
             assert_identical(expected, data.squeeze(*args))
         # invalid squeeze
@@ -3042,11 +3045,25 @@ def test_to_and_from_dict(self):
         # check roundtrip
         assert_identical(ds, Dataset.from_dict(actual))
 
+        # check the data=False option
+        expected_no_data = expected.copy()
+        del expected_no_data['coords']['t']['data']
+        del expected_no_data['data_vars']['a']['data']
+        del expected_no_data['data_vars']['b']['data']
+        expected_no_data['coords']['t'].update({'dtype': '