pydata · shoyer · Jan 4, 2019 · Dec 31, 2018 · Jan 1, 2019 · Jan 3, 2019
diff --git a/xarray/core/coordinates.py b/xarray/core/coordinates.py
@@ -6,6 +6,7 @@
 import pandas as pd
 
 from . import formatting, indexing
+from .indexes import default_indexes
 from .merge import (
     expand_and_merge_variables, merge_coords, merge_coords_for_inplace_math)
 from .pycompat import OrderedDict
@@ -196,6 +197,7 @@ def _update_coords(self, coords):
         self._data._variables = variables
         self._data._coord_names.update(new_coord_names)
         self._data._dims = dict(dims)
+        self._data._indexes = default_indexes(variables, dims)
 
     def __delitem__(self, key):
         if key in self:
@@ -276,44 +278,6 @@ def __iter__(self):
         return iter(self._data._level_coords)
 
 
-class Indexes(Mapping, formatting.ReprMixin):
-    """Ordered Mapping[str, pandas.Index] for xarray objects.
-    """
-
-    def __init__(self, variables, sizes):
-        """Not for public consumption.
-
-        Parameters
-        ----------
-        variables : OrderedDict[Any, Variable]
-            Reference to OrderedDict holding variable objects. Should be the
-            same dictionary used by the source object.
-        sizes : OrderedDict[Any, int]
-            Map from dimension names to sizes.
-        """
-        self._variables = variables
-        self._sizes = sizes
-
-    def __iter__(self):
-        for key in self._sizes:
-            if key in self._variables:
-                yield key
-
-    def __len__(self):
-        return sum(key in self._variables for key in self._sizes)
-
-    def __contains__(self, key):
-        return key in self._sizes and key in self._variables
-
-    def __getitem__(self, key):
-        if key not in self._sizes:
-            raise KeyError(key)
-        return self._variables[key].to_index()
-
-    def __unicode__(self):
-        return formatting.indexes_repr(self)
-
-
 def assert_coordinate_consistent(obj, coords):
     """ Maeke sure the dimension coordinate of obj is
     consistent with coords.

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
@@ -13,10 +13,11 @@
 from .alignment import align, reindex_like_indexers
 from .common import AbstractArray, DataWithCoords
 from .coordinates import (
-    DataArrayCoordinates, Indexes, LevelCoordinatesSource,
+    DataArrayCoordinates, LevelCoordinatesSource,
     assert_coordinate_consistent, remap_label_indexers)
 from .dataset import Dataset, merge_indexes, split_indexes
 from .formatting import format_item
+from .indexes import default_indexes, Indexes
 from .options import OPTIONS
 from .pycompat import OrderedDict, basestring, iteritems, range, zip
 from .utils import (
@@ -165,7 +166,7 @@ class DataArray(AbstractArray, DataWithCoords):
     dt = property(DatetimeAccessor)
 
     def __init__(self, data, coords=None, dims=None, name=None,
-                 attrs=None, encoding=None, fastpath=False):
+                 attrs=None, encoding=None, indexes=None, fastpath=False):
         """
         Parameters
         ----------
@@ -237,6 +238,12 @@ def __init__(self, data, coords=None, dims=None, name=None,
         self._coords = coords
         self._name = name
 
+        # TODO(shoyer): document this argument, once it becomes part of the
+        # public interface.
+        if indexes is None:
+            indexes = default_indexes(coords, variable.dims)
+        self._indexes = indexes
+
         self._file_obj = None
 
         self._initialized = True
@@ -534,9 +541,9 @@ def encoding(self, value):
 
     @property
     def indexes(self):
-        """OrderedDict of pandas.Index objects used for label based indexing
+        """Mapping of pandas.Index objects used for label based indexing
         """
-        return Indexes(self._coords, self.sizes)
+        return Indexes(self._indexes)
 
     @property
     def coords(self):

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -13,16 +13,17 @@
 import xarray as xr
 
 from . import (
-    alignment, dtypes, duck_array_ops, formatting, groupby, indexing, ops,
-    pdcompat, resample, rolling, utils)
+    alignment, dtypes, duck_array_ops, formatting, groupby,
+    indexing, ops, pdcompat, resample, rolling, utils)
 from ..coding.cftimeindex import _parse_array_of_cftime_strings
 from .alignment import align
 from .common import (
     ALL_DIMS, DataWithCoords, ImplementsDatasetReduce,
     _contains_datetime_like_objects)
 from .coordinates import (
-    DatasetCoordinates, Indexes, LevelCoordinatesSource,
+    DatasetCoordinates, LevelCoordinatesSource,
     assert_coordinate_consistent, remap_label_indexers)
+from .indexes import Indexes, default_indexes
 from .merge import (
     dataset_merge_method, dataset_update_method, merge_data_and_coords,
     merge_variables)
@@ -364,6 +365,10 @@ def __init__(self, data_vars=None, coords=None, attrs=None,
             coords = {}
         if data_vars is not None or coords is not None:
             self._set_init_vars_and_dims(data_vars, coords, compat)
+
+        # TODO(shoyer): expose indexes as a public argument in __init__
+        self._indexes = default_indexes(self._variables, self._dims)
+
         if attrs is not None:
             self.attrs = attrs
         self._encoding = None
@@ -642,14 +647,17 @@ def persist(self, **kwargs):
 
     @classmethod
     def _construct_direct(cls, variables, coord_names, dims=None, attrs=None,
-                          file_obj=None, encoding=None):
+                          indexes=None, file_obj=None, encoding=None):
         """Shortcut around __init__ for internal use when we want to skip
         costly validation
         """
         obj = object.__new__(cls)
         obj._variables = variables
         obj._coord_names = coord_names
         obj._dims = dims
+        if indexes is None:
+            indexes = default_indexes(variables, dims)
+        obj._indexes = indexes
         obj._attrs = attrs
         obj._file_obj = file_obj
         obj._encoding = encoding
@@ -664,7 +672,8 @@ def _from_vars_and_coord_names(cls, variables, coord_names, attrs=None):
         return cls._construct_direct(variables, coord_names, dims, attrs)
 
     def _replace_vars_and_dims(self, variables, coord_names=None, dims=None,
-                               attrs=__default_attrs, inplace=False):
+                               attrs=__default_attrs, indexes=None,
+                               inplace=False):
         """Fastpath constructor for internal use.
 
         Preserves coord names and attributes. If not provided explicitly,
@@ -693,6 +702,8 @@ def _replace_vars_and_dims(self, variables, coord_names=None, dims=None,
                 self._coord_names = coord_names
             if attrs is not self.__default_attrs:
                 self._attrs = attrs
+            if indexes is None:
+                self._indexes = default_indexes(variables, dims)
             obj = self
         else:
             if coord_names is None:
@@ -1064,9 +1075,9 @@ def identical(self, other):
 
     @property
     def indexes(self):
-        """OrderedDict of pandas.Index objects used for label based indexing
+        """Mapping of pandas.Index objects used for label based indexing
         """
-        return Indexes(self._variables, self._dims)
+        return Indexes(self._indexes)
 
     @property
     def coords(self):

diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
@@ -0,0 +1,53 @@
+from __future__ import absolute_import, division, print_function
+try:
+    from collections.abc import Mapping
+except ImportError:
+    from collections import Mapping
+
+from . import formatting
+
+
+class Indexes(Mapping, formatting.ReprMixin):
+    """Immutable proxy for Dataset or DataArray indexes."""
+    def __init__(self, indexes):
+        """Not for public consumption.
+
+        Parameters
+        ----------
+        indexes : Dict[Any, pandas.Index]
+            Indexes held by this object.
+        """
+        self._indexes = indexes
+
+    def __iter__(self):
+        return iter(self._indexes)
+
+    def __len__(self):
+        return len(self._indexes)
+
+    def __contains__(self, key):
+        return key in self._indexes
+
+    def __getitem__(self, key):
+        return self._indexes[key]
+
+    def __unicode__(self):
+        return formatting.indexes_repr(self)
+
+
+def default_indexes(coords, dims):
+    """Default indexes for a Dataset/DataArray.
+
+    Parameters
+    ----------
+    coords : Mapping[Any, xarray.Variable]
+        Coordinate variables from which to draw default indexes.
+    dims : iterable
+        Iterable of dimension names.
+
+    Returns
+    -------
+    Mapping[Any, pandas.Index] from each dimension name that has a coordinate
+    variable in ``coords`` to the index built from that variable.
+    """
+    return {key: coords[key].to_index() for key in dims if key in coords}