pandas-dev · jorisvandenbossche · May 13, 2020 · May 13, 2020 · Apr 22, 2021 · Apr 22, 2021
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -404,6 +404,7 @@ def _create_mi_with_dt64tz_level():
     "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
     "multi": _create_multiindex(),
     "repeats": Index([0, 0, 1, 1, 2, 2]),
+    "nullable_int": Index(np.arange(100), dtype="Int64"),
 }
 
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -346,11 +346,13 @@ def __new__(
                 ea_cls = dtype.construct_array_type()
                 data = ea_cls._from_sequence(data, dtype=dtype, copy=False)
             else:
-                data = np.asarray(data, dtype=object)
+                # TODO clean-up with extract_array ?
+                if isinstance(data, Index):
+                    data = data._data
+                elif isinstance(data, ABCSeries):
+                    data = data.array
 
-            # coerce to the object dtype
-            data = data.astype(object)
-            return Index(data, dtype=object, copy=copy, name=name, **kwargs)
+            return cls._simple_new(data, name)
 
         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
@@ -458,7 +460,7 @@ def _simple_new(cls, values, name: Label = None):
 
         Must be careful not to recurse.
         """
-        assert isinstance(values, np.ndarray), type(values)
+        assert isinstance(values, (np.ndarray, ExtensionArray)), type(values)
 
         result = object.__new__(cls)
         result._data = values
@@ -2126,6 +2128,8 @@ def fillna(self, value=None, downcast=None):
         Series.fillna : Fill NaN Values of a Series.
         """
         self._assert_can_do_op(value)
+        if is_extension_array_dtype(self.dtype):
+            return self._shallow_copy(self._values.fillna(value))
         if self.hasnans:
             result = self.putmask(self._isnan, value)
             if downcast is None:
@@ -2525,7 +2529,9 @@ def _union(self, other, sort):
                 # worth making this faster? a very unusual case
                 value_set = set(lvals)
                 result.extend([x for x in rvals if x not in value_set])
-                result = Index(result)._values  # do type inference here
+                result = Index(
+                    result, dtype=self.dtype
+                )._values  # do type inference here
         else:
             # find indexes of things in "other" that are not in "self"
             if self.is_unique:
@@ -3797,7 +3803,7 @@ def values(self) -> np.ndarray:
         Index.array : Reference to the underlying data.
         Index.to_numpy : A NumPy array representing the underlying data.
         """
-        return self._data.view(np.ndarray)
+        return self._data  # .view(np.ndarray)
 
     @cache_readonly
     @doc(IndexOpsMixin.array)
@@ -3839,7 +3845,10 @@ def _get_engine_target(self) -> np.ndarray:
         """
         Get the ndarray that we can pass to the IndexEngine constructor.
         """
-        return self._values
+        if isinstance(self._values, np.ndarray):
+            return self._values
+        else:
+            return np.asarray(self._values)
 
     @doc(IndexOpsMixin.memory_usage)
     def memory_usage(self, deep: bool = False) -> int:
@@ -4232,10 +4241,18 @@ def equals(self, other: Any) -> bool:
             # d-level MultiIndex can equal d-tuple Index
             return other.equals(self)
 
-        if is_extension_array_dtype(other.dtype):
+        if is_extension_array_dtype(other.dtype) and type(other) != Index:
             # All EA-backed Index subclasses override equals
             return other.equals(self)
 
+        if is_extension_array_dtype(self.dtype):
+            if is_object_dtype(other.dtype):
+                try:
+                    other = other.astype(self.dtype)
+                except Exception:
+                    return False
+            return self._values.equals(other._values)
+
         return array_equivalent(self._values, other._values)
 
     def identical(self, other) -> bool:
@@ -4759,6 +4776,15 @@ def map(self, mapper, na_action=None):
 
         attributes = self._get_attributes_dict()
 
+        if is_extension_array_dtype(self.dtype):
+            # try to coerce back to original dtype
+            # TODO this should use a strict version
+            try:
+                # TODO use existing helper method for this
+                new_values = self._values._from_sequence(new_values, dtype=self.dtype)
+            except Exception:
+                pass
+
         # we can return a MultiIndex
         if new_values.size and isinstance(new_values[0], tuple):
             if isinstance(self, MultiIndex):
@@ -5193,7 +5219,10 @@ def delete(self, loc):
         >>> idx.delete([0, 2])
         Index(['b'], dtype='object')
         """
-        return self._shallow_copy(np.delete(self._data, loc))
+        # this is currently overriden by EA-based Index subclasses
+        keep = np.ones(len(self), dtype=bool)
+        keep[loc] = False
+        return self._shallow_copy(self._data[keep])
 
     def insert(self, loc: int, item):
         """
@@ -5212,9 +5241,14 @@ def insert(self, loc: int, item):
         """
         # Note: this method is overridden by all ExtensionIndex subclasses,
         #  so self is never backed by an EA.
-        arr = np.asarray(self)
         item = self._coerce_scalar_to_index(item)._values
-        idx = np.concatenate((arr[:loc], item, arr[loc:]))
+
+        if is_extension_array_dtype(self.dtype):
+            arr = self._values
+            idx = arr._concat_same_type([arr[:loc], item, arr[loc:]])
+        else:
+            arr = np.asarray(self)
+            idx = np.concatenate((arr[:loc], item, arr[loc:]))
         return Index(idx, name=self.name)
 
     def drop(self, labels, errors: str_t = "raise"):

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1766,7 +1766,6 @@ def isetter(loc, v):
                 ):
                     self.obj[item_labels[indexer[info_axis]]] = value
                     return
-
                 indexer = maybe_convert_ix(*indexer)
 
             if isinstance(value, (ABCSeries, dict)):

diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py
@@ -34,6 +34,8 @@ def test_value_counts(index_or_series_obj):
     expected.index = expected.index.astype(obj.dtype)
     if isinstance(obj, pd.MultiIndex):
         expected.index = pd.Index(expected.index)
+    if isinstance(obj.dtype, pd.Int64Dtype):
+        expected = expected.astype("Int64")
 
     # TODO: Order of entries with the same count is inconsistent on CI (gh-32449)
     if obj.duplicated().any():
@@ -69,6 +71,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
     counter = collections.Counter(obj.dropna())
     expected = pd.Series(dict(counter.most_common()), dtype=np.int64)
     expected.index = expected.index.astype(obj.dtype)
+    if isinstance(obj.dtype, pd.Int64Dtype):
+        expected = expected.astype("Int64")
 
     result = obj.value_counts()
     if obj.duplicated().any():

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -1000,7 +1000,7 @@ def test_apply_to_nullable_integer_returns_float(values, function):
     # https://github.com/pandas-dev/pandas/issues/32219
     output = 0.5 if function == "var" else 1.5
     arr = np.array([output] * 3, dtype=float)
-    idx = pd.Index([1, 2, 3], dtype=object, name="a")
+    idx = pd.Index([1, 2, 3], dtype="Int64", name="a")
     expected = pd.DataFrame({"b": arr}, index=idx)
 
     groups = pd.DataFrame(values, dtype="Int64").groupby("a")

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -6,7 +6,7 @@
 
 from pandas._libs import iNaT
 
-from pandas.core.dtypes.common import is_datetime64tz_dtype
+from pandas.core.dtypes.common import is_datetime64tz_dtype, is_extension_array_dtype
 from pandas.core.dtypes.dtypes import CategoricalDtype
 
 import pandas as pd
@@ -278,6 +278,9 @@ def test_ensure_copied_data(self, indices):
         elif isinstance(indices, IntervalIndex):
             # checked in test_interval.py
             pass
+        elif is_extension_array_dtype(indices.dtype):
+            # TODO can we check this generally?
+            pass
         else:
             result = index_type(indices.values, copy=False, **init_kwargs)
             tm.assert_numpy_array_equal(

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -916,6 +916,10 @@ def test_map_dictlike(self, indices, mapper):
         else:
             expected = Index(np.arange(len(indices), 0, -1))
 
+        if isinstance(indices.dtype, pd.Int64Dtype):
+            # map tries to preserve the nullable dtype
+            expected = expected.astype("Int64")
+
         result = indices.map(mapper(expected, indices))
         tm.assert_index_equal(result, expected)
 

diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -81,6 +81,7 @@ def test_getitem_ndarray_3d(self, indices, obj, idxr, idxr_id):
                 "Index data must be 1-dimensional",
                 "positional indexers are out-of-bounds",
                 "Indexing a MultiIndex with a multidimensional key is not implemented",
+                "values must be a 1D array",
             ]
         )
-Original file line number
+Diff line change
@@ Expand Up / @@ -1766,7 +1766,6 @@ def isetter(loc, v): @@
                     ):
                         self.obj[item_labels[indexer[info_axis]]] = value
                         return
                     indexer = maybe_convert_ix(*indexer)
                 if isinstance(value, (ABCSeries, dict)):
@@ Expand Down @@