@@ -462,22 +462,36 @@ def factorize(self, na_sentinel=-1):
462462 # ------------------------------------------------------------------------
463463 # Indexing methods
464464 # ------------------------------------------------------------------------
465- def take (self , indexer , allow_fill = True , fill_value = None ):
465+
466+ def take (self , indices , allow_fill = False , fill_value = None ):
466467 # type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray
467468 """Take elements from an array.
468469
469470 Parameters
470471 ----------
471- indexer : sequence of integers
472- indices to be taken. -1 is used to indicate values
473- that are missing.
474- allow_fill : bool, default True
475- If False, indexer is assumed to contain no -1 values so no filling
476- will be done. This short-circuits computation of a mask. Result is
477- undefined if allow_fill == False and -1 is present in indexer.
478- fill_value : any, default None
479- Fill value to replace -1 values with. If applicable, this should
480- use the sentinel missing value for this type.
472+ indices : sequence of integers
473+ Indices to be taken.
474+ allow_fill : bool, default False
475+ How to handle negative values in `indices`.
476+
477+ * False: negative values in `indices` indicate positional indices
478+ from the right (the default). This is similar to
479+ :func:`numpy.take`.
480+
481+ * True: ``-1`` in `indices` indicates
482+ missing values. These values are set to `fill_value`. Any other
483+ negative values raise a ``ValueError``.
484+
485+ fill_value : any, optional
486+ Fill value to use for NA-indices when `allow_fill` is True.
487+ This may be ``None``, in which case the default NA value for
488+ the type, ``self.dtype.na_value``, is used.
489+
490+ For many ExtensionArrays, there will be two representations of
491+ `fill_value`: a user-facing "boxed" scalar, and a low-level
492+ physical NA value. `fill_value` should be the user-facing version,
493+ and the implementation should handle translating that to the
494+ physical version for processing the take if necessary.
481495
482496 Returns
483497 -------
@@ -486,44 +500,56 @@ def take(self, indexer, allow_fill=True, fill_value=None):
486500 Raises
487501 ------
488502 IndexError
489- When the indexer is out of bounds for the array.
503+ When the indices are out of bounds for the array.
504+ ValueError
505+ When `indices` contains negative values other than ``-1``
506+ and `allow_fill` is True.
490507
491508 Notes
492509 -----
493- This should follow pandas' semantics where -1 indicates missing values.
494- Positions where indexer is ``-1`` should be filled with the missing
495- value for this type.
496- This gives rise to the special case of a take on an empty
497- ExtensionArray that does not raises an IndexError straight away
498- when the `indexer` is all ``-1``.
510+ ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
511+ ``.iloc``, when `indices` is a sequence of values. Additionally,
512+ it's called by :meth:`Series.reindex`, or any other method
513+ that causes realignment, with a `fill_value`.
499514
500- This is called by ``Series.__getitem__``, ``.loc``, ``iloc``, when the
501- indexer is a sequence of values.
515+ See Also
516+ --------
517+ numpy.take
518+ pandas.api.extensions.take
502519
503520 Examples
504521 --------
505- Suppose the extension array is backed by a NumPy array stored as
506- ``self.data``. Then ``take`` may be written as
522+ Here's an example implementation, which relies on casting the
523+ extension array to object dtype. This uses the helper method
524+ :func:`pandas.api.extensions.take`.
507525
508526 .. code-block:: python
509527
510- def take(self, indexer, allow_fill=True, fill_value=None):
511- indexer = np.asarray(indexer)
512- mask = indexer == -1
528+ def take(self, indices, allow_fill=False, fill_value=None):
529+ from pandas.core.algorithms import take
513530
514- # take on empty array not handled as desired by numpy
515- # in case of -1 (all missing take)
516- if not len(self) and mask.all():
517- return type(self)([np.nan] * len(indexer))
531+ # If the ExtensionArray is backed by an ndarray, then
532+ # just pass that here instead of coercing to object.
533+ data = self.astype(object)
518534
519- result = self.data.take(indexer)
520- result[mask] = np.nan # NA for this type
521- return type(self)(result)
535+ if allow_fill and fill_value is None:
536+ fill_value = self.dtype.na_value
522537
523- See Also
524- --------
525- numpy.take
538+ # fill value should always be translated from the scalar
539+ # type for the array, to the physical storage type for
540+ # the data, before passing to take.
541+
542+ result = take(data, indices, fill_value=fill_value,
543+ allow_fill=allow_fill)
544+ return self._from_sequence(result)
526545 """
546+ # Implementer note: The `fill_value` parameter should be a user-facing
547+ # value, an instance of self.dtype.type. When passed `fill_value=None`,
548+ # the default of `self.dtype.na_value` should be used.
549+ # This may differ from the physical storage type your ExtensionArray
550+ # uses. In this case, your implementation is responsible for casting
551+ # the user-facing type to the storage type, before using
552+ # pandas.api.extensions.take
527553 raise AbstractMethodError (self )
528554
529555 def copy (self , deep = False ):
0 commit comments