@@ -4364,7 +4364,7 @@ def duplicated(self, subset=None, keep='first', return_inverse=False):
43644364 compatible with ``return_inverse``.
43654365 return_inverse : boolean, default False
43664366 Determines whether the mapping from unique elements to the original
4367- index should be returned. If true , the output is a tuple.
4367+ index should be returned. If True, the output is a tuple.
43684368
43694369 .. versionadded:: 0.24.0
43704370
@@ -4373,12 +4373,14 @@ def duplicated(self, subset=None, keep='first', return_inverse=False):
43734373 duplicated : Series or tuple of Series if return_inverse is True
43744374 """
43754375 from pandas .core .sorting import get_group_index
4376- from pandas ._libs .hashtable import duplicated_int64 , _SIZE_HINT_LIMIT
4376+ from pandas ._libs .hashtable import _SIZE_HINT_LIMIT
4377+ from pandas .core .algorithms import duplicated
43774378
43784379 if return_inverse and not keep :
43794380 raise ValueError ("The parameters return_inverse=True and "
43804381 "keep=False cannot be used together (impossible "
4381- "to calculate an inverse when discarding values)" )
4382+ "to calculate an inverse when discarding all "
4383+ "instances of a duplicate)." )
43824384
43834385 def f (vals ):
43844386 labels , shape = algorithms .factorize (
@@ -4404,32 +4406,13 @@ def f(vals):
44044406 labels , shape = map (list , zip (* map (f , vals )))
44054407
44064408 ids = get_group_index (labels , shape , sort = False , xnull = False )
4407- isdup = Series (duplicated_int64 (ids , keep ), index = self .index )
44084409 if not return_inverse :
4409- return isdup
4410- elif not isdup .any ():
4411- # no need to calculate inverse if no duplicates
4412- inv = Series (self .index , index = self .index )
4413- return isdup , inv
4414-
4415- if keep == 'first' :
4416- # o2u: original indices to indices of ARRAY of unique values
4417- # u2o: reduplication from array of unique values to original array
4418- _ , o2u , u2o = np .unique (ids , return_inverse = True ,
4419- return_index = True )
4420- inv = Series (self .index [o2u ][u2o ], index = self .index )
4421- elif keep == 'last' :
4422- # np.unique takes first occurrence as unique value,
4423- # so we flip ids that first becomes last
4424- ids = ids [::- 1 ]
4425- _ , o2u , u2o = np .unique (ids , return_inverse = True ,
4426- return_index = True )
4427- # the values in the ids-array correspond(ed) to self.index -
4428- # by flipping ids around, we need to do the same for self.index,
4429- # ___because o2u and u2o are relative to that order___.
4430- # Finally, to fit with 'index=self.index' in the constructor,
4431- # we need to flip the values around one last time
4432- inv = Series (self .index [::- 1 ][o2u ][u2o ][::- 1 ], index = self .index )
4410+ return Series (duplicated (ids , keep = keep ), index = self .index )
4411+
4412+ isdup_array , inv_array = duplicated (ids , keep = keep ,
4413+ return_inverse = return_inverse )
4414+ isdup = Series (isdup_array , index = self .index )
4415+ inv = Series (self .index [inv_array ], index = self .index )
44334416 return isdup , inv
44344417
44354418 # ----------------------------------------------------------------------
0 commit comments