@@ -2801,8 +2801,9 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
28012801 else :
28022802 return result
28032803
2804+ @deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' , False : 'first' })
28042805 @deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2805- def drop_duplicates (self , subset = None , take_last = False , inplace = False ):
2806+ def drop_duplicates (self , subset = None , keep = 'first' , inplace = False ):
28062807 """
28072808 Return DataFrame with duplicate rows removed, optionally only
28082809 considering certain columns
@@ -2812,8 +2813,11 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
28122813 subset : column label or sequence of labels, optional
28132814 Only consider certain columns for identifying duplicates, by
28142815 default use all of the columns
2815- take_last : boolean, default False
2816- Take the last observed row in a row. Defaults to the first row
2816+ keep : {'first', 'last', False}, default 'first'
2817+ - ``first`` : Drop duplicates except for the first occurrence.
2818+ - ``last`` : Drop duplicates except for the last occurrence.
2819+ - False : Drop all duplicates.
2820+ take_last : deprecated
28172821 inplace : boolean, default False
28182822 Whether to drop duplicates in place or to return a copy
28192823 cols : kwargs only argument of subset [deprecated]
@@ -2822,7 +2826,7 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
28222826 -------
28232827 deduplicated : DataFrame
28242828 """
2825- duplicated = self .duplicated (subset , take_last = take_last )
2829+ duplicated = self .duplicated (subset , keep = keep )
28262830
28272831 if inplace :
28282832 inds , = (- duplicated ).nonzero ()
@@ -2831,8 +2835,9 @@ def drop_duplicates(self, subset=None, take_last=False, inplace=False):
28312835 else :
28322836 return self [- duplicated ]
28332837
2838+ @deprecate_kwarg ('take_last' , 'keep' , mapping = {True : 'last' , False : 'first' })
28342839 @deprecate_kwarg (old_arg_name = 'cols' , new_arg_name = 'subset' )
2835- def duplicated (self , subset = None , take_last = False ):
2840+ def duplicated (self , subset = None , keep = 'first' ):
28362841 """
28372842 Return boolean Series denoting duplicate rows, optionally only
28382843 considering certain columns
@@ -2842,9 +2847,13 @@ def duplicated(self, subset=None, take_last=False):
28422847 subset : column label or sequence of labels, optional
28432848 Only consider certain columns for identifying duplicates, by
28442849 default use all of the columns
2845- take_last : boolean, default False
2846- For a set of distinct duplicate rows, flag all but the last row as
2847- duplicated. Default is for all but the first row to be flagged
2850+ keep : {'first', 'last', False}, default 'first'
2851+ - ``first`` : Mark duplicates as ``True`` except for the
2852+ first occurrence.
2853+ - ``last`` : Mark duplicates as ``True`` except for the
2854+ last occurrence.
2855+ - False : Mark all duplicates as ``True``.
2856+ take_last : deprecated
28482857 cols : kwargs only argument of subset [deprecated]
28492858
28502859 Returns
@@ -2870,7 +2879,7 @@ def f(vals):
28702879 labels , shape = map (list , zip ( * map (f , vals )))
28712880
28722881 ids = get_group_index (labels , shape , sort = False , xnull = False )
2873- return Series (duplicated_int64 (ids , take_last ), index = self .index )
2882+ return Series (duplicated_int64 (ids , keep ), index = self .index )
28742883
28752884 #----------------------------------------------------------------------
28762885 # Sorting
0 commit comments