7373    rather than the first line of the file. 
7474names : array-like, default None 
7575    List of column names to use. If file contains no header row, then you 
76-     should explicitly pass header=None 
76+     should explicitly pass header=None. Duplicates in this list are not 
77+     allowed unless mangle_dupe_cols=True, which is the default. 
7778index_col : int or sequence or False, default None 
7879    Column to use as the row labels of the DataFrame. If a sequence is given, a 
7980    MultiIndex is used. If you have a malformed file with delimiters at the end 
9192prefix : str, default None 
9293    Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ... 
9394mangle_dupe_cols : boolean, default True 
94-     Duplicate columns will be specified as 'X.0'...'X.N', rather than 'X'...'X' 
95+     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than 
96+     'X'...'X'. Passing in False will cause data to be overwritten if there 
97+     are duplicate names in the columns. 
9598dtype : Type name or dict of column -> type, default None 
9699    Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32} 
97100    (Unsupported with engine='python'). Use `str` or `object` to preserve and 
@@ -655,7 +658,14 @@ def _get_options_with_defaults(self, engine):
655658        options  =  {}
656659
657660        for  argname , default  in  compat .iteritems (_parser_defaults ):
658-             options [argname ] =  kwds .get (argname , default )
661+             value  =  kwds .get (argname , default )
662+ 
663+             # see gh-12935 
664+             if  argname  ==  'mangle_dupe_cols'  and  not  value :
665+                 raise  ValueError ('Setting mangle_dupe_cols=False is ' 
666+                                  'not supported yet' )
667+             else :
668+                 options [argname ] =  value 
659669
660670        for  argname , default  in  compat .iteritems (_c_parser_defaults ):
661671            if  argname  in  kwds :
@@ -899,6 +909,7 @@ def __init__(self, kwds):
899909        self .true_values  =  kwds .get ('true_values' )
900910        self .false_values  =  kwds .get ('false_values' )
901911        self .tupleize_cols  =  kwds .get ('tupleize_cols' , False )
912+         self .mangle_dupe_cols  =  kwds .get ('mangle_dupe_cols' , True )
902913        self .infer_datetime_format  =  kwds .pop ('infer_datetime_format' , False )
903914
904915        self ._date_conv  =  _make_date_converter (
@@ -1012,6 +1023,26 @@ def tostr(x):
10121023
10131024        return  names , index_names , col_names , passed_names 
10141025
1026+     def  _maybe_dedup_names (self , names ):
1027+         # see gh-7160 and gh-9424: this helps to provide 
1028+         # immediate alleviation of the duplicate names 
1029+         # issue and appears to be satisfactory to users, 
1030+         # but ultimately, not needing to butcher the names 
1031+         # would be nice! 
1032+         if  self .mangle_dupe_cols :
1033+             names  =  list (names )  # so we can index 
1034+             counts  =  {}
1035+ 
1036+             for  i , col  in  enumerate (names ):
1037+                 cur_count  =  counts .get (col , 0 )
1038+ 
1039+                 if  cur_count  >  0 :
1040+                     names [i ] =  '%s.%d'  %  (col , cur_count )
1041+ 
1042+                 counts [col ] =  cur_count  +  1 
1043+ 
1044+         return  names 
1045+ 
10151046    def  _maybe_make_multi_index_columns (self , columns , col_names = None ):
10161047        # possibly create a column mi here 
10171048        if  (not  self .tupleize_cols  and  len (columns ) and 
@@ -1314,10 +1345,11 @@ def read(self, nrows=None):
13141345        except  StopIteration :
13151346            if  self ._first_chunk :
13161347                self ._first_chunk  =  False 
1348+                 names  =  self ._maybe_dedup_names (self .orig_names )
13171349
13181350                index , columns , col_dict  =  _get_empty_meta (
1319-                     self .orig_names , self .index_col ,
1320-                     self . index_names ,  dtype = self .kwds .get ('dtype' ))
1351+                     names ,  self .index_col , self .index_names ,
1352+                     dtype = self .kwds .get ('dtype' ))
13211353
13221354                if  self .usecols  is  not None :
13231355                    columns  =  self ._filter_usecols (columns )
@@ -1361,6 +1393,8 @@ def read(self, nrows=None):
13611393            if  self .usecols  is  not None :
13621394                names  =  self ._filter_usecols (names )
13631395
1396+             names  =  self ._maybe_dedup_names (names )
1397+ 
13641398            # rename dict keys 
13651399            data  =  sorted (data .items ())
13661400            data  =  dict ((k , v ) for  k , (i , v ) in  zip (names , data ))
@@ -1373,6 +1407,7 @@ def read(self, nrows=None):
13731407
13741408            # ugh, mutation 
13751409            names  =  list (self .orig_names )
1410+             names  =  self ._maybe_dedup_names (names )
13761411
13771412            if  self .usecols  is  not None :
13781413                names  =  self ._filter_usecols (names )
@@ -1567,7 +1602,6 @@ def __init__(self, f, **kwds):
15671602        self .skipinitialspace  =  kwds ['skipinitialspace' ]
15681603        self .lineterminator  =  kwds ['lineterminator' ]
15691604        self .quoting  =  kwds ['quoting' ]
1570-         self .mangle_dupe_cols  =  kwds .get ('mangle_dupe_cols' , True )
15711605        self .usecols  =  _validate_usecols_arg (kwds ['usecols' ])
15721606        self .skip_blank_lines  =  kwds ['skip_blank_lines' ]
15731607
@@ -1756,8 +1790,8 @@ def read(self, rows=None):
17561790        columns  =  list (self .orig_names )
17571791        if  not  len (content ):  # pragma: no cover 
17581792            # DataFrame with the right metadata, even though it's length 0 
1759-             return   _get_empty_meta (self .orig_names , 
1760-                                     self .index_col ,
1793+             names   =   self . _maybe_dedup_names (self .orig_names ) 
1794+             return   _get_empty_meta ( names ,  self .index_col ,
17611795                                   self .index_names )
17621796
17631797        # handle new style for names in index 
@@ -1770,26 +1804,28 @@ def read(self, rows=None):
17701804        alldata  =  self ._rows_to_cols (content )
17711805        data  =  self ._exclude_implicit_index (alldata )
17721806
1773-         columns , data  =  self ._do_date_conversions (self .columns , data )
1807+         columns  =  self ._maybe_dedup_names (self .columns )
1808+         columns , data  =  self ._do_date_conversions (columns , data )
17741809
17751810        data  =  self ._convert_data (data )
17761811        index , columns  =  self ._make_index (data , alldata , columns , indexnamerow )
17771812
17781813        return  index , columns , data 
17791814
17801815    def  _exclude_implicit_index (self , alldata ):
1816+         names  =  self ._maybe_dedup_names (self .orig_names )
17811817
17821818        if  self ._implicit_index :
17831819            excl_indices  =  self .index_col 
17841820
17851821            data  =  {}
17861822            offset  =  0 
1787-             for  i , col  in  enumerate (self . orig_names ):
1823+             for  i , col  in  enumerate (names ):
17881824                while  i  +  offset  in  excl_indices :
17891825                    offset  +=  1 
17901826                data [col ] =  alldata [i  +  offset ]
17911827        else :
1792-             data  =  dict ((k , v ) for  k , v  in  zip (self . orig_names , alldata ))
1828+             data  =  dict ((k , v ) for  k , v  in  zip (names , alldata ))
17931829
17941830        return  data 
17951831
0 commit comments