@@ -80,7 +80,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
8080 index ,
8181 columns )
8282
83-
8483 # this _index object is a pd.DataFrame
8584 # and we use that DataFrame's Index to index the rows.
8685 self ._row_lengths , self ._row_index = \
@@ -305,7 +304,7 @@ def _set__col_index(self, new__index):
305304 def _compute_row_lengths (self ):
306305 """Updates the stored lengths of DataFrame partitions
307306 """
308- self ._row_lengths = [_deploy_func .remote (_get_row_lengths , d )
307+ self ._row_lengths = [_deploy_func .remote (lambda df : len ( df ) , d )
309308 for d in self ._row_partitions ]
310309
311310 def _get_row_lengths (self ):
@@ -338,7 +337,7 @@ def _set_row_lengths(self, lengths):
338337 def _compute_col_lengths (self ):
339340 """Updates the stored lengths of DataFrame partitions
340339 """
341- self ._col_lengths = [_deploy_func .remote (_get_col_lengths , d )
340+ self ._col_lengths = [_deploy_func .remote (lambda df : df . shape [ 1 ] , d )
342341 for d in self ._col_partitions ]
343342
344343 def _get_col_lengths (self ):
@@ -1171,16 +1170,25 @@ def drop(self, labels=None, axis=0, index=None, columns=None, level=None,
11711170 try :
11721171 if not is_axis_zero or columns is not None :
11731172 values = labels if labels else columns
1173+ new_values = [self .columns .get_loc (i ) for i in values ]
11741174 new_df_rows = _map_partitions (
11751175 lambda df : df .drop (
1176- values , axis = 1 , level = level , errors = 'ignore' ),
1176+ new_values , axis = 1 , level = level , errors = 'ignore' ),
11771177 self ._row_partitions
11781178 )
1179- new_columns = self .columns .to_series ().drop (values ,
1180- errors = errors )
1181- new_columns = pd .Index (new_columns )
1179+ new_columns = self ._col_index .drop (values )
1180+
1181+ new_df_cols = self ._col_partitions .copy ()
1182+ col_parts_to_del = pd .Series (self ._col_index .loc [values , 'partition' ]).unique ()
1183+ for i in col_parts_to_del :
1184+ to_del = [self ._col_index .loc [x , 'index_within_partition' ]
1185+ for x in values if self ._col_index .loc [x , 'partition' ] == i ]
1186+ new_df_cols [i ] = _deploy_func .remote (lambda df : df .drop (to_del ), self ._col_partitions [i ])
1187+
1188+
11821189 new_df = DataFrame (columns = new_columns ,
1183- row_partitions = new_df_rows )
1190+ row_partitions = new_df_rows ,
1191+ col_partitions = new_df_cols )
11841192 except (ValueError , KeyError ):
11851193 if errors == 'raise' :
11861194 raise
@@ -2986,9 +2994,13 @@ def __delitem__(self, key):
29862994 Args:
29872995 key: key to delete
29882996 """
2997+ to_delete = self .columns .get_loc (key )
2998+
29892999 def del_helper (df ):
2990- df .__delitem__ (self .columns .index (key ))
3000+ df .__delitem__ (to_delete )
3001+ df .reset_index (drop = True , inplace = True )
29913002 return df
3003+
29923004 self ._row_partitions = _map_partitions (del_helper , self ._row_partitions )
29933005
29943006 # TODO: See if this is faster than just:
0 commit comments