@@ -1143,6 +1143,33 @@ def test_join(self):
1143
1143
join_kdf .sort_values (by = list (join_kdf .columns ), inplace = True )
1144
1144
self .assert_eq (join_pdf .reset_index (drop = True ), join_kdf .reset_index (drop = True ))
1145
1145
1146
+ # multi-index columns
1147
+ columns1 = pd .MultiIndex .from_tuples ([('x' , 'key' ), ('Y' , 'A' )])
1148
+ columns2 = pd .MultiIndex .from_tuples ([('x' , 'key' ), ('Y' , 'B' )])
1149
+ pdf1 .columns = columns1
1150
+ pdf2 .columns = columns2
1151
+ kdf1 .columns = columns1
1152
+ kdf2 .columns = columns2
1153
+
1154
+ join_pdf = pdf1 .join (pdf2 , lsuffix = '_left' , rsuffix = '_right' )
1155
+ join_pdf .sort_values (by = list (join_pdf .columns ), inplace = True )
1156
+
1157
+ join_kdf = kdf1 .join (kdf2 , lsuffix = '_left' , rsuffix = '_right' )
1158
+ join_kdf .sort_values (by = list (join_kdf .columns ), inplace = True )
1159
+
1160
+ self .assert_eq (join_pdf , join_kdf )
1161
+
1162
+ # check `on` parameter
1163
+ join_pdf = pdf1 .join (pdf2 .set_index (('x' , 'key' )), on = [('x' , 'key' )],
1164
+ lsuffix = '_left' , rsuffix = '_right' )
1165
+ join_pdf .sort_values (by = list (join_pdf .columns ), inplace = True )
1166
+
1167
+ join_kdf = kdf1 .join (kdf2 .set_index (('x' , 'key' )), on = [('x' , 'key' )],
1168
+ lsuffix = '_left' , rsuffix = '_right' )
1169
+ join_kdf .sort_values (by = list (join_kdf .columns ), inplace = True )
1170
+
1171
+ self .assert_eq (join_pdf .reset_index (drop = True ), join_kdf .reset_index (drop = True ))
1172
+
1146
1173
def test_replace (self ):
1147
1174
pdf = pd .DataFrame ({"name" : ['Ironman' , 'Captain America' , 'Thor' , 'Hulk' ],
1148
1175
"weapon" : ['Mark-45' , 'Shield' , 'Mjolnir' , 'Smash' ]})
@@ -1195,7 +1222,7 @@ def test_replace(self):
1195
1222
1196
1223
def test_update (self ):
1197
1224
# check base function
1198
- def get_data ():
1225
+ def get_data (left_columns = None , right_columns = None ):
1199
1226
left_pdf = pd .DataFrame ({'A' : ['1' , '2' , '3' , '4' ],
1200
1227
'B' : ['100' , '200' , np .nan , np .nan ]},
1201
1228
columns = ['A' , 'B' ])
@@ -1206,6 +1233,12 @@ def get_data():
1206
1233
columns = ['A' , 'B' ])
1207
1234
right_kdf = ks .DataFrame ({'B' : ['x' , None , 'y' , None ],
1208
1235
'C' : ['100' , '200' , '300' , '400' ]}, columns = ['B' , 'C' ])
1236
+ if left_columns is not None :
1237
+ left_pdf .columns = left_columns
1238
+ left_kdf .columns = left_columns
1239
+ if right_columns is not None :
1240
+ right_pdf .columns = right_columns
1241
+ right_kdf .columns = right_columns
1209
1242
return left_kdf , left_pdf , right_kdf , right_pdf
1210
1243
1211
1244
left_kdf , left_pdf , right_kdf , right_pdf = get_data ()
@@ -1221,6 +1254,32 @@ def get_data():
1221
1254
with self .assertRaises (NotImplementedError ):
1222
1255
left_kdf .update (right_kdf , join = 'right' )
1223
1256
1257
+ # multi-index columns
1258
+ left_columns = pd .MultiIndex .from_tuples ([('X' , 'A' ), ('X' , 'B' )])
1259
+ right_columns = pd .MultiIndex .from_tuples ([('X' , 'B' ), ('Y' , 'C' )])
1260
+
1261
+ left_kdf , left_pdf , right_kdf , right_pdf = get_data (left_columns = left_columns ,
1262
+ right_columns = right_columns )
1263
+ left_pdf .update (right_pdf )
1264
+ left_kdf .update (right_kdf )
1265
+ self .assert_eq (left_pdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]),
1266
+ left_kdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]))
1267
+
1268
+ left_kdf , left_pdf , right_kdf , right_pdf = get_data (left_columns = left_columns ,
1269
+ right_columns = right_columns )
1270
+ left_pdf .update (right_pdf , overwrite = False )
1271
+ left_kdf .update (right_kdf , overwrite = False )
1272
+ self .assert_eq (left_pdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]),
1273
+ left_kdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]))
1274
+
1275
+ right_columns = pd .MultiIndex .from_tuples ([('Y' , 'B' ), ('Y' , 'C' )])
1276
+ left_kdf , left_pdf , right_kdf , right_pdf = get_data (left_columns = left_columns ,
1277
+ right_columns = right_columns )
1278
+ left_pdf .update (right_pdf )
1279
+ left_kdf .update (right_kdf )
1280
+ self .assert_eq (left_pdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]),
1281
+ left_kdf .sort_values (by = [('X' , 'A' ), ('X' , 'B' )]))
1282
+
1224
1283
def test_pivot_table_dtypes (self ):
1225
1284
pdf = pd .DataFrame ({'a' : [4 , 2 , 3 , 4 , 8 , 6 ],
1226
1285
'b' : [1 , 2 , 2 , 4 , 2 , 4 ],
0 commit comments