@@ -87,6 +87,7 @@ def combine_frames(this, *args, how="full", preserve_order_column=False):
     from databricks.koalas.frame import DataFrame
     from databricks.koalas.internal import (
         InternalFrame,
+        HIDDEN_COLUMNS,
         NATURAL_ORDER_COLUMN_NAME,
         SPARK_INDEX_NAME_FORMAT,
     )
@@ -108,8 +109,37 @@ def combine_frames(this, *args, how="full", preserve_order_column=False):
         raise AssertionError("args should be single DataFrame or " "single/multiple Series")

     if get_option("compute.ops_on_diff_frames"):
-        this_index_map = this._internal.index_map
-        that_index_map = that._internal.index_map
+
+        def resolve(internal, side):
+            rename = lambda col: "__{}_{}".format(side, col)
+            internal = internal.resolved_copy
+            sdf = internal.spark_frame
+            sdf = internal.spark_frame.select(
+                [
+                    scol_for(sdf, col).alias(rename(col))
+                    for col in sdf.columns
+                    if col not in HIDDEN_COLUMNS
+                ]
+                + list(HIDDEN_COLUMNS)
+            )
+            return internal.copy(
+                spark_frame=sdf,
+                index_map=OrderedDict(
+                    zip(
+                        [rename(col) for col in internal.index_spark_column_names],
+                        internal.index_names,
+                    )
+                ),
+                data_spark_columns=[
+                    scol_for(sdf, rename(col)) for col in internal.data_spark_column_names
+                ],
+            )
+
+        this_internal = resolve(this._internal, "this")
+        that_internal = resolve(that._internal, "that")
+
+        this_index_map = this_internal.index_map
+        that_index_map = that_internal.index_map
         assert len(this_index_map) == len(that_index_map)

         join_scols = []
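Note: the new resolve() helper above renames every visible Spark column with a "__this_"/"__that_" prefix before the join, so the two frames can no longer collide on column names. A minimal sketch of the same technique in plain PySpark; the toy frames, columns, and prefix strings here are illustrative, not part of this commit:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    left = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "x"])
    right = spark.createDataFrame([(1, "c"), (3, "d")], ["id", "x"])

    # Rename every column up front, as resolve() does, so the joined frame
    # carries no duplicate names and each side stays unambiguously addressable.
    left = left.select([left[c].alias("__this_{}".format(c)) for c in left.columns])
    right = right.select([right[c].alias("__that_{}".format(c)) for c in right.columns])

    joined = left.join(right, left["__this_id"] == right["__that_id"], "full")
    joined.show()  # columns: __this_id, __this_x, __that_id, __that_x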
@@ -119,8 +149,8 @@ def combine_frames(this, *args, how="full", preserve_order_column=False):
         # level.
         this_and_that_index_map = zip(this_index_map.items(), that_index_map.items())

-        this_sdf = this._internal.resolved_copy.spark_frame.alias("this")
-        that_sdf = that._internal.resolved_copy.spark_frame.alias("that")
+        this_sdf = this_internal.spark_frame.alias("this")
+        that_sdf = that_internal.spark_frame.alias("that")

         # If the same named index is found, that's used.
         index_column_names = []
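Note: the renamed frames are still aliased "this" and "that" so join conditions can name their side explicitly. A sketch of that aliasing pattern in plain PySpark; scol_for below is a local stand-in, assuming the koalas helper of the same name simply indexes a frame by its backticked column name:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

    def scol_for(sdf, column_name):
        # Stand-in for databricks.koalas.utils.scol_for: resolve a column
        # against one specific (possibly aliased) DataFrame.
        return sdf["`{}`".format(column_name)]

    this_sdf = spark.range(3).alias("this")
    that_sdf = spark.range(5).alias("that")

    # Each column is pulled from its own aliased frame, so the equality
    # condition stays unambiguous even though both sides expose "id".
    cond = scol_for(this_sdf, "id") == scol_for(that_sdf, "id")
    this_sdf.join(that_sdf, cond, "full").show()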
@@ -155,16 +185,12 @@ def combine_frames(this, *args, how="full", preserve_order_column=False):
         joined_df = joined_df.select(
             merged_index_scols
             + [
-                scol_for(this_sdf, this._internal.spark_column_name_for(label)).alias(
-                    "__this_%s" % this._internal.spark_column_name_for(label)
-                )
-                for label in this._internal.column_labels
+                scol_for(this_sdf, this_internal.spark_column_name_for(label))
+                for label in this_internal.column_labels
             ]
             + [
-                scol_for(that_sdf, that._internal.spark_column_name_for(label)).alias(
-                    "__that_%s" % that._internal.spark_column_name_for(label)
-                )
-                for label in that._internal.column_labels
+                scol_for(that_sdf, that_internal.spark_column_name_for(label))
+                for label in that_internal.column_labels
             ]
             + order_column
         )
@@ -175,7 +201,7 @@ def combine_frames(this, *args, how="full", preserve_order_column=False):
             for col in joined_df.columns
             if col not in index_columns and col != NATURAL_ORDER_COLUMN_NAME
         ]
-        level = max(this._internal.column_labels_level, that._internal.column_labels_level)
+        level = max(this_internal.column_labels_level, that_internal.column_labels_level)

         def fill_label(label):
             if label is None:
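Note: fill_label left-pads shorter labels with empty strings so labels from frames of different column_labels_level meet at one common MultiIndex depth before the "this"/"that" prefix is attached. A self-contained re-run of that logic; level is hard-coded to 2, and the `label is None` branch is an assumption reconstructed from context since this hunk truncates it:

    level = 2  # max(this_internal.column_labels_level, that_internal.column_labels_level)

    def fill_label(label):
        if label is None:
            # Assumed body of the truncated branch: pad, keep a trailing None.
            return ([""] * (level - 1)) + [None]
        else:
            return ([""] * (level - len(label))) + list(label)

    # A single-level label is padded to depth 2, then prefixed with its side,
    # mirroring the column_labels construction in the hunk below.
    print(tuple(["this"] + fill_label(("x",))))      # ('this', '', 'x')
    print(tuple(["that"] + fill_label(("a", "b"))))  # ('that', 'a', 'b')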
@@ -184,15 +210,15 @@ def fill_label(label):
                 return ([""] * (level - len(label))) + list(label)

         column_labels = [
-            tuple(["this"] + fill_label(label)) for label in this._internal.column_labels
-        ] + [tuple(["that"] + fill_label(label)) for label in that._internal.column_labels]
+            tuple(["this"] + fill_label(label)) for label in this_internal.column_labels
+        ] + [tuple(["that"] + fill_label(label)) for label in that_internal.column_labels]
         column_label_names = (
-            [None] * (1 + level - this._internal.column_labels_level)
-        ) + this._internal.column_label_names
+            [None] * (1 + level - this_internal.column_labels_level)
+        ) + this_internal.column_label_names
         return DataFrame(
             InternalFrame(
                 spark_frame=joined_df,
-                index_map=OrderedDict(zip(index_column_names, this._internal.index_names)),
+                index_map=OrderedDict(zip(index_column_names, this_internal.index_names)),
                 column_labels=column_labels,
                 data_spark_columns=[scol_for(joined_df, col) for col in new_data_columns],
                 column_label_names=column_label_names,
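Note: this entire path runs only when compute.ops_on_diff_frames is enabled. A hypothetical end-to-end exercise, assuming the conventional import alias ks and that arithmetic between Series backed by different DataFrames is what routes through combine_frames:

    import databricks.koalas as ks

    # Off by default: combining objects from different DataFrames raises.
    ks.set_option("compute.ops_on_diff_frames", True)

    kdf1 = ks.DataFrame({"a": [1, 2, 3]})
    kdf2 = ks.DataFrame({"a": [10, 20, 30]})

    # The two Series live in different DataFrames, so they are first joined
    # on their indexes (via combine_frames) and then summed column-wise.
    print((kdf1["a"] + kdf2["a"]).to_string())

    ks.reset_option("compute.ops_on_diff_frames")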