Skip to content

Commit 1e1882b

Browse files
authored
Rename Spark columns only when `index=False`. (#1649)
Consolidates the logic for `Index.to_frame()` and `MultiIndex.to_frame()`, and renames Spark columns only when `index=False`. Related to #1647 (comment), but does not fully fix it.
1 parent 156a4f5 commit 1e1882b

File tree

1 file changed

+19
-28
lines changed

1 file changed

+19
-28
lines changed

databricks/koalas/indexes.py

+19-28
Original file line numberDiff line numberDiff line change
@@ -748,20 +748,32 @@ def to_frame(self, index=True, name=None) -> DataFrame:
748748
name = self._internal.index_names[0]
749749
elif isinstance(name, str):
750750
name = (name,)
751-
scol = self.spark.column.alias(name_like_string(name))
752751

753-
sdf = self._internal.spark_frame.select(scol, NATURAL_ORDER_COLUMN_NAME)
752+
return self._to_frame(index=index, names=[name])
754753

754+
def _to_frame(self, index, names):
755755
if index:
756-
index_map = OrderedDict({name_like_string(name): self._internal.index_names[0]})
756+
index_map = self._internal.index_map
757+
data_columns = self._internal.index_spark_column_names
758+
sdf = self._internal.spark_frame.select(
759+
self._internal.index_spark_columns + [NATURAL_ORDER_COLUMN_NAME]
760+
)
757761
else:
758-
index_map = None # type: ignore
762+
index_map = None
763+
data_columns = [name_like_string(label) for label in names]
764+
sdf = self._internal.spark_frame.select(
765+
[
766+
scol.alias(col)
767+
for scol, col in zip(self._internal.index_spark_columns, data_columns)
768+
]
769+
+ [NATURAL_ORDER_COLUMN_NAME]
770+
)
759771

760772
internal = InternalFrame(
761773
spark_frame=sdf,
762774
index_map=index_map,
763-
column_labels=[name],
764-
data_spark_columns=[scol_for(sdf, name_like_string(name))],
775+
column_labels=names,
776+
data_spark_columns=[scol_for(sdf, col) for col in data_columns],
765777
)
766778
return DataFrame(internal)
767779

@@ -2385,28 +2397,7 @@ def to_frame(self, index=True, name=None) -> DataFrame:
23852397
else:
23862398
raise TypeError("'name' must be a list / sequence of column names.")
23872399

2388-
sdf = self._internal.spark_frame.select(
2389-
[
2390-
scol.alias(name_like_string(label))
2391-
for scol, label in zip(self._internal.index_spark_columns, name)
2392-
]
2393-
+ [NATURAL_ORDER_COLUMN_NAME]
2394-
)
2395-
2396-
if index:
2397-
index_map = OrderedDict(
2398-
(name_like_string(label), n) for label, n in zip(name, self._internal.index_names)
2399-
)
2400-
else:
2401-
index_map = None # type: ignore
2402-
2403-
internal = InternalFrame(
2404-
spark_frame=sdf,
2405-
index_map=index_map,
2406-
column_labels=name,
2407-
data_spark_columns=[scol_for(sdf, name_like_string(label)) for label in name],
2408-
)
2409-
return DataFrame(internal)
2400+
return self._to_frame(index=index, names=name)
24102401

24112402
def to_pandas(self) -> pd.MultiIndex:
24122403
"""

0 commit comments

Comments
 (0)