Skip to content

Commit

Permalink
Explicitly disallow empty list as index_spark_column_names and index_n…
Browse files Browse the repository at this point in the history
…ames.
  • Loading branch information
ueshin committed Nov 6, 2020
1 parent 3237002 commit 478b0e5
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 35 deletions.
19 changes: 4 additions & 15 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3495,21 +3495,10 @@ def rename(index):
scol_for(sdf, column).alias(name_like_string(name)) for column, name in new_index_map
]

if len(index_map) > 0: # type: ignore
index_scols = [scol_for(sdf, column) for column in index_map]
sdf = sdf.select(
index_scols
+ new_data_scols
+ self._internal.data_spark_columns
+ list(HIDDEN_COLUMNS)
)
else:
sdf = sdf.select(
new_data_scols + self._internal.data_spark_columns + list(HIDDEN_COLUMNS)
)

sdf = InternalFrame.attach_default_index(sdf)
index_map = OrderedDict({SPARK_DEFAULT_INDEX_NAME: None})
index_scols = [scol_for(sdf, column) for column in index_map]
sdf = sdf.select(
index_scols + new_data_scols + self._internal.data_spark_columns + list(HIDDEN_COLUMNS)
)

if self._internal.column_labels_level > 1:
column_depth = len(self._internal.column_labels[0])
Expand Down
24 changes: 11 additions & 13 deletions databricks/koalas/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@
from databricks.koalas.config import get_option
from databricks.koalas.utils import (
align_diff_frames,
column_labels_level,
is_name_like_tuple,
is_name_like_value,
name_like_string,
Expand Down Expand Up @@ -1318,29 +1317,28 @@ def _make_pandas_df_builder_func(kdf, func, return_schema, retain_index):
index_names = kdf._internal.index_names
data_columns = kdf._internal.data_spark_column_names
column_labels = kdf._internal.column_labels
column_labels_level = kdf._internal.column_labels_level

def rename_output(pdf):
# TODO: This logic below was borrowed from `DataFrame.to_pandas_frame` to set the index
# within each pdf properly. we might have to deduplicate it.
import pandas as pd

if len(index_columns) > 0:
append = False
for index_field in index_columns:
drop = index_field not in data_columns
pdf = pdf.set_index(index_field, drop=drop, append=append)
append = True
pdf = pdf[data_columns]
append = False
for index_field in index_columns:
drop = index_field not in data_columns
pdf = pdf.set_index(index_field, drop=drop, append=append)
append = True
pdf = pdf[data_columns]

if column_labels_level(column_labels) > 1:
if column_labels_level > 1:
pdf.columns = pd.MultiIndex.from_tuples(column_labels)
else:
pdf.columns = [None if label is None else label[0] for label in column_labels]

if len(index_names) > 0:
pdf.index.names = [
name if name is None or len(name) > 1 else name[0] for name in index_names
]
pdf.index.names = [
name if name is None or len(name) > 1 else name[0] for name in index_names
]

pdf = func(pdf)

Expand Down
13 changes: 6 additions & 7 deletions databricks/koalas/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ def __init__(
assert isinstance(spark_frame, spark.DataFrame)
assert not spark_frame.isStreaming, "Koalas does not support Structured Streaming."

if index_spark_column_names is None:
if not index_spark_column_names:
assert not any(SPARK_INDEX_NAME_PATTERN.match(name) for name in spark_frame.columns), (
"Index columns should not appear in columns of the Spark DataFrame. Avoid "
"index column names [%s]." % SPARK_INDEX_NAME_PATTERN
Expand All @@ -470,7 +470,7 @@ def __init__(
NATURAL_ORDER_COLUMN_NAME, F.monotonically_increasing_id()
)

if index_names is None:
if not index_names:
index_names = [None] * len(index_spark_column_names)

assert len(index_spark_column_names) == len(index_names), (
Expand Down Expand Up @@ -857,11 +857,10 @@ def to_pandas_frame(self) -> pd.DataFrame:
name=names[0],
)

index_names = self.index_names
if len(index_names) > 0:
pdf.index.names = [
name if name is None or len(name) > 1 else name[0] for name in index_names
]
pdf.index.names = [
name if name is None or len(name) > 1 else name[0] for name in self.index_names
]

return pdf

@lazy_property
Expand Down
1 change: 1 addition & 0 deletions databricks/koalas/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ def test_reset_index(self):
kdf = ks.from_pandas(pdf)

self.assert_eq(kdf.reset_index(), pdf.reset_index())
self.assert_eq(kdf.reset_index().index, pdf.reset_index().index)
self.assert_eq(kdf.reset_index(drop=True), pdf.reset_index(drop=True))

pdf.index.name = "a"
Expand Down

0 comments on commit 478b0e5

Please sign in to comment.