Skip to content

Commit

Permalink
fix and add
Browse files Browse the repository at this point in the history
  • Loading branch information
itholic committed Dec 3, 2019
1 parent 7b7813e commit e5f198f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
21 changes: 17 additions & 4 deletions databricks/koalas/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,25 +1262,38 @@ def groupby(self, by, as_index: bool = True):
from databricks.koalas.groupby import DataFrameGroupBy, SeriesGroupBy

df_or_s = self
if isinstance(by, str):
if isinstance(by, DataFrame):
raise ValueError("Grouper for '{}' not 1-dimensional".format(type(by)))
elif isinstance(by, str):
if isinstance(df_or_s, Series):
raise KeyError(by)
by = [(by,)]
elif isinstance(by, tuple):
if isinstance(df_or_s, Series):
for key in by:
if isinstance(key, str):
raise KeyError(key)
for key in by:
if isinstance(key, DataFrame):
raise ValueError("Grouper for '{}' not 1-dimensional".format(type(key)))
by = [by]
elif isinstance(by, Series):
by = [by]
elif isinstance(by, Iterable):
if isinstance(df_or_s, Series):
for key in by:
if isinstance(key, str):
raise KeyError(key)
by = [key if isinstance(key, (tuple, Series)) else (key,) for key in by]
else:
raise ValueError('Not a valid index: TODO')
raise ValueError("Grouper for '{}' not 1-dimensional".format(type(by)))
if not len(by):
raise ValueError('No group keys passed!')
if isinstance(df_or_s, DataFrame):
df = df_or_s # type: DataFrame
col_by = [_resolve_col(df, col_or_s) for col_or_s in by]
return DataFrameGroupBy(df_or_s, col_by, as_index=as_index)
if isinstance(df_or_s, Series):
if not isinstance(by[0], Series):
raise KeyError(by[0])
col = df_or_s # type: Series
anchor = df_or_s._kdf
col_by = [_resolve_col(anchor, col_or_s) for col_or_s in by]
Expand Down
4 changes: 4 additions & 0 deletions databricks/koalas/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,10 @@ def test_groupby(self):
self.assertRaises(KeyError, lambda: kdf.a.groupby(by='a'))
self.assertRaises(KeyError, lambda: kdf.a.groupby(by=['a', 'b']))

# A DataFrame is not a valid `by` argument: both `DataFrame.groupby` and `Series.groupby` raise ValueError.
self.assertRaises(ValueError, lambda: kdf.groupby(kdf))
self.assertRaises(ValueError, lambda: kdf.a.groupby(kdf))

def test_groupby_multiindex_columns(self):
pdf = pd.DataFrame({('x', 'a'): [1, 2, 6, 4, 4, 6, 4, 3, 7],
('x', 'b'): [4, 2, 7, 3, 3, 1, 1, 1, 2],
Expand Down

0 comments on commit e5f198f

Please sign in to comment.