Skip to content

Commit

Permalink
Fix binary operations between Index and Series. (#2046)
Browse files Browse the repository at this point in the history
Binary operations with an `Index` on the left-hand side and a `Series` on the right-hand side raise an error when the `Series` has no name.

```py
>>> ks.Index([1, 2, 3]) + ks.Series([10, 20, 30])
Traceback (most recent call last):
...
TypeError: object of type 'NoneType' has no len()

>>> ks.Index([1, 2, 3]) - ks.Series([10, 20, 30])
Traceback (most recent call last):
...
TypeError: object of type 'NoneType' has no len()

>>> ks.Index([1, 2, 3]) / ks.Series([10, 20, 30])
Traceback (most recent call last):
...
TypeError: object of type 'NoneType' has no len()

>>> ks.Index([1, 2, 3]) * ks.Series([10, 20, 30])
Traceback (most recent call last):
...
TypeError: object of type 'NoneType' has no len()
```

Resolves #2045.
  • Loading branch information
ueshin authored Feb 9, 2021
1 parent dca91a4 commit e6a9628
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 14 deletions.
31 changes: 21 additions & 10 deletions databricks/koalas/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,15 +96,19 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index
cols = [arg for arg in args if isinstance(arg, IndexOpsMixin)]

if isinstance(this_index_ops, Series) and all(isinstance(col, Series) for col in cols):
combined = combine_frames(this_index_ops.to_frame(), *cols, how="full")
combined = combine_frames(
this_index_ops.to_frame(),
*[cast(Series, col).rename(i) for i, col in enumerate(cols)],
how="full"
)

return column_op(func)(
combined["this"]._kser_for(combined["this"]._internal.column_labels[0]),
*[
combined["that"]._kser_for(label)
for label in combined["that"]._internal.column_labels
]
)
).rename(this_index_ops.name)
else:
# This could cause as many counts, reset_index calls, joins for combining
# as the number of `Index`s in `args`. So far it's fine since we can assume the ops
Expand Down Expand Up @@ -137,10 +141,10 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index
elif isinstance(this_index_ops, Series):
this = this_index_ops.reset_index()
that = [
cast(Series, col.to_series() if isinstance(col, Index) else col).reset_index(
drop=True
)
for col in cols
cast(Series, col.to_series() if isinstance(col, Index) else col)
.rename(i)
.reset_index(drop=True)
for i, col in enumerate(cols)
]

combined = combine_frames(this, *that, how="full").sort_index()
Expand All @@ -155,13 +159,16 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index
combined["that"]._kser_for(label)
for label in combined["that"]._internal.column_labels
]
)
).rename(this_index_ops.name)
else:
this = cast(Index, this_index_ops).to_frame().reset_index(drop=True)

that_series = next(col for col in cols if isinstance(col, Series))
that_frame = that_series._kdf[
[col.to_series() if isinstance(col, Index) else col for col in cols]
[
cast(Series, col.to_series() if isinstance(col, Index) else col).rename(i)
for i, col in enumerate(cols)
]
]

combined = combine_frames(this, that_frame.reset_index()).sort_index()
Expand All @@ -176,8 +183,12 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index
other.index.names = that_series._internal.index_names

return column_op(func)(
self_index, *[other._kser_for(label) for label in other._internal.column_labels]
)
self_index,
*[
other._kser_for(label)
for label, col in zip(other._internal.column_labels, cols)
]
).rename(that_series.name)


def booleanize_null(left_scol, scol, f) -> Column:
Expand Down
42 changes: 38 additions & 4 deletions databricks/koalas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,15 +1365,41 @@ def test_series_repeat(self):

def test_series_ops(self):
pser1 = pd.Series([1, 2, 3, 4, 5, 6, 7], name="x", index=[11, 12, 13, 14, 15, 16, 17])
pser2 = pd.Series([1, 2, 3, 4, 5, 6, 7], index=[11, 12, 13, 14, 15, 16, 17])
pidx1 = pd.Index([10, 11, 12, 13, 14, 15, 16])
pser2 = pd.Series([1, 2, 3, 4, 5, 6, 7], name="x", index=[11, 12, 13, 14, 15, 16, 17])
pidx1 = pd.Index([10, 11, 12, 13, 14, 15, 16], name="x")
kser1 = ks.from_pandas(pser1)
kser2 = ks.from_pandas(pser2)
kidx1 = ks.from_pandas(pidx1)

self.assert_eq((kser1 + 1 + 10 * kser2).sort_index(), (pser1 + 1 + 10 * pser2).sort_index())
self.assert_eq(
(kser1 + 1 + 10 * kser2.rename()).sort_index(),
(pser1 + 1 + 10 * pser2.rename()).sort_index(),
)
self.assert_eq(
(kser1.rename() + 1 + 10 * kser2).sort_index(),
(pser1.rename() + 1 + 10 * pser2).sort_index(),
)
self.assert_eq(
(kser1.rename() + 1 + 10 * kser2.rename()).sort_index(),
(pser1.rename() + 1 + 10 * pser2.rename()).sort_index(),
)

self.assert_eq(kser1 + 1 + 10 * kidx1, pser1 + 1 + 10 * pidx1)
self.assert_eq(kser1.rename() + 1 + 10 * kidx1, pser1.rename() + 1 + 10 * pidx1)
self.assert_eq(kser1 + 1 + 10 * kidx1.rename(None), pser1 + 1 + 10 * pidx1.rename(None))
self.assert_eq(
kser1.rename() + 1 + 10 * kidx1.rename(None),
pser1.rename() + 1 + 10 * pidx1.rename(None),
)

self.assert_eq(kidx1 + 1 + 10 * kser1, pidx1 + 1 + 10 * pser1)
self.assert_eq(kidx1 + 1 + 10 * kser1.rename(), pidx1 + 1 + 10 * pser1.rename())
self.assert_eq(kidx1.rename(None) + 1 + 10 * kser1, pidx1.rename(None) + 1 + 10 * pser1)
self.assert_eq(
kidx1.rename(None) + 1 + 10 * kser1.rename(),
pidx1.rename(None) + 1 + 10 * pser1.rename(),
)

pidx2 = pd.Index([11, 12, 13])
kidx2 = ks.from_pandas(pidx2)
Expand All @@ -1389,12 +1415,20 @@ def test_series_ops(self):
kidx2 + kser1

def test_index_ops(self):
pidx1 = pd.Index([1, 2, 3, 4, 5])
pidx2 = pd.Index([6, 7, 8, 9, 10])
pidx1 = pd.Index([1, 2, 3, 4, 5], name="x")
pidx2 = pd.Index([6, 7, 8, 9, 10], name="x")
kidx1 = ks.from_pandas(pidx1)
kidx2 = ks.from_pandas(pidx2)

self.assert_eq(kidx1 * 10 + kidx2, pidx1 * 10 + pidx2)
self.assert_eq(kidx1.rename(None) * 10 + kidx2, pidx1.rename(None) * 10 + pidx2)

if LooseVersion(pd.__version__) >= LooseVersion("1.0"):
self.assert_eq(kidx1 * 10 + kidx2.rename(None), pidx1 * 10 + pidx2.rename(None))
else:
self.assert_eq(
kidx1 * 10 + kidx2.rename(None), (pidx1 * 10 + pidx2.rename(None)).rename(None)
)

pidx3 = pd.Index([11, 12, 13])
kidx3 = ks.from_pandas(pidx3)
Expand Down

0 comments on commit e6a9628

Please sign in to comment.