diff --git a/databricks/koalas/base.py b/databricks/koalas/base.py index d4c05ff51a..b8c8c5e21e 100644 --- a/databricks/koalas/base.py +++ b/databricks/koalas/base.py @@ -96,7 +96,11 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index cols = [arg for arg in args if isinstance(arg, IndexOpsMixin)] if isinstance(this_index_ops, Series) and all(isinstance(col, Series) for col in cols): - combined = combine_frames(this_index_ops.to_frame(), *cols, how="full") + combined = combine_frames( + this_index_ops.to_frame(), + *[cast(Series, col).rename(i) for i, col in enumerate(cols)], + how="full" + ) return column_op(func)( combined["this"]._kser_for(combined["this"]._internal.column_labels[0]), @@ -104,7 +108,7 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index combined["that"]._kser_for(label) for label in combined["that"]._internal.column_labels ] - ) + ).rename(this_index_ops.name) else: # This could cause as many counts, reset_index calls, joins for combining # as the number of `Index`s in `args`. So far it's fine since we can assume the ops @@ -137,10 +141,10 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index elif isinstance(this_index_ops, Series): this = this_index_ops.reset_index() that = [ - cast(Series, col.to_series() if isinstance(col, Index) else col).reset_index( - drop=True - ) - for col in cols + cast(Series, col.to_series() if isinstance(col, Index) else col) + .rename(i) + .reset_index(drop=True) + for i, col in enumerate(cols) ] combined = combine_frames(this, *that, how="full").sort_index() @@ -155,13 +159,16 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index combined["that"]._kser_for(label) for label in combined["that"]._internal.column_labels ] - ) + ).rename(this_index_ops.name) else: this = cast(Index, this_index_ops).to_frame().reset_index(drop=True) that_series = next(col for col in cols if isinstance(col, Series)) that_frame = that_series._kdf[ - [col.to_series() if isinstance(col, Index) else col for col in cols] + [ + cast(Series, col.to_series() if isinstance(col, Index) else col).rename(i) + for i, col in enumerate(cols) + ] ] combined = combine_frames(this, that_frame.reset_index()).sort_index() @@ -176,8 +183,12 @@ def align_diff_index_ops(func, this_index_ops: "IndexOpsMixin", *args) -> "Index other.index.names = that_series._internal.index_names return column_op(func)( - self_index, *[other._kser_for(label) for label in other._internal.column_labels] - ) + self_index, + *[ + other._kser_for(label) + for label, col in zip(other._internal.column_labels, cols) + ] + ).rename(that_series.name) def booleanize_null(left_scol, scol, f) -> Column: diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py index 02fa9eed41..c36d99147c 100644 --- a/databricks/koalas/tests/test_ops_on_diff_frames.py +++ b/databricks/koalas/tests/test_ops_on_diff_frames.py @@ -1365,15 +1365,41 @@ def test_series_repeat(self): def test_series_ops(self): pser1 = pd.Series([1, 2, 3, 4, 5, 6, 7], name="x", index=[11, 12, 13, 14, 15, 16, 17]) - pser2 = pd.Series([1, 2, 3, 4, 5, 6, 7], index=[11, 12, 13, 14, 15, 16, 17]) - pidx1 = pd.Index([10, 11, 12, 13, 14, 15, 16]) + pser2 = pd.Series([1, 2, 3, 4, 5, 6, 7], name="x", index=[11, 12, 13, 14, 15, 16, 17]) + pidx1 = pd.Index([10, 11, 12, 13, 14, 15, 16], name="x") kser1 = ks.from_pandas(pser1) kser2 = ks.from_pandas(pser2) kidx1 = ks.from_pandas(pidx1) self.assert_eq((kser1 + 1 + 10 * kser2).sort_index(), (pser1 + 1 + 10 * pser2).sort_index()) + self.assert_eq( + (kser1 + 1 + 10 * kser2.rename()).sort_index(), + (pser1 + 1 + 10 * pser2.rename()).sort_index(), + ) + self.assert_eq( + (kser1.rename() + 1 + 10 * kser2).sort_index(), + (pser1.rename() + 1 + 10 * pser2).sort_index(), + ) + self.assert_eq( + (kser1.rename() + 1 + 10 * kser2.rename()).sort_index(), + (pser1.rename() + 1 + 10 * pser2.rename()).sort_index(), + ) + self.assert_eq(kser1 + 1 + 10 * kidx1, pser1 + 1 + 10 * pidx1) + self.assert_eq(kser1.rename() + 1 + 10 * kidx1, pser1.rename() + 1 + 10 * pidx1) + self.assert_eq(kser1 + 1 + 10 * kidx1.rename(None), pser1 + 1 + 10 * pidx1.rename(None)) + self.assert_eq( + kser1.rename() + 1 + 10 * kidx1.rename(None), + pser1.rename() + 1 + 10 * pidx1.rename(None), + ) + self.assert_eq(kidx1 + 1 + 10 * kser1, pidx1 + 1 + 10 * pser1) + self.assert_eq(kidx1 + 1 + 10 * kser1.rename(), pidx1 + 1 + 10 * pser1.rename()) + self.assert_eq(kidx1.rename(None) + 1 + 10 * kser1, pidx1.rename(None) + 1 + 10 * pser1) + self.assert_eq( + kidx1.rename(None) + 1 + 10 * kser1.rename(), + pidx1.rename(None) + 1 + 10 * pser1.rename(), + ) pidx2 = pd.Index([11, 12, 13]) kidx2 = ks.from_pandas(pidx2) @@ -1389,12 +1415,20 @@ def test_series_ops(self): kidx2 + kser1 def test_index_ops(self): - pidx1 = pd.Index([1, 2, 3, 4, 5]) - pidx2 = pd.Index([6, 7, 8, 9, 10]) + pidx1 = pd.Index([1, 2, 3, 4, 5], name="x") + pidx2 = pd.Index([6, 7, 8, 9, 10], name="x") kidx1 = ks.from_pandas(pidx1) kidx2 = ks.from_pandas(pidx2) self.assert_eq(kidx1 * 10 + kidx2, pidx1 * 10 + pidx2) + self.assert_eq(kidx1.rename(None) * 10 + kidx2, pidx1.rename(None) * 10 + pidx2) + + if LooseVersion(pd.__version__) >= LooseVersion("1.0"): + self.assert_eq(kidx1 * 10 + kidx2.rename(None), pidx1 * 10 + pidx2.rename(None)) + else: + self.assert_eq( + kidx1 * 10 + kidx2.rename(None), (pidx1 * 10 + pidx2.rename(None)).rename(None) + ) pidx3 = pd.Index([11, 12, 13]) kidx3 = ks.from_pandas(pidx3)