Skip to content

Commit

Permalink
Test plan
Browse files Browse the repository at this point in the history
  • Loading branch information
xinrong-meng committed Dec 4, 2020
1 parent 901a6f0 commit 1cb3e48
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 0 deletions.
57 changes: 57 additions & 0 deletions databricks/koalas/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4008,6 +4008,63 @@ def duplicated(self, subset=None, keep="first") -> "Series":
)
)

def dot(self, other: "Series") -> "Series":
    """
    Compute the matrix multiplication between the DataFrame and other.

    This method computes the matrix product between the DataFrame and the
    values of another Series.

    It can also be called using ``self @ other`` in Python >= 3.5.

    Parameters
    ----------
    other : Series
        The other object to compute the matrix product with.

    Returns
    -------
    Series
        Return the matrix product between self and other as a Series.

    Raises
    ------
    TypeError
        If ``other`` is not a Koalas Series.

    See Also
    --------
    Series.dot: Similar method for Series.

    Notes
    -----
    The dimensions of DataFrame and other must be compatible in order to
    compute the matrix multiplication. In addition, the column names of
    DataFrame and the index of other must contain the same values, as they
    will be aligned prior to the multiplication.

    The dot method for Series computes the inner product, instead of the
    matrix product here.

    Examples
    --------
    >>> from databricks.koalas.config import set_option, reset_option
    >>> set_option("compute.ops_on_diff_frames", True)
    >>> kdf = ks.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
    >>> kser = ks.Series([1, 1, 2, 1])

    >>> kdf.dot(kser)
    0   -4
    1    5
    dtype: int64

    Note how shuffling of the objects does not change the result.

    >>> kser2 = kser.reindex([1, 0, 2, 3])
    >>> kdf.dot(kser2)
    0   -4
    1    5
    dtype: int64

    >>> reset_option("compute.ops_on_diff_frames")
    """
    # Guard clause: only a Koalas Series is accepted as the right operand.
    if not isinstance(other, ks.Series):
        raise TypeError("Unsupported type {}".format(type(other).__name__))

    # Delegate to Series.dot against the transposed frame, then clear the
    # name of the resulting Series with rename(None).
    return other.dot(self.transpose()).rename(None)

def to_koalas(self, index_col: Optional[Union[str, List[str]]] = None) -> "DataFrame":
"""
Converts the existing DataFrame into a Koalas DataFrame.
Expand Down
51 changes: 51 additions & 0 deletions databricks/koalas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,57 @@ def test_dot(self):
pdf = kdf.to_pandas()
self.assert_eq(kser.dot(kdf), pser.dot(pdf))

def test_frame_dot(self):
    """Compare Koalas ``DataFrame.dot(Series)`` with pandas for several index layouts."""
    rows = [[0, 1, -2, -1], [1, 1, 1, 1]]
    weights = [1, 1, 2, 1]

    # Default integer labels on both operands.
    pandas_frame = pd.DataFrame(rows)
    koalas_frame = ks.from_pandas(pandas_frame)
    pandas_ser = pd.Series(weights)
    koalas_ser = ks.from_pandas(pandas_ser)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Series index in a different order than the frame's columns.
    pandas_ser = pandas_ser.reindex([1, 0, 2, 3])
    koalas_ser = ks.from_pandas(pandas_ser)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Named Series.
    pandas_ser.name = "ser"
    koalas_ser = ks.from_pandas(pandas_ser)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # MultiIndex used as the frame's columns and as the Series index.
    level_values = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
    labels = pd.MultiIndex.from_arrays(level_values, names=("number", "color"))
    pandas_ser = pd.Series(weights, index=labels)
    pandas_frame = pd.DataFrame(rows, columns=labels)
    koalas_frame = ks.from_pandas(pandas_frame)
    koalas_ser = ks.from_pandas(pandas_ser)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Named flat Index used as the frame's columns and as the Series index.
    labels = pd.Index([1, 2, 3, 4], name="number")
    pandas_ser = pd.Series(weights, index=labels)
    pandas_frame = pd.DataFrame(rows, columns=labels)
    koalas_frame = ks.from_pandas(pandas_frame)
    koalas_ser = ks.from_pandas(pandas_ser)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Same Series, frame now carries a named row Index.
    pandas_frame.index = pd.Index(["x", "y"], name="char")
    koalas_frame = ks.from_pandas(pandas_frame)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Same Series, frame now carries a MultiIndex on its rows.
    pandas_frame.index = pd.MultiIndex.from_arrays(
        [[1, 1], ["red", "blue"]], names=("number", "color")
    )
    koalas_frame = ks.from_pandas(pandas_frame)
    self.assert_eq(koalas_frame.dot(koalas_ser), pandas_frame.dot(pandas_ser))

    # Right operand taken from the frame itself, plain and after arithmetic.
    pandas_frame = pd.DataFrame([[1, 2], [3, 4]])
    koalas_frame = ks.from_pandas(pandas_frame)
    self.assert_eq(
        koalas_frame.dot(koalas_frame[0]),
        pandas_frame.dot(pandas_frame[0]),
    )
    self.assert_eq(
        koalas_frame.dot(koalas_frame[0] * 10),
        pandas_frame.dot(pandas_frame[0] * 10),
    )
    self.assert_eq(
        (koalas_frame + 1).dot(koalas_frame[0] * 10),
        (pandas_frame + 1).dot(pandas_frame[0] * 10),
    )

def test_to_series_comparison(self):
kidx1 = ks.Index([1, 2, 3, 4, 5])
kidx2 = ks.Index([1, 2, 3, 4, 5])
Expand Down

0 comments on commit 1cb3e48

Please sign in to comment.