databricks · ueshin · Dec 8, 2020 · Nov 30, 2020 · Dec 4, 2020 · Dec 4, 2020
diff --git a/databricks/koalas/frame.py b/databricks/koalas/frame.py
@@ -4008,6 +4008,65 @@ def duplicated(self, subset=None, keep="first") -> "Series":
             )
         )
 
+    def dot(self, other: "Series") -> "Series":
+        """
+        Compute the matrix multiplication between the DataFrame and other.
+
+        This method computes the matrix product between the DataFrame and the
+        values of another Series.
+
+        It can also be called using ``self @ other`` in Python >= 3.5.
+
+        Parameters
+        ----------
+        other : Series
+            The Series to multiply with; any other type raises ``TypeError``.
+
+        Returns
+        -------
+        Series
+            The matrix product of self and other, returned without a name.
+
+        See Also
+        --------
+        Series.dot: Similar method for Series.
+
+        Notes
+        -----
+        The dimensions of the DataFrame and other must be compatible for the
+        matrix multiplication to be computed. In addition, the column names
+        of the DataFrame and the index of other must contain the same values,
+        because the two are aligned before the multiplication.
+
+        The dot method for Series computes the inner product, instead of the
+        matrix product here.
+
+        Examples
+        --------
+        >>> from databricks.koalas.config import set_option, reset_option
+        >>> set_option("compute.ops_on_diff_frames", True)
+        >>> kdf = ks.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
+        >>> kser = ks.Series([1, 1, 2, 1])
+        >>> kdf.dot(kser)
+        0   -4
+        1    5
+        dtype: int64
+
+        Note how shuffling of the objects does not change the result.
+
+        >>> kser2 = kser.reindex([1, 0, 2, 3])
+        >>> kdf.dot(kser2)
+        0   -4
+        1    5
+        dtype: int64
+
+        >>> reset_option("compute.ops_on_diff_frames")
+        """
+        if isinstance(other, ks.Series):
+            # A @ v equals v @ A.T, so reuse Series.dot on the transposed frame.
+            return cast(ks.Series, other.dot(self.transpose())).rename(None)
+        raise TypeError("Unsupported type {}".format(type(other).__name__))
+
     def to_koalas(self, index_col: Optional[Union[str, List[str]]] = None) -> "DataFrame":
         """
         Converts the existing DataFrame into a Koalas DataFrame.

diff --git a/databricks/koalas/missing/frame.py b/databricks/koalas/missing/frame.py
@@ -47,7 +47,6 @@ class _MissingPandasLikeDataFrame(object):
     convert_dtypes = _unsupported_function("convert_dtypes")
     corrwith = _unsupported_function("corrwith")
     cov = _unsupported_function("cov")
-    dot = _unsupported_function("dot")
     ewm = _unsupported_function("ewm")
     first = _unsupported_function("first")
     infer_objects = _unsupported_function("infer_objects")

diff --git a/databricks/koalas/tests/test_ops_on_diff_frames.py b/databricks/koalas/tests/test_ops_on_diff_frames.py
@@ -852,7 +852,7 @@ def test_multi_index_column_assignment_frame(self):
         with self.assertRaisesRegex(KeyError, "Key length \\(3\\) exceeds index depth \\(2\\)"):
             kdf[("1", "2", "3")] = ks.Series([100, 200, 300, 200])
 
-    def test_dot(self):
+    def test_series_dot(self):
         pser = pd.Series([90, 91, 85], index=[2, 4, 1])
         kser = ks.from_pandas(pser)
         pser_other = pd.Series([90, 91, 85], index=[2, 4, 1])
@@ -917,6 +917,57 @@ def test_dot(self):
         pdf = kdf.to_pandas()
         self.assert_eq(kser.dot(kdf), pser.dot(pdf))
 
+    def test_frame_dot(self):
+        matrix = [[0, 1, -2, -1], [1, 1, 1, 1]]
+        vector = [1, 1, 2, 1]
+
+        # Default integer labels on both the DataFrame and the Series
+        pdf, pser = pd.DataFrame(matrix), pd.Series(vector)
+        kdf, kser = ks.from_pandas(pdf), ks.from_pandas(pser)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # Series index in a different order than the DataFrame columns
+        pser = pser.reindex([1, 0, 2, 3])
+        kser = ks.from_pandas(pser)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # Series carrying a name
+        pser.name = "ser"
+        kser = ks.from_pandas(pser)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # MultiIndex columns on the DataFrame, matching MultiIndex on the Series
+        levels = [[1, 1, 2, 2], ["red", "blue", "red", "blue"]]
+        pidx = pd.MultiIndex.from_arrays(levels, names=("number", "color"))
+        pdf = pd.DataFrame(matrix, columns=pidx)
+        pser = pd.Series(vector, index=pidx)
+        kdf, kser = ks.from_pandas(pdf), ks.from_pandas(pser)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # Named Index as DataFrame columns, same named Index on the Series
+        pidx = pd.Index([1, 2, 3, 4], name="number")
+        pdf = pd.DataFrame(matrix, columns=pidx)
+        pser = pd.Series(vector, index=pidx)
+        kdf, kser = ks.from_pandas(pdf), ks.from_pandas(pser)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # DataFrame with a named row Index
+        pdf.index = pd.Index(["x", "y"], name="char")
+        kdf = ks.from_pandas(pdf)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # DataFrame with a MultiIndex on the rows
+        pdf.index = pd.MultiIndex.from_arrays([[1, 1], ["red", "blue"]], names=("number", "color"))
+        kdf = ks.from_pandas(pdf)
+        self.assert_eq(kdf.dot(kser), pdf.dot(pser))
+
+        # Series taken from (or derived from) the DataFrame itself
+        pdf = pd.DataFrame([[1, 2], [3, 4]])
+        kdf = ks.from_pandas(pdf)
+        self.assert_eq(kdf.dot(kdf[0]), pdf.dot(pdf[0]))
+        self.assert_eq(kdf.dot(kdf[0] * 10), pdf.dot(pdf[0] * 10))
+        self.assert_eq((kdf + 1).dot(kdf[0] * 10), (pdf + 1).dot(pdf[0] * 10))
+
     def test_to_series_comparison(self):
         kidx1 = ks.Index([1, 2, 3, 4, 5])
         kidx2 = ks.Index([1, 2, 3, 4, 5])

diff --git a/docs/source/reference/frame.rst b/docs/source/reference/frame.rst
@@ -98,6 +98,7 @@ Binary operator functions
    DataFrame.ge
    DataFrame.ne
    DataFrame.eq
+   DataFrame.dot
 
 Function application, GroupBy & Window
 --------------------------------------