From 982ce47b51d44d442cd2d5feb62d1639fb4d98fa Mon Sep 17 00:00:00 2001
From: David Toneian
Date: Sun, 17 May 2020 22:15:51 +0200
Subject: [PATCH 1/2] [SPARK-31739][PYSPARK][DOCS][MINOR] Fix docstring syntax issues and misplaced space characters.

---
 python/pyspark/ml/clustering.py  |  2 +-
 python/pyspark/ml/util.py        |  1 +
 python/pyspark/mllib/util.py     |  4 ++--
 python/pyspark/sql/dataframe.py  |  6 +++---
 python/pyspark/sql/readwriter.py | 14 +++++++-------
 python/pyspark/sql/streaming.py  | 26 +++++++++++++-------------
 6 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 8f0e9aad01b53..54a184bc081ee 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -802,7 +802,7 @@ def computeCost(self, dataset):
         Computes the sum of squared distances between the input points and
         their corresponding cluster centers.
 
-        ..note:: Deprecated in 3.0.0. It will be removed in future versions. Use
+        .. note:: Deprecated in 3.0.0. It will be removed in future versions. Use
            ClusteringEvaluator instead. You can also get the cost on the training dataset in the
            summary.
         """
diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py
index 35ad5518e1c1f..aac2b38d3f57d 100644
--- a/python/pyspark/ml/util.py
+++ b/python/pyspark/ml/util.py
@@ -563,6 +563,7 @@ def loadParamsInstance(path, sc):
 
 class HasTrainingSummary(object):
     """
     Base class for models that provides Training summary.
+
     .. versionadded:: 3.0.0
     """
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 1a0ce42dc4e4f..f0f9cda4672b1 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -372,7 +372,7 @@ def save(self, sc, path):
         * human-readable (JSON) model metadata to path/metadata/
         * Parquet formatted data to path/data/
 
-        The model may be loaded using py:meth:`Loader.load`.
+        The model may be loaded using :py:meth:`Loader.load`.
 
         :param sc: Spark context used to save model data.
         :param path: Path specifying the directory in which to save
@@ -412,7 +412,7 @@ class Loader(object):
     def load(cls, sc, path):
         """
         Load a model from the given path. The model should have been
-        saved using py:meth:`Saveable.save`.
+        saved using :py:meth:`Saveable.save`.
 
         :param sc: Spark context used for loading model files.
         :param path: Path specifying the directory to which the model
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 78b574685327c..a3ce87096e790 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -2138,7 +2138,7 @@ def drop(self, *cols):
 
     @ignore_unicode_prefix
     def toDF(self, *cols):
-        """Returns a new class:`DataFrame` that with new specified column names
+        """Returns a new :class:`DataFrame` that with new specified column names
 
         :param cols: list of new column names (string)
 
@@ -2150,9 +2150,9 @@ def toDF(self, *cols):
     @since(3.0)
     def transform(self, func):
-        """Returns a new class:`DataFrame`. Concise syntax for chaining custom transformations.
+        """Returns a new :class:`DataFrame`. Concise syntax for chaining custom transformations.
 
-        :param func: a function that takes and returns a class:`DataFrame`.
+        :param func: a function that takes and returns a :class:`DataFrame`.
 
         >>> from pyspark.sql.functions import col
         >>> df = spark.createDataFrame([(1, 1.0), (2, 2.0)], ["int", "float"])
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 6ad6377288ec5..336345e383729 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -223,15 +223,15 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
         :param mode: allows a mode for dealing with corrupt records during parsing. If None is
                      set, it uses the default value, ``PERMISSIVE``.
 
-                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \
                   into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \
                   fields to ``null``. To keep corrupt records, an user can set a string type \
                   field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \
                   schema does not have the field, it drops corrupt records during parsing. \
                   When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \
                   field in an output schema.
-                * ``DROPMALFORMED`` : ignores the whole corrupted records.
-                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+                * ``DROPMALFORMED``: ignores the whole corrupted records.
+                * ``FAILFAST``: throws an exception when it meets corrupted records.
 
         :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
                                           created by ``PERMISSIVE`` mode. This overrides
@@ -470,7 +470,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
                       be controlled by ``spark.sql.csv.parser.columnPruning.enabled``
                       (enabled by default).
 
-                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \
                   into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \
                   fields to ``null``. To keep corrupt records, an user can set a string type \
                   field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \
@@ -479,8 +479,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
                   When it meets a record having fewer tokens than the length of the schema, \
                   sets ``null`` to extra fields. When the record has more tokens than the \
                   length of the schema, it drops extra tokens.
-                * ``DROPMALFORMED`` : ignores the whole corrupted records.
-                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+                * ``DROPMALFORMED``: ignores the whole corrupted records.
+                * ``FAILFAST``: throws an exception when it meets corrupted records.
 
         :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
                                           created by ``PERMISSIVE`` mode. This overrides
@@ -830,7 +830,7 @@ def save(self, path=None, format=None, mode=None, partitionBy=None, **options):
     def insertInto(self, tableName, overwrite=None):
         """Inserts the content of the :class:`DataFrame` to the specified table.
 
-        It requires that the schema of the class:`DataFrame` is the same as the
+        It requires that the schema of the :class:`DataFrame` is the same as the
         schema of the table.
 
         Optionally overwriting any existing data.
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 05cf331d897a2..2450a4c93c460 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -461,15 +461,15 @@ def json(self, path, schema=None, primitivesAsString=None, prefersDecimal=None,
         :param mode: allows a mode for dealing with corrupt records during parsing. If None is
                      set, it uses the default value, ``PERMISSIVE``.
 
-                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \
                   into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \
                   fields to ``null``. To keep corrupt records, an user can set a string type \
                   field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \
                   schema does not have the field, it drops corrupt records during parsing. \
                   When inferring a schema, it implicitly adds a ``columnNameOfCorruptRecord`` \
                   field in an output schema.
-                * ``DROPMALFORMED`` : ignores the whole corrupted records.
-                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+                * ``DROPMALFORMED``: ignores the whole corrupted records.
+                * ``FAILFAST``: throws an exception when it meets corrupted records.
 
         :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
                                           created by ``PERMISSIVE`` mode. This overrides
@@ -707,7 +707,7 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
         :param mode: allows a mode for dealing with corrupt records during parsing. If None is
                      set, it uses the default value, ``PERMISSIVE``.
 
-                * ``PERMISSIVE`` : when it meets a corrupted record, puts the malformed string \
+                * ``PERMISSIVE``: when it meets a corrupted record, puts the malformed string \
                   into a field configured by ``columnNameOfCorruptRecord``, and sets malformed \
                   fields to ``null``. To keep corrupt records, an user can set a string type \
                   field named ``columnNameOfCorruptRecord`` in an user-defined schema. If a \
@@ -716,8 +716,8 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
                   When it meets a record having fewer tokens than the length of the schema, \
                   sets ``null`` to extra fields. When the record has more tokens than the \
                   length of the schema, it drops extra tokens.
-                * ``DROPMALFORMED`` : ignores the whole corrupted records.
-                * ``FAILFAST`` : throws an exception when it meets corrupted records.
+                * ``DROPMALFORMED``: ignores the whole corrupted records.
+                * ``FAILFAST``: throws an exception when it meets corrupted records.
 
         :param columnNameOfCorruptRecord: allows renaming the new field having malformed string
                                           created by ``PERMISSIVE`` mode. This overrides
@@ -795,11 +795,11 @@ def outputMode(self, outputMode):
 
         Options include:
 
-        * `append`:Only the new rows in the streaming DataFrame/Dataset will be written to
+        * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to
           the sink
-        * `complete`:All the rows in the streaming DataFrame/Dataset will be written to the sink
+        * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the sink
           every time these is some updates
-        * `update`:only the rows that were updated in the streaming DataFrame/Dataset will be
+        * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be
           written to the sink every time there are some updates. If the query doesn't contain
           aggregations, it will be equivalent to `append` mode.
 
@@ -1170,11 +1170,11 @@ def start(self, path=None, format=None, outputMode=None, partitionBy=None, query
         :param outputMode: specifies how data of a streaming DataFrame/Dataset is written to a
                            streaming sink.
 
-        * `append`:Only the new rows in the streaming DataFrame/Dataset will be written to the
+        * `append`: Only the new rows in the streaming DataFrame/Dataset will be written to the
           sink
-        * `complete`:All the rows in the streaming DataFrame/Dataset will be written to the sink
-          every time these is some updates
-        * `update`:only the rows that were updated in the streaming DataFrame/Dataset will be
+        * `complete`: All the rows in the streaming DataFrame/Dataset will be written to the
+          sink every time these is some updates
+        * `update`: only the rows that were updated in the streaming DataFrame/Dataset will be
           written to the sink every time there are some updates. If the query doesn't contain
           aggregations, it will be equivalent to `append` mode.
         :param partitionBy: names of partitioning columns

From 239bf9c0d1ee20deb1705ce4c0658b64596a8e1e Mon Sep 17 00:00:00 2001
From: David Toneian
Date: Mon, 18 May 2020 12:54:26 +0200
Subject: [PATCH 2/2] [SPARK-31739][PYSPARK][DOCS][MINOR] Additional fixes of docstring syntax issues.

---
 python/pyspark/ml/classification.py | 2 +-
 python/pyspark/ml/regression.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 3bc862cc42af9..c43a79fac3312 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -197,7 +197,7 @@ class _JavaClassificationModel(ClassificationModel, JavaPredictionModel):
     """
     Java Model produced by a ``Classifier``.
     Classes are indexed {0, 1, ..., numClasses - 1}.
-    To be mixed in with class:`pyspark.ml.JavaModel`
+    To be mixed in with :class:`pyspark.ml.JavaModel`
     """
 
     @property
diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py
index 2ce467308e6cb..b58255ea12afc 100644
--- a/python/pyspark/ml/regression.py
+++ b/python/pyspark/ml/regression.py
@@ -77,7 +77,7 @@ class _JavaRegressor(Regressor, JavaPredictor):
 class _JavaRegressionModel(RegressionModel, JavaPredictionModel):
     """
     Java Model produced by a ``_JavaRegressor``.
-    To be mixed in with class:`pyspark.ml.JavaModel`
+    To be mixed in with :class:`pyspark.ml.JavaModel`
 
     .. versionadded:: 3.0.0
     """
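
For reviewers who do not often touch Sphinx/reST markup, the short Python sketch below illustrates the two kinds of fix these patches make: a directive needs a space after the two leading dots (".. note::", not "..note::"), and cross-reference roles need their leading colon (":class:`...`", ":py:meth:`...`"). The sketch is illustrative only and is not part of either patch; the ExampleModel class and its methods are hypothetical.

# Illustrative sketch only -- not part of either patch; names are hypothetical.
class ExampleModel(object):
    """A hypothetical model used only to demonstrate the corrected docstring syntax.

    .. note:: Deprecated in 3.0.0. Use :class:`pyspark.ml.evaluation.ClusteringEvaluator`
        instead.

    .. versionadded:: 3.0.0
    """

    def save(self, sc, path):
        """Saves this model; it can be read back with :py:meth:`ExampleModel.load`.

        :param sc: Spark context used to save model data.
        :param path: Path specifying the directory in which to save this model.
        """
        raise NotImplementedError("illustrative sketch only")

    @classmethod
    def load(cls, sc, path):
        """Loads a model previously written by :py:meth:`ExampleModel.save`."""
        raise NotImplementedError("illustrative sketch only")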