diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index fe01b92036377..29f98ed36e2ec 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -200,7 +200,9 @@ jobs:
architecture: x64
- name: Install Python linter dependencies
run: |
- pip3 install flake8 sphinx numpy
+ # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+ # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+ pip3 install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme
- name: Install R 3.6
uses: r-lib/actions/setup-r@v1
with:
@@ -218,7 +220,9 @@ jobs:
- name: Install dependencies for documentation generation
run: |
sudo apt-get install -y libcurl4-openssl-dev pandoc
- pip install sphinx mkdocs numpy
+ # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+ # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+ pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
gem install jekyll jekyll-redirect-from rouge
sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
- name: Scala linter
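
Both linter jobs now pin Sphinx with the requirement specifier `'sphinx<3.1.0'`. As a quick illustration of what that pin accepts, a sketch using the third-party `packaging` library (an assumption on our part; it ships with pip and is not part of this patch):

```python
# Hypothetical check of the 'sphinx<3.1.0' pin used above; `packaging` is an
# assumption (it is bundled with pip), the version strings come from this patch.
from packaging.specifiers import SpecifierSet
from packaging.version import Version

spec = SpecifierSet("<3.1.0")
for candidate in ("2.3.1", "3.0.4", "3.1.0", "3.1.2"):
    # 3.0.4 satisfies the pin; 3.1.0 and later do not (SPARK-32407).
    print(candidate, Version(candidate) in spec)
```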
diff --git a/.gitignore b/.gitignore
index 198fdee39be95..0d8addeb10e21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -64,6 +64,7 @@ python/lib/pyspark.zip
python/.eggs/
python/deps
python/docs/_site/
+python/docs/source/reference/api/
python/test_coverage/coverage_data
python/test_coverage/htmlcov
python/pyspark/python
diff --git a/LICENSE b/LICENSE
index af2cdd275d28d..8cec4f5ea5379 100644
--- a/LICENSE
+++ b/LICENSE
@@ -223,7 +223,7 @@ Python Software Foundation License
----------------------------------
pyspark/heapq3.py
-python/docs/_static/copybutton.js
+python/docs/source/_static/copybutton.js
BSD 3-Clause
------------
diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile
index 44d602415b262..a02a6b7bccf27 100644
--- a/dev/create-release/spark-rm/Dockerfile
+++ b/dev/create-release/spark-rm/Dockerfile
@@ -33,7 +33,10 @@ ENV DEBCONF_NONINTERACTIVE_SEEN true
# These arguments are just for reuse and not really meant to be customized.
ARG APT_INSTALL="apt-get install --no-install-recommends -y"
-ARG PIP_PKGS="sphinx==2.3.1 mkdocs==1.0.4 numpy==1.18.1"
+# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+# See also https://github.com/sphinx-doc/sphinx/issues/7551.
+# We should use the latest Sphinx version once this is fixed.
+ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.0.4 numpy==1.18.1 pydata_sphinx_theme==0.3.1"
ARG GEM_PKGS="jekyll:4.0.0 jekyll-redirect-from:0.16.0 rouge:3.15.0"
# Install extra needed repos and refresh.
diff --git a/dev/lint-python b/dev/lint-python
index 1fddbfa64b32c..48dd94e36fae8 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -173,7 +173,6 @@ function sphinx_test {
return
fi
- # TODO(SPARK-32279): Install Sphinx in Python 3 of Jenkins machines
PYTHON_HAS_SPHINX=$("$PYTHON_EXECUTABLE" -c 'import importlib.util; print(importlib.util.find_spec("sphinx") is not None)')
if [[ "$PYTHON_HAS_SPHINX" == "False" ]]; then
echo "$PYTHON_EXECUTABLE does not have Sphinx installed. Skipping Sphinx build for now."
@@ -181,6 +180,23 @@ function sphinx_test {
return
fi
+ # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+ # See also https://github.com/sphinx-doc/sphinx/issues/7551.
+ PYTHON_HAS_SPHINX_3_0=$("$PYTHON_EXECUTABLE" -c 'from distutils.version import LooseVersion; import sphinx; print(LooseVersion(sphinx.__version__) < LooseVersion("3.1.0"))')
+ if [[ "$PYTHON_HAS_SPHINX_3_0" == "False" ]]; then
+ echo "$PYTHON_EXECUTABLE has Sphinx 3.1+ installed but it requires lower then 3.1. Skipping Sphinx build for now."
+ echo
+ return
+ fi
+
+ # TODO(SPARK-32391): Install pydata_sphinx_theme in Jenkins machines
+ PYTHON_HAS_THEME=$("$PYTHON_EXECUTABLE" -c 'import importlib.util; print(importlib.util.find_spec("pydata_sphinx_theme") is not None)')
+ if [[ "$PYTHON_HAS_THEME" == "False" ]]; then
+ echo "$PYTHON_EXECUTABLE does not have pydata_sphinx_theme installed. Skipping Sphinx build for now."
+ echo
+ return
+ fi
+
echo "starting $SPHINX_BUILD tests..."
pushd python/docs &> /dev/null
make clean &> /dev/null
diff --git a/dev/requirements.txt b/dev/requirements.txt
index baea9213dbc97..a862a6e986791 100644
--- a/dev/requirements.txt
+++ b/dev/requirements.txt
@@ -3,3 +3,4 @@ jira==1.0.3
PyGithub==1.26.0
Unidecode==0.04.19
sphinx
+pydata_sphinx_theme
diff --git a/dev/tox.ini b/dev/tox.ini
index ba5df084daad7..e25595aa6c9a6 100644
--- a/dev/tox.ini
+++ b/dev/tox.ini
@@ -16,4 +16,4 @@
[pycodestyle]
ignore=E226,E241,E305,E402,E722,E731,E741,W503,W504
max-line-length=100
-exclude=python/pyspark/cloudpickle/*.py,heapq3.py,shared.py,python/docs/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
+exclude=python/pyspark/cloudpickle/*.py,heapq3.py,shared.py,python/docs/source/conf.py,work/*/*.py,python/.eggs/*,dist/*,.git/*
diff --git a/docs/README.md b/docs/README.md
index 22039871cf63d..e2002a66b0433 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -57,8 +57,13 @@ Note: Other versions of roxygen2 might work in SparkR documentation generation b
To generate API docs for any language, you'll need to install these libraries:
+<!--
+TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
+See also https://github.com/sphinx-doc/sphinx/issues/7551.
+-->
+
```sh
-$ sudo pip install sphinx mkdocs numpy
+$ sudo pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme
```
## Generating the Documentation HTML
diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb
index 8e2a06e4bc9a8..17da22bf8a433 100644
--- a/docs/_plugins/copy_api_dirs.rb
+++ b/docs/_plugins/copy_api_dirs.rb
@@ -126,8 +126,8 @@
puts "Making directory api/python"
mkdir_p "api/python"
- puts "cp -r ../python/docs/_build/html/. api/python"
- cp_r("../python/docs/_build/html/.", "api/python")
+ puts "cp -r ../python/docs/build/html/. api/python"
+ cp_r("../python/docs/build/html/.", "api/python")
end
if not (ENV['SKIP_RDOC'] == '1')
diff --git a/docs/img/spark-logo-reverse.png b/docs/img/spark-logo-reverse.png
new file mode 100644
index 0000000000000..a3e4ed4bb3d08
Binary files /dev/null and b/docs/img/spark-logo-reverse.png differ
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 4272b7488d9a0..763f493a0eb58 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -3,8 +3,8 @@
# You can set these variables from the command line.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
-SOURCEDIR ?= .
-BUILDDIR ?= _build
+SOURCEDIR ?= source
+BUILDDIR ?= build
export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9-src.zip)
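
With `SOURCEDIR` and `BUILDDIR` switched to the conventional `source`/`build` layout, the default `make html` target is equivalent to invoking `sphinx-build -M html source build` from `python/docs`. A rough Python equivalent, assuming `sphinx-build` is on the PATH (the subprocess call is our illustration, not part of the patch):

```python
# Rough Python equivalent of `make html` under the new source/build layout.
import subprocess

subprocess.run(
    ["sphinx-build", "-M", "html", "source", "build"],  # SOURCEDIR, BUILDDIR
    cwd="python/docs",
    check=True,
)
```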
diff --git a/python/docs/_static/pyspark.css b/python/docs/_static/pyspark.css
deleted file mode 100644
index 41106f2f6e26d..0000000000000
--- a/python/docs/_static/pyspark.css
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-body {
- background-color: #ffffff;
-}
-
-div.sphinxsidebar {
- width: 274px;
-}
-
-div.bodywrapper {
- margin: 0 0 0 274px;
-}
-
-div.sphinxsidebar ul {
- margin-right: 10px;
-}
-
-div.sphinxsidebar li a {
- word-break: break-all;
-}
-
-span.pys-tag {
- font-size: 11px;
- font-weight: bold;
- margin: 0 0 0 2px;
- padding: 1px 3px 1px 3px;
- -moz-border-radius: 3px;
- -webkit-border-radius: 3px;
- border-radius: 3px;
- text-align: center;
- text-decoration: none;
-}
-
-span.pys-tag-experimental {
- background-color: rgb(37, 112, 128);
- color: rgb(255, 255, 255);
-}
-
-span.pys-tag-deprecated {
- background-color: rgb(238, 238, 238);
- color: rgb(62, 67, 73);
-}
-
-div.pys-note-experimental {
- background-color: rgb(88, 151, 165);
- border-color: rgb(59, 115, 127);
- color: rgb(255, 255, 255);
-}
-
-div.pys-note-deprecated {
-}
-
-.hasTooltip {
- position:relative;
-}
-.hasTooltip span {
- display:none;
-}
-
-.hasTooltip:hover span.tooltip {
- display: inline-block;
- -moz-border-radius: 2px;
- -webkit-border-radius: 2px;
- border-radius: 2px;
- background-color: rgb(250, 250, 250);
- color: rgb(68, 68, 68);
- font-weight: normal;
- box-shadow: 1px 1px 3px rgb(127, 127, 127);
- position: absolute;
- padding: 0 3px 0 3px;
- top: 1.3em;
- left: 14px;
- z-index: 9999
-}
diff --git a/python/docs/_static/pyspark.js b/python/docs/_static/pyspark.js
deleted file mode 100644
index 75e4c42492a48..0000000000000
--- a/python/docs/_static/pyspark.js
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-*/
-
-$(function (){
-
- function startsWith(s, prefix) {
- return s && s.indexOf(prefix) === 0;
- }
-
- function buildSidebarLinkMap() {
- var linkMap = {};
- $('div.sphinxsidebar a.reference.internal').each(function (i,a) {
- var href = $(a).attr('href');
- if (startsWith(href, '#module-')) {
- var id = href.substr(8);
- linkMap[id] = [$(a), null];
- }
- })
- return linkMap;
- };
-
- function getAdNoteDivs(dd) {
- var noteDivs = {};
- dd.find('> div.admonition.note > p.last').each(function (i, p) {
- var text = $(p).text();
- if (!noteDivs.experimental && startsWith(text, 'Experimental')) {
- noteDivs.experimental = $(p).parent();
- }
- if (!noteDivs.deprecated && startsWith(text, 'Deprecated')) {
- noteDivs.deprecated = $(p).parent();
- }
- });
- return noteDivs;
- }
-
- function getParentId(name) {
- var last_idx = name.lastIndexOf('.');
- return last_idx == -1? '': name.substr(0, last_idx);
- }
-
- function buildTag(text, cls, tooltip) {
- return '<span class="pys-tag ' + cls + ' hasTooltip">' + text + '<span class="tooltip">'
- + tooltip + '</span></span>'
- }
-
-
- var sidebarLinkMap = buildSidebarLinkMap();
-
- $('dl.class, dl.function').each(function (i,dl) {
-
- dl = $(dl);
- dt = dl.children('dt').eq(0);
- dd = dl.children('dd').eq(0);
- var id = dt.attr('id');
- var desc = dt.find('> .descname').text();
- var adNoteDivs = getAdNoteDivs(dd);
-
- if (id) {
- var parent_id = getParentId(id);
-
- var r = sidebarLinkMap[parent_id];
- if (r) {
- if (r[1] === null) {
- r[1] = $('<ul/>');
- r[0].parent().append(r[1]);
- }
- var tags = '';
- if (adNoteDivs.experimental) {
- tags += buildTag('E', 'pys-tag-experimental', 'Experimental');
- adNoteDivs.experimental.addClass('pys-note pys-note-experimental');
- }
- if (adNoteDivs.deprecated) {
- tags += buildTag('D', 'pys-tag-deprecated', 'Deprecated');
- adNoteDivs.deprecated.addClass('pys-note pys-note-deprecated');
- }
- var li = $('<li/>');
- var a = $('<a href="#' + id + '">' + desc + '</a>');
- li.append(a);
- li.append(tags);
- r[1].append(li);
- sidebarLinkMap[id] = [a, null];
- }
- }
- });
-});
diff --git a/python/docs/_templates/layout.html b/python/docs/_templates/layout.html
deleted file mode 100644
index ab36ebababf88..0000000000000
--- a/python/docs/_templates/layout.html
+++ /dev/null
@@ -1,6 +0,0 @@
-{% extends "!layout.html" %}
-{% set script_files = script_files + ["_static/pyspark.js"] %}
-{% set css_files = css_files + ['_static/pyspark.css'] %}
-{% block rootrellink %}
- {{ super() }}
-{% endblock %}
diff --git a/python/docs/index.rst b/python/docs/index.rst
deleted file mode 100644
index 6e059264e6bbb..0000000000000
--- a/python/docs/index.rst
+++ /dev/null
@@ -1,53 +0,0 @@
-.. pyspark documentation master file, created by
- sphinx-quickstart on Thu Aug 28 15:17:47 2014.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
-
-Welcome to Spark Python API Docs!
-===================================
-
-Contents:
-
-.. toctree::
- :maxdepth: 2
-
- pyspark
- pyspark.sql
- pyspark.streaming
- pyspark.ml
- pyspark.mllib
- pyspark.resource
-
-
-Core classes:
----------------
-
- :class:`pyspark.SparkContext`
-
- Main entry point for Spark functionality.
-
- :class:`pyspark.RDD`
-
- A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
-
- :class:`pyspark.streaming.StreamingContext`
-
- Main entry point for Spark Streaming functionality.
-
- :class:`pyspark.streaming.DStream`
-
- A Discretized Stream (DStream), the basic abstraction in Spark Streaming.
-
- :class:`pyspark.sql.SparkSession`
-
- Main entry point for DataFrame and SQL functionality.
-
- :class:`pyspark.sql.DataFrame`
-
- A distributed collection of data grouped into named columns.
-
-
-Indices and tables
-==================
-
-* :ref:`search`
diff --git a/python/docs/make2.bat b/python/docs/make2.bat
index 7955a83051b8e..2f87032820f42 100644
--- a/python/docs/make2.bat
+++ b/python/docs/make2.bat
@@ -5,8 +5,8 @@ REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
-set SOURCEDIR=.
-set BUILDDIR=_build
+set SOURCEDIR=source
+set BUILDDIR=build
set PYTHONPATH=..;..\lib\py4j-0.10.9-src.zip
diff --git a/python/docs/pyspark.ml.rst b/python/docs/pyspark.ml.rst
deleted file mode 100644
index e31dfddd5988e..0000000000000
--- a/python/docs/pyspark.ml.rst
+++ /dev/null
@@ -1,122 +0,0 @@
-pyspark.ml package
-==================
-
-ML Pipeline APIs
-----------------
-
-.. automodule:: pyspark.ml
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.param module
------------------------
-
-.. automodule:: pyspark.ml.param
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.feature module
--------------------------
-
-.. automodule:: pyspark.ml.feature
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.classification module
---------------------------------
-
-.. automodule:: pyspark.ml.classification
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.clustering module
-----------------------------
-
-.. automodule:: pyspark.ml.clustering
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.functions module
-----------------------------
-
-.. automodule:: pyspark.ml.functions
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.linalg module
-----------------------------
-
-.. automodule:: pyspark.ml.linalg
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.recommendation module
---------------------------------
-
-.. automodule:: pyspark.ml.recommendation
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.regression module
-----------------------------
-
-.. automodule:: pyspark.ml.regression
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.stat module
-----------------------
-
-.. automodule:: pyspark.ml.stat
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.tuning module
-------------------------
-
-.. automodule:: pyspark.ml.tuning
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.evaluation module
-----------------------------
-
-.. automodule:: pyspark.ml.evaluation
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.fpm module
-----------------------------
-
-.. automodule:: pyspark.ml.fpm
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.image module
-----------------------------
-
-.. automodule:: pyspark.ml.image
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.ml.util module
-----------------------------
-
-.. automodule:: pyspark.ml.util
- :members:
- :undoc-members:
- :inherited-members:
diff --git a/python/docs/pyspark.mllib.rst b/python/docs/pyspark.mllib.rst
deleted file mode 100644
index 2d54ab118b94b..0000000000000
--- a/python/docs/pyspark.mllib.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-pyspark.mllib package
-=====================
-
-pyspark.mllib.classification module
------------------------------------
-
-.. automodule:: pyspark.mllib.classification
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.mllib.clustering module
--------------------------------
-
-.. automodule:: pyspark.mllib.clustering
- :members:
- :undoc-members:
-
-pyspark.mllib.evaluation module
--------------------------------
-
-.. automodule:: pyspark.mllib.evaluation
- :members:
- :undoc-members:
-
-pyspark.mllib.feature module
--------------------------------
-
-.. automodule:: pyspark.mllib.feature
- :members:
- :undoc-members:
- :show-inheritance:
-
-pyspark.mllib.fpm module
-------------------------
-
-.. automodule:: pyspark.mllib.fpm
- :members:
- :undoc-members:
-
-pyspark.mllib.linalg module
----------------------------
-
-.. automodule:: pyspark.mllib.linalg
- :members:
- :undoc-members:
- :show-inheritance:
-
-pyspark.mllib.linalg.distributed module
----------------------------------------
-
-.. automodule:: pyspark.mllib.linalg.distributed
- :members:
- :undoc-members:
- :show-inheritance:
-
-pyspark.mllib.random module
----------------------------
-
-.. automodule:: pyspark.mllib.random
- :members:
- :undoc-members:
-
-pyspark.mllib.recommendation module
------------------------------------
-
-.. automodule:: pyspark.mllib.recommendation
- :members:
- :undoc-members:
-
-pyspark.mllib.regression module
--------------------------------
-
-.. automodule:: pyspark.mllib.regression
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.mllib.stat module
--------------------------
-
-.. automodule:: pyspark.mllib.stat
- :members:
- :undoc-members:
-
-pyspark.mllib.tree module
--------------------------
-
-.. automodule:: pyspark.mllib.tree
- :members:
- :undoc-members:
- :inherited-members:
-
-pyspark.mllib.util module
--------------------------
-
-.. automodule:: pyspark.mllib.util
- :members:
- :undoc-members:
diff --git a/python/docs/pyspark.resource.rst b/python/docs/pyspark.resource.rst
deleted file mode 100644
index 7f3a79b9e5b52..0000000000000
--- a/python/docs/pyspark.resource.rst
+++ /dev/null
@@ -1,11 +0,0 @@
-pyspark.resource module
-=======================
-
-Module Contents
----------------
-
-.. automodule:: pyspark.resource
- :members:
- :undoc-members:
- :inherited-members:
-
diff --git a/python/docs/pyspark.rst b/python/docs/pyspark.rst
deleted file mode 100644
index 402d6ce9eb016..0000000000000
--- a/python/docs/pyspark.rst
+++ /dev/null
@@ -1,21 +0,0 @@
-pyspark package
-===============
-
-Subpackages
------------
-
-.. toctree::
- :maxdepth: 1
-
- pyspark.sql
- pyspark.streaming
- pyspark.ml
- pyspark.mllib
- pyspark.resource
-
-Contents
---------
-
-.. automodule:: pyspark
- :members:
- :undoc-members:
diff --git a/python/docs/pyspark.sql.rst b/python/docs/pyspark.sql.rst
deleted file mode 100644
index 406ada701941a..0000000000000
--- a/python/docs/pyspark.sql.rst
+++ /dev/null
@@ -1,37 +0,0 @@
-pyspark.sql module
-==================
-
-Module Contents
----------------
-
-.. automodule:: pyspark.sql
- :members:
- :undoc-members:
- :inherited-members:
- :exclude-members: builder
-.. We need `exclude-members` to prevent default description generations
- as a workaround for old Sphinx (< 1.6.6).
-
-pyspark.sql.types module
-------------------------
-.. automodule:: pyspark.sql.types
- :members:
- :undoc-members:
-
-pyspark.sql.functions module
-----------------------------
-.. automodule:: pyspark.sql.functions
- :members:
- :undoc-members:
-
-pyspark.sql.avro.functions module
----------------------------------
-.. automodule:: pyspark.sql.avro.functions
- :members:
- :undoc-members:
-
-pyspark.sql.streaming module
-----------------------------
-.. automodule:: pyspark.sql.streaming
- :members:
- :undoc-members:
diff --git a/python/docs/pyspark.streaming.rst b/python/docs/pyspark.streaming.rst
deleted file mode 100644
index f7df6438b9169..0000000000000
--- a/python/docs/pyspark.streaming.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-pyspark.streaming module
-========================
-
-Module contents
----------------
-
-.. automodule:: pyspark.streaming
- :members:
- :undoc-members:
- :show-inheritance:
-
-pyspark.streaming.kinesis module
---------------------------------
-.. automodule:: pyspark.streaming.kinesis
- :members:
- :undoc-members:
- :show-inheritance:
diff --git a/python/docs/_static/copybutton.js b/python/docs/source/_static/copybutton.js
similarity index 100%
rename from python/docs/_static/copybutton.js
rename to python/docs/source/_static/copybutton.js
diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css
new file mode 100644
index 0000000000000..2fd8720e2fa0d
--- /dev/null
+++ b/python/docs/source/_static/css/pyspark.css
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+/* PySpark style CSS overwrite */
+
+/* The Lato font inherited from the parent theme does not render bold weights distinctly, so default to 'Source Sans Pro'. */
+body {
+ font-family:"Source Sans Pro",sans-serif!important;
+}
+
+h1,h2 {
+ color:#1B5162!important;
+}
+
+h3 {
+ color: #555555;
+}
+
+/* Top menu */
+#navbar-main {
+ background: #1B5162!important;
+ box-shadow: 0px 2px 4px rgba(0, 0, 0, 0.11);
+}
+
+#navbar-main-elements li.nav-item a {
+ color: rgba(255, 255, 255, 0.8);
+}
+
+#navbar-main-elements li.active a {
+ font-weight: 600;
+ color: #FFFFFF!important;
+}
+
+.col-9 {
+ flex: 0 0 80%;
+ max-width: 80%;
+}
+
+/* Left panel size */
+@media (min-width: 768px) {
+ .col-md-3 {
+ flex: 0 0 20%;
+ max-width: 20%;
+ }
+}
+
+/* Top menu right button */
+.navbar-toggler {
+ color:rgba(255,255,255,.5)!important;
+ border-color:rgba(255,255,255,.5)!important;
+}
+
+.navbar-toggler-icon {
+ background-image:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='30' height='30'%3E%3Cpath stroke='rgba(255,255,255,.5)' stroke-linecap='round' stroke-miterlimit='10' stroke-width='2' d='M4 7h22M4 15h22M4 23h22'/%3E%3C/svg%3E")!important;
+}
+
+/* Left bar list colors */
+.bd-sidebar .nav>.active>a {
+ color: #1B5162!important;
+}
+
+.bd-sidebar .nav>li>a:hover {
+ color: #1B5162!important;
+}
+
+.bd-sidebar .nav>.active:hover>a,.bd-sidebar .nav>.active>a {
+ color: #1B5162!important;
+}
+
+.bd-sidebar .nav>li>ul>.active:hover>a,.bd-sidebar .nav>li>ul>.active>a {
+ color: #1B5162!important;
+}
+
+/* Right bar list colors */
+.toc-entry>.nav-link.active {
+ color: #1B5162!important;
+ border-left: 2px solid #1B5162!important;
+}
+
diff --git a/python/docs/source/_templates/class_with_docs.rst b/python/docs/source/_templates/class_with_docs.rst
new file mode 100644
index 0000000000000..7c37b83c0e90e
--- /dev/null
+++ b/python/docs/source/_templates/class_with_docs.rst
@@ -0,0 +1,79 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+{{ objname }}
+{{ underline }}
+
+.. currentmodule:: {{ module }}
+
+.. autoclass:: {{ objname }}
+
+ {% if '__init__' in methods %}
+ {% set caught_result = methods.remove('__init__') %}
+ {% endif %}
+
+ {% block methods_summary %}
+ {% if methods %}
+
+ .. rubric:: Methods
+
+ .. autosummary::
+ {% for item in methods %}
+ ~{{ name }}.{{ item }}
+ {%- endfor %}
+
+ {% endif %}
+ {% endblock %}
+
+ {% block attributes_summary %}
+ {% if attributes %}
+
+ .. rubric:: Attributes
+
+ .. autosummary::
+ {% for item in attributes %}
+ ~{{ name }}.{{ item }}
+ {%- endfor %}
+
+ {% endif %}
+ {% endblock %}
+
+ {% block methods_documentation %}
+ {% if methods %}
+
+ .. rubric:: Methods Documentation
+
+ {% for item in methods %}
+ .. automethod:: {{ item }}
+ {%- endfor %}
+
+ {% endif %}
+ {% endblock %}
+
+ {% block attributes_documentation %}
+ {% if attributes %}
+
+ .. rubric:: Attributes Documentation
+
+ {% for item in attributes %}
+ .. autoattribute:: {{ item }}
+ {%- endfor %}
+
+ {% endif %}
+ {% endblock %}
+
diff --git a/python/docs/conf.py b/python/docs/source/conf.py
similarity index 89%
rename from python/docs/conf.py
rename to python/docs/source/conf.py
index 9e7afb7c07298..7b1939d976080 100644
--- a/python/docs/conf.py
+++ b/python/docs/source/conf.py
@@ -14,12 +14,24 @@
import sys
import os
+import shutil
+import errno
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
sys.path.insert(0, os.path.abspath('.'))
+# Remove previously generated rst files. Errors are ignored so that a failed
+# cleanup cannot stop generation of the whole docs.
+shutil.rmtree(
+ "%s/reference/api" % os.path.dirname(os.path.abspath(__file__)), ignore_errors=True)
+try:
+ os.mkdir("%s/reference/api" % os.path.dirname(os.path.abspath(__file__)))
+except OSError as e:
+ if e.errno != errno.EEXIST:
+ raise
+
# -- General configuration ------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
@@ -32,6 +44,7 @@
'sphinx.ext.autodoc',
'sphinx.ext.viewcode',
'sphinx.ext.mathjax',
+ 'sphinx.ext.autosummary',
]
# Add any paths that contain templates here, relative to this directory.
@@ -47,8 +60,8 @@
master_doc = 'index'
# General information about the project.
-project = u'PySpark'
-copyright = u''
+project = 'PySpark'
+copyright = ''
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
@@ -101,12 +114,13 @@
# Look at the first line of the docstring for function and method signatures.
autodoc_docstring_signature = True
+autosummary_generate = True
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
-html_theme = 'nature'
+html_theme = 'pydata_sphinx_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
@@ -125,7 +139,7 @@
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
-html_logo = "../../docs/img/spark-logo-hd.png"
+html_logo = "../../../docs/img/spark-logo-reverse.png"
# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -137,6 +151,10 @@
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
+html_css_files = [
+ 'css/pyspark.css',
+]
+
# Add any extra paths that contain custom files (such as robots.txt or
# .htaccess) here, relative to this directory. These files are copied
# directly to the root of the documentation.
@@ -204,8 +222,8 @@
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
- ('index', 'pyspark.tex', u'pyspark Documentation',
- u'Author', 'manual'),
+ ('index', 'pyspark.tex', 'pyspark Documentation',
+ 'Author', 'manual'),
]
# The name of an image file (relative to this directory) to place at the top of
@@ -234,8 +252,8 @@
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
- ('index', 'pyspark', u'pyspark Documentation',
- [u'Author'], 1)
+ ('index', 'pyspark', 'pyspark Documentation',
+ ['Author'], 1)
]
# If true, show URL addresses after external links.
@@ -248,8 +266,8 @@
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
- ('index', 'pyspark', u'pyspark Documentation',
- u'Author', 'pyspark', 'One line description of project.',
+ ('index', 'pyspark', 'pyspark Documentation',
+ 'Author', 'pyspark', 'One line description of project.',
'Miscellaneous'),
]
@@ -269,13 +287,13 @@
# -- Options for Epub output ----------------------------------------------
# Bibliographic Dublin Core info.
-epub_title = u'pyspark'
-epub_author = u'Author'
-epub_publisher = u'Author'
-epub_copyright = u'2014, Author'
+epub_title = 'pyspark'
+epub_author = 'Author'
+epub_publisher = 'Author'
+epub_copyright = '2014, Author'
# The basename for the epub file. It defaults to the project name.
-#epub_basename = u'pyspark'
+#epub_basename = 'pyspark'
# The HTML theme for the epub output. Since the default themes are not optimized
# for small screen space, using the same theme for HTML and epub output is
@@ -335,7 +353,8 @@
# If false, no index is generated.
#epub_use_index = True
def setup(app):
- app.add_javascript('copybutton.js')
+ # app.add_javascript() was deprecated in Sphinx 1.8 in favor of add_js_file().
+ getattr(app, "add_js_file", getattr(app, "add_javascript"))('copybutton.js')
# Skip sample endpoint link (not expected to resolve)
linkcheck_ignore = [r'https://kinesis.us-east-1.amazonaws.com']
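
The `setup()` hook now resolves the script-registration method dynamically because Sphinx 1.8 renamed `add_javascript()` to `add_js_file()`. A minimal sketch of that compatibility shim in isolation (`add_script` is our name, for illustration):

```python
# Minimal sketch of the shim in setup() above: prefer the modern
# add_js_file() API and fall back to the deprecated add_javascript()
# on Sphinx versions older than 1.8.
def add_script(app, filename):
    add = getattr(app, "add_js_file", None) or getattr(app, "add_javascript")
    add(filename)

def setup(app):
    add_script(app, 'copybutton.js')
```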
diff --git a/python/docs/source/development/index.rst b/python/docs/source/development/index.rst
new file mode 100644
index 0000000000000..db9f667332635
--- /dev/null
+++ b/python/docs/source/development/index.rst
@@ -0,0 +1,21 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+===========
+Development
+===========
+
diff --git a/python/docs/source/getting_started/index.rst b/python/docs/source/getting_started/index.rst
new file mode 100644
index 0000000000000..457368c8194cb
--- /dev/null
+++ b/python/docs/source/getting_started/index.rst
@@ -0,0 +1,22 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+===============
+Getting Started
+===============
+
diff --git a/python/docs/source/index.rst b/python/docs/source/index.rst
new file mode 100644
index 0000000000000..34011ec7c5573
--- /dev/null
+++ b/python/docs/source/index.rst
@@ -0,0 +1,32 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+.. PySpark documentation master file
+
+=====================
+PySpark Documentation
+=====================
+
+.. toctree::
+ :maxdepth: 2
+
+ getting_started/index
+ user_guide/index
+ reference/index
+ development/index
+ migration_guide/index
+
diff --git a/python/docs/source/migration_guide/index.rst b/python/docs/source/migration_guide/index.rst
new file mode 100644
index 0000000000000..fc12668f81a58
--- /dev/null
+++ b/python/docs/source/migration_guide/index.rst
@@ -0,0 +1,22 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+===============
+Migration Guide
+===============
+
diff --git a/python/docs/source/reference/index.rst b/python/docs/source/reference/index.rst
new file mode 100644
index 0000000000000..77f17da720dd5
--- /dev/null
+++ b/python/docs/source/reference/index.rst
@@ -0,0 +1,34 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+=============
+API Reference
+=============
+
+This page gives an overview of all public PySpark modules, classes, functions and methods.
+
+.. toctree::
+ :maxdepth: 2
+
+ pyspark.sql
+ pyspark.ss
+ pyspark.ml
+ pyspark.streaming
+ pyspark.mllib
+ pyspark
+ pyspark.resource
diff --git a/python/docs/source/reference/pyspark.ml.rst b/python/docs/source/reference/pyspark.ml.rst
new file mode 100644
index 0000000000000..b6e7d10276603
--- /dev/null
+++ b/python/docs/source/reference/pyspark.ml.rst
@@ -0,0 +1,363 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+ML
+==
+
+ML Pipeline APIs
+----------------
+
+.. currentmodule:: pyspark.ml
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Transformer
+ UnaryTransformer
+ Estimator
+ Model
+ Predictor
+ PredictionModel
+ Pipeline
+ PipelineModel
+
+
+Parameters
+----------
+
+.. currentmodule:: pyspark.ml.param
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Param
+ Params
+ TypeConverters
+
+
+Feature
+-------
+
+.. currentmodule:: pyspark.ml.feature
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ANOVASelector
+ ANOVASelectorModel
+ Binarizer
+ BucketedRandomProjectionLSH
+ BucketedRandomProjectionLSHModel
+ Bucketizer
+ ChiSqSelector
+ ChiSqSelectorModel
+ CountVectorizer
+ CountVectorizerModel
+ DCT
+ ElementwiseProduct
+ FeatureHasher
+ FValueSelector
+ FValueSelectorModel
+ HashingTF
+ IDF
+ IDFModel
+ Imputer
+ ImputerModel
+ IndexToString
+ Interaction
+ MaxAbsScaler
+ MaxAbsScalerModel
+ MinHashLSH
+ MinHashLSHModel
+ MinMaxScaler
+ MinMaxScalerModel
+ NGram
+ Normalizer
+ OneHotEncoder
+ OneHotEncoderModel
+ PCA
+ PCAModel
+ PolynomialExpansion
+ QuantileDiscretizer
+ RobustScaler
+ RobustScalerModel
+ RegexTokenizer
+ RFormula
+ RFormulaModel
+ SQLTransformer
+ StandardScaler
+ StandardScalerModel
+ StopWordsRemover
+ StringIndexer
+ StringIndexerModel
+ Tokenizer
+ VarianceThresholdSelector
+ VarianceThresholdSelectorModel
+ VectorAssembler
+ VectorIndexer
+ VectorIndexerModel
+ VectorSizeHint
+ VectorSlicer
+ Word2Vec
+ Word2VecModel
+
+
+Classification
+--------------
+
+.. currentmodule:: pyspark.ml.classification
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ LinearSVC
+ LinearSVCModel
+ LinearSVCSummary
+ LinearSVCTrainingSummary
+ LogisticRegression
+ LogisticRegressionModel
+ LogisticRegressionSummary
+ LogisticRegressionTrainingSummary
+ BinaryLogisticRegressionSummary
+ BinaryLogisticRegressionTrainingSummary
+ DecisionTreeClassifier
+ DecisionTreeClassificationModel
+ GBTClassifier
+ GBTClassificationModel
+ RandomForestClassifier
+ RandomForestClassificationModel
+ RandomForestClassificationSummary
+ RandomForestClassificationTrainingSummary
+ BinaryRandomForestClassificationSummary
+ BinaryRandomForestClassificationTrainingSummary
+ NaiveBayes
+ NaiveBayesModel
+ MultilayerPerceptronClassifier
+ MultilayerPerceptronClassificationModel
+ OneVsRest
+ OneVsRestModel
+ FMClassifier
+ FMClassificationModel
+ FMClassificationSummary
+ FMClassificationTrainingSummary
+
+
+Clustering
+----------
+
+.. currentmodule:: pyspark.ml.clustering
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ BisectingKMeans
+ BisectingKMeansModel
+ BisectingKMeansSummary
+ KMeans
+ KMeansModel
+ GaussianMixture
+ GaussianMixtureModel
+ GaussianMixtureSummary
+ LDA
+ LDAModel
+ LocalLDAModel
+ DistributedLDAModel
+ PowerIterationClustering
+
+
+ML Functions
+----------------------------
+
+.. currentmodule:: pyspark.ml.functions
+
+.. autosummary::
+ :toctree: api/
+
+ vector_to_array
+
+
+Vector and Matrix
+-----------------
+
+.. currentmodule:: pyspark.ml.linalg
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Vector
+ DenseVector
+ SparseVector
+ Vectors
+ Matrix
+ DenseMatrix
+ SparseMatrix
+ Matrices
+
+
+Recommendation
+--------------
+
+.. currentmodule:: pyspark.ml.recommendation
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ALS
+ ALSModel
+
+
+Regression
+----------
+
+.. currentmodule:: pyspark.ml.regression
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ AFTSurvivalRegression
+ AFTSurvivalRegressionModel
+ DecisionTreeRegressor
+ DecisionTreeRegressionModel
+ GBTRegressor
+ GBTRegressionModel
+ GeneralizedLinearRegression
+ GeneralizedLinearRegressionModel
+ GeneralizedLinearRegressionSummary
+ GeneralizedLinearRegressionTrainingSummary
+ IsotonicRegression
+ IsotonicRegressionModel
+ LinearRegression
+ LinearRegressionModel
+ LinearRegressionSummary
+ LinearRegressionTrainingSummary
+ RandomForestRegressor
+ RandomForestRegressionModel
+ FMRegressor
+ FMRegressionModel
+
+
+Statistics
+----------
+
+.. currentmodule:: pyspark.ml.stat
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ANOVATest
+ ChiSquareTest
+ Correlation
+ FValueTest
+ KolmogorovSmirnovTest
+ MultivariateGaussian
+ Summarizer
+ SummaryBuilder
+
+
+Tuning
+------
+
+.. currentmodule:: pyspark.ml.tuning
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ParamGridBuilder
+ CrossValidator
+ CrossValidatorModel
+ TrainValidationSplit
+ TrainValidationSplitModel
+
+
+Evaluation
+----------
+
+.. currentmodule:: pyspark.ml.evaluation
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Evaluator
+ BinaryClassificationEvaluator
+ RegressionEvaluator
+ MulticlassClassificationEvaluator
+ MultilabelClassificationEvaluator
+ ClusteringEvaluator
+ RankingEvaluator
+
+
+Frequency Pattern Mining
+----------------------------
+
+.. currentmodule:: pyspark.ml.fpm
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ FPGrowth
+ FPGrowthModel
+ PrefixSpan
+
+
+Image
+-----
+
+.. currentmodule:: pyspark.ml.image
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ImageSchema
+ _ImageSchema
+
+
+Utilities
+---------
+
+.. currentmodule:: pyspark.ml.util
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ BaseReadWrite
+ DefaultParamsReadable
+ DefaultParamsReader
+ DefaultParamsWritable
+ DefaultParamsWriter
+ GeneralMLWriter
+ HasTrainingSummary
+ Identifiable
+ MLReadable
+ MLReader
+ MLWritable
+ MLWriter
+
diff --git a/python/docs/source/reference/pyspark.mllib.rst b/python/docs/source/reference/pyspark.mllib.rst
new file mode 100644
index 0000000000000..1251b1df752c7
--- /dev/null
+++ b/python/docs/source/reference/pyspark.mllib.rst
@@ -0,0 +1,253 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+MLlib
+=====
+
+Classification
+--------------
+
+.. currentmodule:: pyspark.mllib.classification
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ LogisticRegressionModel
+ LogisticRegressionWithSGD
+ LogisticRegressionWithLBFGS
+ SVMModel
+ SVMWithSGD
+ NaiveBayesModel
+ NaiveBayes
+ StreamingLogisticRegressionWithSGD
+
+
+Clustering
+----------
+
+.. currentmodule:: pyspark.mllib.clustering
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ BisectingKMeansModel
+ BisectingKMeans
+ KMeansModel
+ KMeans
+ GaussianMixtureModel
+ GaussianMixture
+ PowerIterationClusteringModel
+ PowerIterationClustering
+ StreamingKMeans
+ StreamingKMeansModel
+ LDA
+ LDAModel
+
+
+Evaluation
+----------
+
+.. currentmodule:: pyspark.mllib.evaluation
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ BinaryClassificationMetrics
+ RegressionMetrics
+ MulticlassMetrics
+ RankingMetrics
+
+
+Feature
+-------
+
+.. currentmodule:: pyspark.mllib.feature
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Normalizer
+ StandardScalerModel
+ StandardScaler
+ HashingTF
+ IDFModel
+ IDF
+ Word2Vec
+ Word2VecModel
+ ChiSqSelector
+ ChiSqSelectorModel
+ ElementwiseProduct
+
+
+Frequency Pattern Mining
+------------------------
+
+.. currentmodule:: pyspark.mllib.fpm
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ FPGrowth
+ FPGrowthModel
+ PrefixSpan
+ PrefixSpanModel
+
+
+Vector and Matrix
+-----------------
+
+.. currentmodule:: pyspark.mllib.linalg
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Vector
+ DenseVector
+ SparseVector
+ Vectors
+ Matrix
+ DenseMatrix
+ SparseMatrix
+ Matrices
+ QRDecomposition
+
+
+Distributed Representation
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. currentmodule:: pyspark.mllib.linalg.distributed
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ BlockMatrix
+ CoordinateMatrix
+ DistributedMatrix
+ IndexedRow
+ IndexedRowMatrix
+ MatrixEntry
+ RowMatrix
+ SingularValueDecomposition
+
+
+Random
+------
+
+.. currentmodule:: pyspark.mllib.random
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ RandomRDDs
+
+
+Recommendation
+--------------
+
+.. currentmodule:: pyspark.mllib.recommendation
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ MatrixFactorizationModel
+ ALS
+ Rating
+
+
+Regression
+----------
+
+.. currentmodule:: pyspark.mllib.regression
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ LabeledPoint
+ LinearModel
+ LinearRegressionModel
+ LinearRegressionWithSGD
+ RidgeRegressionModel
+ RidgeRegressionWithSGD
+ LassoModel
+ LassoWithSGD
+ IsotonicRegressionModel
+ IsotonicRegression
+ StreamingLinearAlgorithm
+ StreamingLinearRegressionWithSGD
+
+
+Statistics
+----------
+
+.. currentmodule:: pyspark.mllib.stat
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ Statistics
+ MultivariateStatisticalSummary
+ ChiSqTestResult
+ MultivariateGaussian
+ KernelDensity
+
+
+Tree
+----
+
+.. currentmodule:: pyspark.mllib.tree
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ DecisionTreeModel
+ DecisionTree
+ RandomForestModel
+ RandomForest
+ GradientBoostedTreesModel
+ GradientBoostedTrees
+
+
+Utilities
+---------
+
+.. currentmodule:: pyspark.mllib.util
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ JavaLoader
+ JavaSaveable
+ LinearDataGenerator
+ Loader
+ MLUtils
+ Saveable
+
diff --git a/python/docs/source/reference/pyspark.resource.rst b/python/docs/source/reference/pyspark.resource.rst
new file mode 100644
index 0000000000000..a1d885c44c480
--- /dev/null
+++ b/python/docs/source/reference/pyspark.resource.rst
@@ -0,0 +1,38 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+===================
+Resource Management
+===================
+
+Core Classes
+------------
+
+.. currentmodule:: pyspark.resource
+
+.. autosummary::
+ :toctree: api/
+
+ ResourceInformation
+ ResourceProfile
+ ResourceProfileBuilder
+ ExecutorResourceRequest
+ ExecutorResourceRequests
+ TaskResourceRequest
+ TaskResourceRequests
+
diff --git a/python/docs/source/reference/pyspark.rst b/python/docs/source/reference/pyspark.rst
new file mode 100644
index 0000000000000..b50ae37b99690
--- /dev/null
+++ b/python/docs/source/reference/pyspark.rst
@@ -0,0 +1,275 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+==========
+Spark Core
+==========
+
+Public Classes
+--------------
+
+.. currentmodule:: pyspark
+
+.. autosummary::
+ :toctree: api/
+
+ SparkContext
+ RDD
+ Broadcast
+ Accumulator
+ SparkConf
+ SparkFiles
+ StorageLevel
+ TaskContext
+ RDDBarrier
+ BarrierTaskContext
+ BarrierTaskInfo
+
+
+Spark Context APIs
+------------------
+
+.. currentmodule:: pyspark
+
+.. autosummary::
+ :toctree: api/
+
+ SparkContext.PACKAGE_EXTENSIONS
+ SparkContext.accumulator
+ SparkContext.addFile
+ SparkContext.addPyFile
+ SparkContext.applicationId
+ SparkContext.binaryFiles
+ SparkContext.binaryRecords
+ SparkContext.broadcast
+ SparkContext.cancelAllJobs
+ SparkContext.cancelJobGroup
+ SparkContext.defaultMinPartitions
+ SparkContext.defaultParallelism
+ SparkContext.dump_profiles
+ SparkContext.emptyRDD
+ SparkContext.getConf
+ SparkContext.getLocalProperty
+ SparkContext.getOrCreate
+ SparkContext.hadoopFile
+ SparkContext.hadoopRDD
+ SparkContext.newAPIHadoopFile
+ SparkContext.newAPIHadoopRDD
+ SparkContext.parallelize
+ SparkContext.pickleFile
+ SparkContext.range
+ SparkContext.resources
+ SparkContext.runJob
+ SparkContext.sequenceFile
+ SparkContext.setCheckpointDir
+ SparkContext.setJobDescription
+ SparkContext.setJobGroup
+ SparkContext.setLocalProperty
+ SparkContext.setLogLevel
+ SparkContext.setSystemProperty
+ SparkContext.show_profiles
+ SparkContext.sparkUser
+ SparkContext.startTime
+ SparkContext.statusTracker
+ SparkContext.stop
+ SparkContext.textFile
+ SparkContext.uiWebUrl
+ SparkContext.union
+ SparkContext.version
+ SparkContext.wholeTextFiles
+
+
+RDD APIs
+--------
+
+.. currentmodule:: pyspark
+
+.. autosummary::
+ :toctree: api/
+
+ RDD.aggregate
+ RDD.aggregateByKey
+ RDD.barrier
+ RDD.cache
+ RDD.cartesian
+ RDD.checkpoint
+ RDD.coalesce
+ RDD.cogroup
+ RDD.collect
+ RDD.collectAsMap
+ RDD.collectWithJobGroup
+ RDD.combineByKey
+ RDD.context
+ RDD.count
+ RDD.countApprox
+ RDD.countApproxDistinct
+ RDD.countByKey
+ RDD.countByValue
+ RDD.distinct
+ RDD.filter
+ RDD.first
+ RDD.flatMap
+ RDD.flatMapValues
+ RDD.fold
+ RDD.foldByKey
+ RDD.foreach
+ RDD.foreachPartition
+ RDD.fullOuterJoin
+ RDD.getCheckpointFile
+ RDD.getNumPartitions
+ RDD.getResourceProfile
+ RDD.getStorageLevel
+ RDD.glom
+ RDD.groupBy
+ RDD.groupByKey
+ RDD.groupWith
+ RDD.histogram
+ RDD.id
+ RDD.intersection
+ RDD.isCheckpointed
+ RDD.isEmpty
+ RDD.isLocallyCheckpointed
+ RDD.join
+ RDD.keyBy
+ RDD.keys
+ RDD.leftOuterJoin
+ RDD.localCheckpoint
+ RDD.lookup
+ RDD.map
+ RDD.mapPartitions
+ RDD.mapPartitionsWithIndex
+ RDD.mapPartitionsWithSplit
+ RDD.mapValues
+ RDD.max
+ RDD.mean
+ RDD.meanApprox
+ RDD.min
+ RDD.name
+ RDD.partitionBy
+ RDD.persist
+ RDD.pipe
+ RDD.randomSplit
+ RDD.reduce
+ RDD.reduceByKey
+ RDD.reduceByKeyLocally
+ RDD.repartition
+ RDD.repartitionAndSortWithinPartitions
+ RDD.rightOuterJoin
+ RDD.sample
+ RDD.sampleByKey
+ RDD.sampleStdev
+ RDD.sampleVariance
+ RDD.saveAsHadoopDataset
+ RDD.saveAsHadoopFile
+ RDD.saveAsNewAPIHadoopDataset
+ RDD.saveAsNewAPIHadoopFile
+ RDD.saveAsPickleFile
+ RDD.saveAsSequenceFile
+ RDD.saveAsTextFile
+ RDD.setName
+ RDD.sortBy
+ RDD.sortByKey
+ RDD.stats
+ RDD.stdev
+ RDD.subtract
+ RDD.subtractByKey
+ RDD.sum
+ RDD.sumApprox
+ RDD.take
+ RDD.takeOrdered
+ RDD.takeSample
+ RDD.toDebugString
+ RDD.toLocalIterator
+ RDD.top
+ RDD.treeAggregate
+ RDD.treeReduce
+ RDD.union
+ RDD.unpersist
+ RDD.values
+ RDD.variance
+ RDD.withResources
+ RDD.zip
+ RDD.zipWithIndex
+ RDD.zipWithUniqueId
+
+
+Broadcast and Accumulator
+-------------------------
+
+.. currentmodule:: pyspark
+
+.. autosummary::
+ :toctree: api/
+
+ Broadcast.destroy
+ Broadcast.dump
+ Broadcast.load
+ Broadcast.load_from_path
+ Broadcast.unpersist
+ Broadcast.value
+ Accumulator.add
+ Accumulator.value
+
+
+Management
+----------
+
+.. currentmodule:: pyspark
+
+.. autosummary::
+ :toctree: api/
+
+ SparkConf.contains
+ SparkConf.get
+ SparkConf.getAll
+ SparkConf.set
+ SparkConf.setAll
+ SparkConf.setAppName
+ SparkConf.setExecutorEnv
+ SparkConf.setIfMissing
+ SparkConf.setMaster
+ SparkConf.setSparkHome
+ SparkConf.toDebugString
+ SparkFiles.get
+ SparkFiles.getRootDirectory
+ StorageLevel.DISK_ONLY
+ StorageLevel.DISK_ONLY_2
+ StorageLevel.MEMORY_AND_DISK
+ StorageLevel.MEMORY_AND_DISK_2
+ StorageLevel.MEMORY_ONLY
+ StorageLevel.MEMORY_ONLY_2
+ StorageLevel.OFF_HEAP
+ TaskContext.attemptNumber
+ TaskContext.get
+ TaskContext.getLocalProperty
+ TaskContext.partitionId
+ TaskContext.resources
+ TaskContext.stageId
+ TaskContext.taskAttemptId
+ RDDBarrier.mapPartitions
+ RDDBarrier.mapPartitionsWithIndex
+ BarrierTaskContext.allGather
+ BarrierTaskContext.attemptNumber
+ BarrierTaskContext.barrier
+ BarrierTaskContext.get
+ BarrierTaskContext.getLocalProperty
+ BarrierTaskContext.getTaskInfos
+ BarrierTaskContext.partitionId
+ BarrierTaskContext.resources
+ BarrierTaskContext.stageId
+ BarrierTaskContext.taskAttemptId
diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst
new file mode 100644
index 0000000000000..7e0357cf9d858
--- /dev/null
+++ b/python/docs/source/reference/pyspark.sql.rst
@@ -0,0 +1,542 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+=========
+Spark SQL
+=========
+
+Core Classes
+------------
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ SparkSession
+ DataFrame
+ Column
+ Row
+ GroupedData
+ DataFrameNaFunctions
+ DataFrameStatFunctions
+ Window
+
+
+Spark Session APIs
+------------------
+
+.. currentmodule:: pyspark.sql
+
+The entry point to programming Spark with the Dataset and DataFrame API.
+To create a Spark session, you should use ``SparkSession.builder`` attribute.
+See also :class:`SparkSession`.
+
+.. autosummary::
+ :toctree: api/
+
+ SparkSession.builder.appName
+ SparkSession.builder.config
+ SparkSession.builder.enableHiveSupport
+ SparkSession.builder.getOrCreate
+ SparkSession.builder.master
+ SparkSession.catalog
+ SparkSession.conf
+ SparkSession.createDataFrame
+ SparkSession.getActiveSession
+ SparkSession.newSession
+ SparkSession.range
+ SparkSession.read
+ SparkSession.readStream
+ SparkSession.sparkContext
+ SparkSession.sql
+ SparkSession.stop
+ SparkSession.streams
+ SparkSession.table
+ SparkSession.udf
+ SparkSession.version
+
+
+Input and Output
+----------------
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ DataFrameReader.csv
+ DataFrameReader.format
+ DataFrameReader.jdbc
+ DataFrameReader.json
+ DataFrameReader.load
+ DataFrameReader.option
+ DataFrameReader.options
+ DataFrameReader.orc
+ DataFrameReader.parquet
+ DataFrameReader.schema
+ DataFrameReader.table
+ DataFrameWriter.bucketBy
+ DataFrameWriter.csv
+ DataFrameWriter.format
+ DataFrameWriter.insertInto
+ DataFrameWriter.jdbc
+ DataFrameWriter.json
+ DataFrameWriter.mode
+ DataFrameWriter.option
+ DataFrameWriter.options
+ DataFrameWriter.orc
+ DataFrameWriter.parquet
+ DataFrameWriter.partitionBy
+ DataFrameWriter.save
+ DataFrameWriter.saveAsTable
+ DataFrameWriter.sortBy
+ DataFrameWriter.text
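+
+A typical round trip through the reader and writer might look as follows (a
+sketch assuming an active ``spark`` session; the path is illustrative):
+
+.. code-block:: python
+
+    df = spark.range(10)
+    df.write.mode("overwrite").parquet("/tmp/example")
+    spark.read.parquet("/tmp/example").show()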
+
+
+DataFrame APIs
+--------------
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ DataFrame.agg
+ DataFrame.alias
+ DataFrame.approxQuantile
+ DataFrame.cache
+ DataFrame.checkpoint
+ DataFrame.coalesce
+ DataFrame.colRegex
+ DataFrame.collect
+ DataFrame.columns
+ DataFrame.corr
+ DataFrame.count
+ DataFrame.cov
+ DataFrame.createGlobalTempView
+ DataFrame.createOrReplaceGlobalTempView
+ DataFrame.createOrReplaceTempView
+ DataFrame.createTempView
+ DataFrame.crossJoin
+ DataFrame.crosstab
+ DataFrame.cube
+ DataFrame.describe
+ DataFrame.distinct
+ DataFrame.drop
+ DataFrame.dropDuplicates
+ DataFrame.drop_duplicates
+ DataFrame.dropna
+ DataFrame.dtypes
+ DataFrame.exceptAll
+ DataFrame.explain
+ DataFrame.fillna
+ DataFrame.filter
+ DataFrame.first
+ DataFrame.foreach
+ DataFrame.foreachPartition
+ DataFrame.freqItems
+ DataFrame.groupBy
+ DataFrame.head
+ DataFrame.hint
+ DataFrame.inputFiles
+ DataFrame.intersect
+ DataFrame.intersectAll
+ DataFrame.isLocal
+ DataFrame.isStreaming
+ DataFrame.join
+ DataFrame.limit
+ DataFrame.localCheckpoint
+ DataFrame.mapInPandas
+ DataFrame.na
+ DataFrame.orderBy
+ DataFrame.persist
+ DataFrame.printSchema
+ DataFrame.randomSplit
+ DataFrame.rdd
+ DataFrame.registerTempTable
+ DataFrame.repartition
+ DataFrame.repartitionByRange
+ DataFrame.replace
+ DataFrame.rollup
+ DataFrame.sameSemantics
+ DataFrame.sample
+ DataFrame.sampleBy
+ DataFrame.schema
+ DataFrame.select
+ DataFrame.selectExpr
+ DataFrame.semanticHash
+ DataFrame.show
+ DataFrame.sort
+ DataFrame.sortWithinPartitions
+ DataFrame.stat
+ DataFrame.storageLevel
+ DataFrame.subtract
+ DataFrame.summary
+ DataFrame.tail
+ DataFrame.take
+ DataFrame.toDF
+ DataFrame.toJSON
+ DataFrame.toLocalIterator
+ DataFrame.toPandas
+ DataFrame.transform
+ DataFrame.union
+ DataFrame.unionAll
+ DataFrame.unionByName
+ DataFrame.unpersist
+ DataFrame.where
+ DataFrame.withColumn
+ DataFrame.withColumnRenamed
+ DataFrame.withWatermark
+ DataFrame.write
+ DataFrame.writeStream
+ DataFrame.writeTo
+ DataFrameNaFunctions.drop
+ DataFrameNaFunctions.fill
+ DataFrameNaFunctions.replace
+ DataFrameStatFunctions.approxQuantile
+ DataFrameStatFunctions.corr
+ DataFrameStatFunctions.cov
+ DataFrameStatFunctions.crosstab
+ DataFrameStatFunctions.freqItems
+ DataFrameStatFunctions.sampleBy
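+
+Most of these methods are transformations that compose lazily until an action
+such as ``show`` or ``collect`` runs. A brief sketch (``df`` and its columns
+are illustrative):
+
+.. code-block:: python
+
+    (df.filter(df.age > 21)
+       .select("name", "age")
+       .orderBy(df.age.desc())
+       .show())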
+
+
+Data Types
+----------
+
+.. currentmodule:: pyspark.sql.types
+
+.. autosummary::
+ :template: class_with_docs.rst
+ :toctree: api/
+
+ ArrayType
+ BinaryType
+ BooleanType
+ ByteType
+ DataType
+ DateType
+ DecimalType
+ DoubleType
+ FloatType
+ IntegerType
+ LongType
+ MapType
+ NullType
+ ShortType
+ StringType
+ StructField
+ StructType
+ TimestampType
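+
+A schema is typically assembled from these types by nesting ``StructField``
+entries in a ``StructType`` (a sketch assuming an active ``spark`` session):
+
+.. code-block:: python
+
+    from pyspark.sql.types import (
+        IntegerType, StringType, StructField, StructType)
+
+    schema = StructType([
+        StructField("name", StringType(), nullable=True),
+        StructField("age", IntegerType(), nullable=True),
+    ])
+    df = spark.createDataFrame([("Alice", 2)], schema)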
+
+
+Row
+---
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ Row.asDict
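+
+For example (a minimal sketch):
+
+.. code-block:: python
+
+    from pyspark.sql import Row
+
+    row = Row(name="Alice", age=2)
+    row.asDict()  # {'name': 'Alice', 'age': 2}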
+
+
+Functions
+---------
+
+.. currentmodule:: pyspark.sql.functions
+
+.. autosummary::
+ :toctree: api/
+
+ abs
+ acos
+ add_months
+ aggregate
+ approxCountDistinct
+ approx_count_distinct
+ array
+ array_contains
+ array_distinct
+ array_except
+ array_intersect
+ array_join
+ array_max
+ array_min
+ array_position
+ array_remove
+ array_repeat
+ array_sort
+ array_union
+ arrays_overlap
+ arrays_zip
+ asc
+ asc_nulls_first
+ asc_nulls_last
+ ascii
+ asin
+ atan
+ atan2
+ avg
+ base64
+ bin
+ bitwiseNOT
+ broadcast
+ bround
+ bucket
+ cbrt
+ ceil
+ coalesce
+ col
+ collect_list
+ collect_set
+ column
+ concat
+ concat_ws
+ conv
+ corr
+ cos
+ cosh
+ count
+ countDistinct
+ covar_pop
+ covar_samp
+ crc32
+ create_map
+ cume_dist
+ current_date
+ current_timestamp
+ date_add
+ date_format
+ date_sub
+ date_trunc
+ datediff
+ dayofmonth
+ dayofweek
+ dayofyear
+ days
+ decode
+ degrees
+ dense_rank
+ desc
+ desc_nulls_first
+ desc_nulls_last
+ element_at
+ encode
+ exists
+ exp
+ explode
+ explode_outer
+ expm1
+ expr
+ factorial
+ filter
+ first
+ flatten
+ floor
+ forall
+ format_number
+ format_string
+ from_csv
+ from_json
+ from_unixtime
+ from_utc_timestamp
+ get_json_object
+ greatest
+ grouping
+ grouping_id
+ hash
+ hex
+ hour
+ hours
+ hypot
+ initcap
+ input_file_name
+ instr
+ isnan
+ isnull
+ json_tuple
+ kurtosis
+ lag
+ last
+ last_day
+ lead
+ least
+ length
+ levenshtein
+ lit
+ locate
+ log
+ log10
+ log1p
+ log2
+ lower
+ lpad
+ ltrim
+ map_concat
+ map_entries
+ map_filter
+ map_from_arrays
+ map_from_entries
+ map_keys
+ map_values
+ map_zip_with
+ max
+ md5
+ mean
+ min
+ minute
+ monotonically_increasing_id
+ month
+ months
+ months_between
+ nanvl
+ next_day
+ ntile
+ overlay
+ pandas_udf
+ percent_rank
+ percentile_approx
+ posexplode
+ posexplode_outer
+ pow
+ quarter
+ radians
+ rand
+ randn
+ rank
+ regexp_extract
+ regexp_replace
+ repeat
+ reverse
+ rint
+ round
+ row_number
+ rpad
+ rtrim
+ schema_of_csv
+ schema_of_json
+ second
+ sequence
+ sha1
+ sha2
+ shiftLeft
+ shiftRight
+ shiftRightUnsigned
+ shuffle
+ signum
+ sin
+ sinh
+ size
+ skewness
+ slice
+ sort_array
+ soundex
+ spark_partition_id
+ split
+ sqrt
+ stddev
+ stddev_pop
+ stddev_samp
+ struct
+ substring
+ substring_index
+ sum
+ sumDistinct
+ tan
+ tanh
+ timestamp_seconds
+ toDegrees
+ toRadians
+ to_csv
+ to_date
+ to_json
+ to_timestamp
+ to_utc_timestamp
+ transform
+ transform_keys
+ transform_values
+ translate
+ trim
+ trunc
+ udf
+ unbase64
+ unhex
+ unix_timestamp
+ upper
+ var_pop
+ var_samp
+ variance
+ weekofyear
+ when
+ window
+ xxhash64
+ year
+ years
+ zip_with
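+
+Functions are usually imported under an alias and combined inside ``select``
+or ``withColumn`` (a sketch; ``df`` and its columns are illustrative):
+
+.. code-block:: python
+
+    from pyspark.sql import functions as F
+
+    df.select(
+        F.upper(F.col("name")).alias("NAME"),
+        F.when(F.col("age") > 21, "adult").otherwise("minor").alias("group"),
+    )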
+
+
+.. currentmodule:: pyspark.sql.avro.functions
+
+.. autosummary::
+ :toctree: api/
+
+ from_avro
+ to_avro
+
+
+Window
+------
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ Window.currentRow
+ Window.orderBy
+ Window.partitionBy
+ Window.rangeBetween
+ Window.rowsBetween
+ Window.unboundedFollowing
+ Window.unboundedPreceding
+ WindowSpec.orderBy
+ WindowSpec.partitionBy
+ WindowSpec.rangeBetween
+ WindowSpec.rowsBetween
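+
+A window specification is built from :class:`Window` and then passed to a
+function's ``over`` method (a sketch; the column names are illustrative):
+
+.. code-block:: python
+
+    from pyspark.sql import Window
+    from pyspark.sql import functions as F
+
+    w = Window.partitionBy("dept").orderBy(F.col("salary").desc())
+    df.withColumn("rank", F.row_number().over(w))
+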
+
+Grouping
+--------
+
+.. currentmodule:: pyspark.sql
+
+.. autosummary::
+ :toctree: api/
+
+ GroupedData.agg
+ GroupedData.apply
+ GroupedData.applyInPandas
+ GroupedData.avg
+ GroupedData.cogroup
+ GroupedData.count
+ GroupedData.max
+ GroupedData.mean
+ GroupedData.min
+ GroupedData.pivot
+ GroupedData.sum
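+
+For instance (a sketch; ``df`` and its columns are illustrative):
+
+.. code-block:: python
+
+    from pyspark.sql import functions as F
+
+    df.groupBy("dept").agg(F.avg("salary"), F.max("salary"))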
+
diff --git a/python/docs/source/reference/pyspark.ss.rst b/python/docs/source/reference/pyspark.ss.rst
new file mode 100644
index 0000000000000..a7936a4f2a59c
--- /dev/null
+++ b/python/docs/source/reference/pyspark.ss.rst
@@ -0,0 +1,90 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+====================
+Structured Streaming
+====================
+
+Core Classes
+------------
+
+.. currentmodule:: pyspark.sql.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ DataStreamReader
+ DataStreamWriter
+ ForeachBatchFunction
+ StreamingQuery
+ StreamingQueryException
+ StreamingQueryManager
+
+Input and Output
+----------------
+
+.. currentmodule:: pyspark.sql.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ DataStreamReader.csv
+ DataStreamReader.format
+ DataStreamReader.json
+ DataStreamReader.load
+ DataStreamReader.option
+ DataStreamReader.options
+ DataStreamReader.orc
+ DataStreamReader.parquet
+ DataStreamReader.schema
+ DataStreamReader.text
+ DataStreamWriter.foreach
+ DataStreamWriter.foreachBatch
+ DataStreamWriter.format
+ DataStreamWriter.option
+ DataStreamWriter.options
+ DataStreamWriter.outputMode
+ DataStreamWriter.partitionBy
+ DataStreamWriter.queryName
+ DataStreamWriter.start
+ DataStreamWriter.trigger
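+
+For instance, the built-in ``rate`` source and ``console`` sink give a quick
+end-to-end sketch (assuming an active ``spark`` session):
+
+.. code-block:: python
+
+    sdf = spark.readStream.format("rate").load()
+    query = (sdf.writeStream
+                .format("console")
+                .outputMode("append")
+                .start())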
+
+Query Management
+----------------
+
+.. currentmodule:: pyspark.sql.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ StreamingQuery.awaitTermination
+ StreamingQuery.exception
+ StreamingQuery.explain
+ StreamingQuery.id
+ StreamingQuery.isActive
+ StreamingQuery.lastProgress
+ StreamingQuery.name
+ StreamingQuery.processAllAvailable
+ StreamingQuery.recentProgress
+ StreamingQuery.runId
+ StreamingQuery.status
+ StreamingQuery.stop
+ StreamingQueryManager.active
+ StreamingQueryManager.awaitAnyTermination
+ StreamingQueryManager.get
+ StreamingQueryManager.resetTerminated
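+
+A running query can be monitored and stopped through these methods (a sketch
+continuing the example above):
+
+.. code-block:: python
+
+    query.awaitTermination(timeout=5)  # block for up to five seconds
+    print(query.status)
+    query.stop()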
diff --git a/python/docs/source/reference/pyspark.streaming.rst b/python/docs/source/reference/pyspark.streaming.rst
new file mode 100644
index 0000000000000..57cbd00b67e4c
--- /dev/null
+++ b/python/docs/source/reference/pyspark.streaming.rst
@@ -0,0 +1,130 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+===============
+Spark Streaming
+===============
+
+Core Classes
+------------
+
+.. currentmodule:: pyspark.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ StreamingContext
+ DStream
+
+
+Streaming Management
+--------------------
+
+.. currentmodule:: pyspark.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ StreamingContext.addStreamingListener
+ StreamingContext.awaitTermination
+ StreamingContext.awaitTerminationOrTimeout
+ StreamingContext.checkpoint
+ StreamingContext.getActive
+ StreamingContext.getActiveOrCreate
+ StreamingContext.getOrCreate
+ StreamingContext.remember
+ StreamingContext.sparkContext
+ StreamingContext.start
+ StreamingContext.stop
+ StreamingContext.transform
+ StreamingContext.union
+
+
+Input and Output
+----------------
+
+.. currentmodule:: pyspark.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ StreamingContext.binaryRecordsStream
+ StreamingContext.queueStream
+ StreamingContext.socketTextStream
+ StreamingContext.textFileStream
+ DStream.pprint
+ DStream.saveAsTextFiles
+
+
+Transformations and Actions
+---------------------------
+
+.. currentmodule:: pyspark.streaming
+
+.. autosummary::
+ :toctree: api/
+
+ DStream.cache
+ DStream.checkpoint
+ DStream.cogroup
+ DStream.combineByKey
+ DStream.context
+ DStream.count
+ DStream.countByValue
+ DStream.countByValueAndWindow
+ DStream.countByWindow
+ DStream.filter
+ DStream.flatMap
+ DStream.flatMapValues
+ DStream.foreachRDD
+ DStream.fullOuterJoin
+ DStream.glom
+ DStream.groupByKey
+ DStream.groupByKeyAndWindow
+ DStream.join
+ DStream.leftOuterJoin
+ DStream.map
+ DStream.mapPartitions
+ DStream.mapPartitionsWithIndex
+ DStream.mapValues
+ DStream.partitionBy
+ DStream.persist
+ DStream.reduce
+ DStream.reduceByKey
+ DStream.reduceByKeyAndWindow
+ DStream.reduceByWindow
+ DStream.repartition
+ DStream.rightOuterJoin
+ DStream.slice
+ DStream.transform
+ DStream.transformWith
+ DStream.union
+ DStream.updateStateByKey
+ DStream.window
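+
+The classic streaming word count combines several of these transformations (a
+sketch assuming an existing ``SparkContext`` ``sc``; the host and port are
+illustrative):
+
+.. code-block:: python
+
+    from pyspark.streaming import StreamingContext
+
+    ssc = StreamingContext(sc, batchDuration=1)
+    lines = ssc.socketTextStream("localhost", 9999)
+    counts = (lines.flatMap(lambda line: line.split())
+                   .map(lambda word: (word, 1))
+                   .reduceByKey(lambda a, b: a + b))
+    counts.pprint()
+    ssc.start()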
+
+
+Kinesis
+-------
+
+.. currentmodule:: pyspark.streaming.kinesis
+
+.. autosummary::
+ :toctree: api/
+
+ KinesisUtils.createStream
+ InitialPositionInStream.LATEST
+ InitialPositionInStream.TRIM_HORIZON
+
diff --git a/python/docs/source/user_guide/index.rst b/python/docs/source/user_guide/index.rst
new file mode 100644
index 0000000000000..e8a8d905f46d1
--- /dev/null
+++ b/python/docs/source/user_guide/index.rst
@@ -0,0 +1,22 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+
+==========
+User Guide
+==========
+
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3ca4edafa6873..1807df4bacc85 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1196,6 +1196,8 @@ def to_date(col, format=None):
By default, it follows casting rules to :class:`pyspark.sql.types.DateType` if the format
is omitted. Equivalent to ``col.cast("date")``.
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_date(df.t).alias('date')).collect()
[Row(date=datetime.date(1997, 2, 28))]
@@ -1219,6 +1221,8 @@ def to_timestamp(col, format=None):
By default, it follows casting rules to :class:`pyspark.sql.types.TimestampType` if the format
is omitted. Equivalent to ``col.cast("timestamp")``.
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_timestamp(df.t).alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 6925adf567fb6..e5553a8bb162b 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -335,6 +335,9 @@ def parquet(self, *paths, **options):
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
>>> df.dtypes
[('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
@@ -367,6 +370,9 @@ def text(self, paths, wholetext=False, lineSep=None, pathGlobFilter=None,
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> df = spark.read.text('python/test_support/sql/text-test.txt')
>>> df.collect()
[Row(value='hello'), Row(value='this')]
@@ -502,6 +508,10 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> df = spark.read.csv('python/test_support/sql/ages.csv')
>>> df.dtypes
[('_c0', 'string'), ('_c1', 'string')]
@@ -561,6 +571,9 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> df = spark.read.orc('python/test_support/sql/orc_partitioned')
>>> df.dtypes
[('a', 'bigint'), ('b', 'int'), ('c', 'int')]
@@ -893,6 +906,8 @@ def json(self, path, mode=None, compression=None, dateFormat=None, timestampForm
:param ignoreNullFields: Whether to ignore null fields when generating JSON objects.
If None is set, it uses the default value, ``true``.
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> df.write.json(os.path.join(tempfile.mkdtemp(), 'data'))
"""
self.mode(mode)
@@ -1007,6 +1022,8 @@ def csv(self, path, mode=None, compression=None, sep=None, quote=None, escape=No
:param lineSep: defines the line separator that should be used for writing. If None is
set, it uses the default value, ``\\n``. Maximum length is 1 character.
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> df.write.csv(os.path.join(tempfile.mkdtemp(), 'data'))
"""
self.mode(mode)
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index 5c528c1d54df7..4ec47305aa13e 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -541,6 +541,9 @@ def orc(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLookup=N
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> orc_sdf = spark.readStream.schema(sdf_schema).orc(tempfile.mkdtemp())
>>> orc_sdf.isStreaming
True
@@ -571,6 +574,9 @@ def parquet(self, path, mergeSchema=None, pathGlobFilter=None, recursiveFileLook
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> parquet_sdf = spark.readStream.schema(sdf_schema).parquet(tempfile.mkdtemp())
>>> parquet_sdf.isStreaming
True
@@ -607,6 +613,9 @@ def text(self, path, wholetext=False, lineSep=None, pathGlobFilter=None,
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+
>>> text_sdf = spark.readStream.text(tempfile.mkdtemp())
>>> text_sdf.isStreaming
True
@@ -737,6 +746,10 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
:param recursiveFileLookup: recursively scan a directory for files. Using this option
disables `partition discovery`_.
+ .. _partition discovery:
+ https://spark.apache.org/docs/latest/sql-data-sources-parquet.html#partition-discovery
+ .. _datetime pattern: https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+
>>> csv_sdf = spark.readStream.csv(tempfile.mkdtemp(), schema = sdf_schema)
>>> csv_sdf.isStreaming
True