2 changes: 1 addition & 1 deletion README.rst
@@ -120,7 +120,7 @@ This example runs a parallel grid search to optimize an example objective function.
print("Best config: ", analysis.get_best_config(metric="mean_loss"))

# Get a dataframe for analyzing trial results.
df = analysis.dataframe()
df = analysis.results_df

If TensorBoard is installed, automatically visualize all trial results:

5 changes: 3 additions & 2 deletions doc/requirements-doc.txt
@@ -24,9 +24,10 @@ sphinx-gallery
sphinx-jsonschema
sphinx-tabs
sphinx-version-warning
sphinx_rtd_theme
# TODO(simon): Use sphinx book theme released version
git+https://github.com/executablebooks/sphinx-book-theme.git@0a87d26e214c419d2e6efcadddab4be8ae7b2c21
tabulate
uvicorn
werkzeug
tune-sklearn==0.0.5
git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn
scikit-optimize
1 change: 0 additions & 1 deletion doc/requirements-rtd.txt
@@ -7,5 +7,4 @@ alabaster>=0.7,<0.8,!=0.7.5
commonmark==0.8.1
recommonmark==0.5.0
sphinx<2
sphinx-rtd-theme<0.5
readthedocs-sphinx-ext<1.1
48 changes: 35 additions & 13 deletions doc/source/_static/css/custom.css
@@ -1,32 +1,54 @@
/*Extends the docstring signature box.*/
.rst-content dl:not(.docutils) dt {
display: block;
padding: 10px;
word-wrap: break-word;
padding-right: 100px;
display: block;
padding: 10px;
word-wrap: break-word;
padding-right: 100px;
}
/*Lists in an admonition note do not have awkward whitespace below.*/
.rst-content .admonition-note .section ul {
margin-bottom: 0px
margin-bottom: 0px;
}
/*Properties become blue (classmethod, staticmethod, property)*/
.rst-content dl dt em.property {
color: #2980B9;
text-transform: uppercase
color: #2980b9;
text-transform: uppercase;
}

.rst-content .section ol p, .rst-content .section ul p {
margin-bottom: 0px;
.rst-content .section ol p,
.rst-content .section ul p {
margin-bottom: 0px;
}

div.sphx-glr-bigcontainer {
display: inline-block;
width: 100%
display: inline-block;
width: 100%;
}


td.tune-colab, th.tune-colab {
td.tune-colab,
th.tune-colab {
border: 1px solid #dddddd;
text-align: left;
padding: 8px;
}

/* Adjustment to Sphinx Book Theme */
.table td {
/* Remove row spacing */
padding: 0;
}

table {
/* Force full width for all tables */
width: 136% !important;
}

img.inline-figure {
/* Override the display: block for img */
display: inherit !important;
}

#version-warning-banner {
/* Make version warning clickable */
z-index: 1;
}
Binary file added doc/source/_static/favicon.ico
Binary file not shown.
25 changes: 16 additions & 9 deletions doc/source/conf.py
@@ -114,6 +114,8 @@ def __getattr__(cls, name):
'versionwarning.extension',
]

versionwarning_admonition_type = "tip"

versionwarning_messages = {
"master": (
"This document is for the master branch. "
Expand All @@ -125,7 +127,7 @@ def __getattr__(cls, name):
),
}

versionwarning_body_selector = "div.document"
versionwarning_body_selector = "#main-content"
sphinx_gallery_conf = {
"examples_dirs": ["../examples",
"tune/_tutorials"], # path to example scripts
@@ -233,33 +235,38 @@ def __getattr__(cls, name):

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
import sphinx_rtd_theme
html_theme = 'sphinx_rtd_theme'
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
html_theme = "sphinx_book_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#html_theme_options = {}
html_theme_options = {
"repository_url": "https://github.com/ray-project/ray",
"use_repository_button": True,
"use_issues_button": True,
"use_edit_page_button": True,
"path_to_docs": "doc/source",
"home_page_in_toc": True,
}

# Add any paths that contain custom themes here, relative to this directory.
#html_theme_path = []

# The name for this set of Sphinx documents. If None, it defaults to
# "<project> v<release> documentation".
#html_title = None
html_title = f"Ray v{release}"

# A shorter title for the navigation bar. Default is the same as html_title.
#html_short_title = None

# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#html_logo = None
html_logo = "images/ray_logo.png"

# The name of an image file (within the static path) to use as favicon of the
# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
#html_favicon = None
html_favicon = "_static/favicon.ico"

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
Expand All @@ -280,7 +287,7 @@ def __getattr__(cls, name):
#html_use_smartypants = True

# Custom sidebar templates, maps document names to template names.
html_sidebars = {'**': ['index.html']}
# html_sidebars = {'**': ['index.html']}

# Additional templates that should be rendered to pages, maps page names to
# template names.
8 changes: 1 addition & 7 deletions doc/source/ray-overview/basics.rst
@@ -1,10 +1,4 @@

.. raw:: html

<embed>
<a href="https://github.com/ray-project/ray"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://camo.githubusercontent.com/365986a132ccd6a44c23a9169022c0b5c890c387/68747470733a2f2f73332e616d617a6f6e6177732e636f6d2f6769746875622f726962626f6e732f666f726b6d655f72696768745f7265645f6161303030302e706e67" alt="Fork me on GitHub" data-canonical-src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png"></a>
</embed>

.. image:: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png

**Ray provides a simple, universal API for building distributed applications.**
@@ -13,7 +7,7 @@ Ray accomplishes this mission by:

1. Providing simple primitives for building and running distributed applications.
2. Enabling end users to parallelize single-machine code, with little to no code changes.
3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications.
3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications.

**Ray Core** provides the simple primitives for application building.

67 changes: 64 additions & 3 deletions doc/source/rllib-algorithms.rst
@@ -13,6 +13,7 @@ Algorithm Frameworks Discrete Actions Continuous Actions Multi-
=================== ========== ======================= ================== =========== =============================================================
`A2C, A3C`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
`ARS`_ tf + torch **Yes** **Yes** No
`BC`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_
`ES`_ tf + torch **Yes** **Yes** No
`DDPG`_, `TD3`_ tf + torch No **Yes** **Yes**
`APEX-DDPG`_ tf + torch No **Yes** **Yes**
@@ -22,6 +23,7 @@ Algorithm Frameworks Discrete Actions Continuous Actions Multi-
`IMPALA`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
`MAML`_ tf + torch No **Yes** No
`MARWIL`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_
`MBMPO`_ torch No **Yes** No
`PG`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
`PPO`_, `APPO`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
`SAC`_ tf + torch **Yes** **Yes** **Yes**
@@ -442,6 +444,35 @@ Tuned examples: HalfCheetahRandDirecEnv (`Env <https://github.com/ray-project/ra
:start-after: __sphinx_doc_begin__
:end-before: __sphinx_doc_end__

.. _mbmpo:

Model-Based Meta-Policy-Optimization (MB-MPO)
---------------------------------------------
|pytorch|
`[paper] <https://arxiv.org/pdf/1809.05214.pdf>`__ `[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/mbmpo/mbmpo.py>`__

RLlib's MBMPO implementation is a Dyna-style model-based RL method that learns based on the predictions of an ensemble of transition-dynamics models. Similar to MAML, MBMPO meta-learns an optimal policy by treating each dynamics model as a different task. Code here is adapted from https://github.com/jonasrothfuss/model_ensemble_meta_learning. Similar to the original paper, MBMPO is evaluated on MuJoCo, with the horizon set to 200 instead of the default 1000.

Additional statistics are logged in MBMPO. Each MBMPO iteration corresponds to multiple MAML iterations, and ``MAMLIter$i$_DynaTrajInner_$j$_episode_reward_mean`` measures the agent's returns across the dynamics models at iteration ``i`` of MAML and step ``j`` of inner adaptation. Examples can be seen `here <https://github.com/ray-project/rl-experiments/tree/master/mbmpo>`__.

Tuned examples: `HalfCheetah <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml>`__, `Hopper <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml>`__

**MuJoCo results @100K steps:** `more details <https://github.com/ray-project/rl-experiments>`__

============= ============ ====================
MuJoCo env    RLlib MBMPO  Clavera et al. MBMPO
============= ============ ====================
HalfCheetah   520          ~550
Hopper        620          ~650
============= ============ ====================

**MBMPO-specific configs** (see also `common configs <rllib-training.html#common-parameters>`__):

.. literalinclude:: ../../rllib/agents/mbmpo/mbmpo.py
:language: python
:start-after: __sphinx_doc_begin__
:end-before: __sphinx_doc_end__
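
As an illustrative sketch (not part of this PR), the trainer can be launched through Tune like any other registered algorithm. The environment string and stopping criterion below are assumptions; see the tuned examples above for known-working settings.

.. code-block:: python

    # Hypothetical usage sketch: launch the MBMPO trainer via Tune.
    # "HalfCheetah-v2" and the stopping criterion are illustrative
    # assumptions; MBMPO's tuned examples wrap specific MuJoCo tasks.
    import ray
    from ray import tune

    ray.init()
    tune.run(
        "MBMPO",
        config={"env": "HalfCheetah-v2", "horizon": 200},
        stop={"training_iteration": 100},
    )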

.. _dreamer:

Dreamer
@@ -517,10 +548,15 @@ Tuned examples: `Humanoid-v1 <https://github.com/ray-project/ray/blob/master/rll

.. _marwil:

Advantage Re-Weighted Imitation Learning (MARWIL)
-------------------------------------------------
Monotonic Advantage Re-Weighted Imitation Learning (MARWIL)
-----------------------------------------------------------
|pytorch| |tensorflow|
`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__ `[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/marwil.py>`__ MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning. MARWIL requires the `offline datasets API <rllib-offline.html>`__ to be used.
`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__
`[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/marwil.py>`__

MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data.
When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning (see `BC`_).
MARWIL requires the `offline datasets API <rllib-offline.html>`__ to be used.
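
A minimal usage sketch (not part of this PR; the dataset path below is a hypothetical placeholder for experiences written via the offline API):

.. code-block:: python

    # Hypothetical sketch: train MARWIL on previously collected experiences.
    # "/tmp/experiences" is a placeholder for a directory of JSON batches
    # produced by RLlib's output writer.
    from ray import tune

    tune.run(
        "MARWIL",
        config={
            "env": "CartPole-v0",
            "input": "/tmp/experiences",  # offline dataset (placeholder path)
            "beta": 1.0,  # 0.0 would reduce this to plain imitation learning
        },
    )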

Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/marwil/cartpole-marwil.yaml>`__

@@ -532,6 +568,29 @@ Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rll
:end-before: __sphinx_doc_end__


.. _bc:

Behavior Cloning (BC; derived from MARWIL implementation)
---------------------------------------------------------
|pytorch| |tensorflow|
`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__
`[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/bc.py>`__

Our behavioral cloning implementation is directly derived from our `MARWIL`_ implementation,
with the only difference being that the ``beta`` parameter is force-set to 0.0. This makes
BC try to match the behavior policy that generated the offline data, disregarding any resulting rewards.
BC requires the `offline datasets API <rllib-offline.html>`__ to be used.
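
A minimal usage sketch (not part of this PR; the dataset path is a hypothetical placeholder):

.. code-block:: python

    # Hypothetical sketch: BC is configured like MARWIL, minus beta,
    # which the trainer force-sets to 0.0 internally.
    from ray import tune

    tune.run(
        "BC",
        config={
            "env": "CartPole-v0",
            "input": "/tmp/experiences",  # placeholder path to offline data
        },
    )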

Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/marwil/cartpole-bc.yaml>`__

**BC-specific configs** (see also `common configs <rllib-training.html#common-parameters>`__):

.. literalinclude:: ../../rllib/agents/marwil/bc.py
:language: python
:start-after: __sphinx_doc_begin__
:end-before: __sphinx_doc_end__


Contextual Bandits (contrib/bandits)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -600,9 +659,11 @@ Tuned examples: `SimpleContextualBandit <https://github.com/ray-project/ray/blob


.. |tensorflow| image:: tensorflow.png
:class: inline-figure
:width: 24

.. |pytorch| image:: pytorch.png
:class: inline-figure
:width: 24


4 changes: 4 additions & 0 deletions doc/source/rllib-toc.rst
@@ -110,6 +110,8 @@ Algorithms

- |pytorch| |tensorflow| :ref:`Model-Agnostic Meta-Learning (MAML) <maml>`

- |pytorch| :ref:`Model-Based Meta-Policy-Optimization (MBMPO) <mbmpo>`

- |pytorch| |tensorflow| :ref:`Policy Gradients <pg>`

- |pytorch| |tensorflow| :ref:`Proximal Policy Optimization (PPO) <ppo>`
@@ -208,7 +210,9 @@ TensorFlow 2.0
RLlib currently runs in ``tf.compat.v1`` mode. This means eager execution is disabled by default, and RLlib imports TF with ``import tensorflow.compat.v1 as tf; tf.disable_v2_behavior()``. Eager execution can be enabled manually by calling ``tf.enable_eager_execution()`` or setting the ``"eager": True`` trainer config.
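
For illustration (not part of this PR), a minimal sketch of the config-based route described above; the algorithm and environment choices are arbitrary:

.. code-block:: python

    # Sketch: enable eager execution via the trainer config, as described
    # above. PPO and CartPole-v0 are arbitrary choices for illustration.
    from ray import tune

    tune.run(
        "PPO",
        config={"env": "CartPole-v0", "eager": True},
        stop={"training_iteration": 1},
    )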

.. |tensorflow| image:: tensorflow.png
:class: inline-figure
:width: 16

.. |pytorch| image:: pytorch.png
:class: inline-figure
:width: 16
2 changes: 2 additions & 0 deletions doc/source/rllib.rst
@@ -117,7 +117,9 @@ RLlib provides ways to customize almost all aspects of training, including the `
To learn more, proceed to the `table of contents <rllib-toc.html>`__.

.. |tensorflow| image:: tensorflow.png
:class: inline-figure
:width: 24

.. |pytorch| image:: pytorch.png
:class: inline-figure
:width: 24
2 changes: 1 addition & 1 deletion doc/source/tune/_tutorials/tune-sklearn.py
@@ -127,7 +127,7 @@
clf,
parameter_grid,
search_optimization="bayesian",
n_iter=3,
n_trials=3,
early_stopping=True,
max_iters=10,
)
2 changes: 1 addition & 1 deletion doc/source/tune/api_docs/analysis.rst
@@ -18,7 +18,7 @@ Here are some example operations for obtaining a summary of your experiment:
.. code-block:: python

# Get a dataframe for the last reported results of all of the trials
df = analysis.dataframe()
df = analysis.results_df

# Get a dataframe for the max accuracy seen for each trial
df = analysis.dataframe(metric="mean_accuracy", mode="max")
6 changes: 6 additions & 0 deletions doc/source/tune/api_docs/sklearn.rst
@@ -5,10 +5,16 @@ Scikit-Learn API (tune.sklearn)

.. _tunegridsearchcv-docs:

TuneGridSearchCV
----------------

.. autoclass:: ray.tune.sklearn.TuneGridSearchCV
:inherited-members:

.. _tunesearchcv-docs:

TuneSearchCV
------------

.. autoclass:: ray.tune.sklearn.TuneSearchCV
:inherited-members:
14 changes: 11 additions & 3 deletions doc/source/tune/key-concepts.rst
@@ -219,16 +219,24 @@ Analysis

analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20})

# Get the best hyperparameters
best_hyperparameters = analysis.get_best_config()
best_trial = analysis.best_trial # Get best trial
best_config = analysis.best_config # Get best trial's hyperparameters
best_logdir = analysis.best_logdir # Get best trial's logdir
best_checkpoint = analysis.best_checkpoint # Get best trial's best checkpoint
best_result = analysis.best_result # Get best trial's last results
best_result_df = analysis.best_result_df # Get best result as pandas dataframe

This object can also retrieve all training runs as dataframes, allowing you to do ad-hoc data analysis over your results.

.. code-block:: python

# Get a dataframe for the max score seen for each trial
# Get a dataframe with the last results for each trial
df_results = analysis.results_df

# Get a dataframe of results for a specific score or mode
df = analysis.dataframe(metric="score", mode="max")
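
# The returned objects are plain pandas DataFrames, so ad-hoc analysis
# works as usual (assuming a reported "score" metric, as above):
print(df.sort_values("score", ascending=False).head())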


What's Next?
-------------
