diff --git a/README.rst b/README.rst
index 1888eb37a68c..b4b48da3bbf3 100644
--- a/README.rst
+++ b/README.rst
@@ -120,7 +120,7 @@ This example runs a parallel grid search to optimize an example objective functi
     print("Best config: ", analysis.get_best_config(metric="mean_loss"))
 
     # Get a dataframe for analyzing trial results.
-    df = analysis.dataframe()
+    df = analysis.results_df
 
 If TensorBoard is installed, automatically visualize all trial results:
 
diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt
index 83e0979e5db3..6317b37e88f2 100644
--- a/doc/requirements-doc.txt
+++ b/doc/requirements-doc.txt
@@ -24,9 +24,10 @@ sphinx-gallery
 sphinx-jsonschema
 sphinx-tabs
 sphinx-version-warning
-sphinx_rtd_theme
+# TODO(simon): Use sphinx book theme released version
+git+https://github.com/executablebooks/sphinx-book-theme.git@0a87d26e214c419d2e6efcadddab4be8ae7b2c21
 tabulate
 uvicorn
 werkzeug
-tune-sklearn==0.0.5
+git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn
 scikit-optimize
diff --git a/doc/requirements-rtd.txt b/doc/requirements-rtd.txt
index 5245aa83a68f..1c6cd5322c80 100644
--- a/doc/requirements-rtd.txt
+++ b/doc/requirements-rtd.txt
@@ -7,5 +7,4 @@ alabaster>=0.7,<0.8,!=0.7.5
 commonmark==0.8.1
 recommonmark==0.5.0
 sphinx<2
-sphinx-rtd-theme<0.5
 readthedocs-sphinx-ext<1.1
diff --git a/doc/source/_static/css/custom.css b/doc/source/_static/css/custom.css
index 68935539b552..089d5c7e0448 100644
--- a/doc/source/_static/css/custom.css
+++ b/doc/source/_static/css/custom.css
@@ -1,32 +1,54 @@
 /*Extends the docstring signature box.*/
 .rst-content dl:not(.docutils) dt {
-    display: block;
-    padding: 10px;
-    word-wrap: break-word;
-    padding-right: 100px;
+  display: block;
+  padding: 10px;
+  word-wrap: break-word;
+  padding-right: 100px;
 }
 /*Lists in an admonition note do not have awkward whitespace below.*/
 .rst-content .admonition-note .section ul {
-    margin-bottom: 0px
+  margin-bottom: 0px;
 }
 /*Properties become blue (classmethod, staticmethod, property)*/
 .rst-content dl dt em.property {
-    color: #2980B9;
-    text-transform: uppercase
+  color: #2980b9;
+  text-transform: uppercase;
 }
 
-.rst-content .section ol p, .rst-content .section ul p {
-    margin-bottom: 0px;
+.rst-content .section ol p,
+.rst-content .section ul p {
+  margin-bottom: 0px;
 }
 
 div.sphx-glr-bigcontainer {
-    display: inline-block;
-    width: 100%
+  display: inline-block;
+  width: 100%;
 }
 
-
-td.tune-colab, th.tune-colab {
+td.tune-colab,
+th.tune-colab {
   border: 1px solid #dddddd;
   text-align: left;
   padding: 8px;
 }
+
+/* Adjustment to Sphinx Book Theme */
+.table td {
+  /* Remove row spacing */
+  padding: 0;
+}
+
+table {
+  /* Force full width for all tables */
+  width: 136% !important;
+}
+
+img.inline-figure {
+  /* Override the display: block for img */
+  display: inherit !important;
+}
+
+#version-warning-banner {
+  /* Make version warning clickable */
+  z-index: 1;
+}
diff --git a/doc/source/_static/favicon.ico b/doc/source/_static/favicon.ico
new file mode 100644
index 000000000000..04e72e7643eb
Binary files /dev/null and b/doc/source/_static/favicon.ico differ
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 8ff9e7f274cc..dca1d11efc13 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -114,6 +114,8 @@ def __getattr__(cls, name):
     'versionwarning.extension',
 ]
 
+versionwarning_admonition_type = "tip"
+
 versionwarning_messages = {
     "master": (
         "This document is for the master branch. "
@@ -125,7 +127,7 @@ def __getattr__(cls, name):
     ),
 }
 
-versionwarning_body_selector = "div.document"
+versionwarning_body_selector = "#main-content"
 sphinx_gallery_conf = {
     "examples_dirs": ["../examples",
                       "tune/_tutorials"],  # path to example scripts
@@ -233,33 +235,38 @@ def __getattr__(cls, name):
 
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
-import sphinx_rtd_theme
-html_theme = 'sphinx_rtd_theme'
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+html_theme = "sphinx_book_theme"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+html_theme_options = {
+    "repository_url": "https://github.com/ray-project/ray",
+    "use_repository_button": True,
+    "use_issues_button": True,
+    "use_edit_page_button": True,
+    "path_to_docs": "doc/source",
+    "home_page_in_toc": True,
+}
 
 # Add any paths that contain custom themes here, relative to this directory.
 #html_theme_path = []
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+html_title = f"Ray v{release}"
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
 #html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+html_logo = "images/ray_logo.png"
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+html_favicon = "_static/favicon.ico"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@@ -280,7 +287,7 @@ def __getattr__(cls, name):
 #html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-html_sidebars = {'**': ['index.html']}
+# html_sidebars = {'**': ['index.html']}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
diff --git a/doc/source/ray-overview/basics.rst b/doc/source/ray-overview/basics.rst
index b61fc2ae87a9..963c2ed2e0a3 100644
--- a/doc/source/ray-overview/basics.rst
+++ b/doc/source/ray-overview/basics.rst
@@ -1,10 +1,4 @@
 
-.. raw:: html
-
-  <embed>
-    <a href="https://github.com/ray-project/ray"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://camo.githubusercontent.com/365986a132ccd6a44c23a9169022c0b5c890c387/68747470733a2f2f73332e616d617a6f6e6177732e636f6d2f6769746875622f726962626f6e732f666f726b6d655f72696768745f7265645f6161303030302e706e67" alt="Fork me on GitHub" data-canonical-src="https://s3.amazonaws.com/github/ribbons/forkme_right_red_aa0000.png"></a>
-  </embed>
-
 .. image:: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png
 
 **Ray provides a simple, universal API for building distributed applications.**
@@ -13,7 +7,7 @@ Ray accomplishes this mission by:
 
 1. Providing simple primitives for building and running distributed applications.
 2. Enabling end users to parallelize single machine code, with little to zero code changes.
-3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications. 
+3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications.
 
 **Ray Core** provides the simple primitives for application building.
 
diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst
index 32bc0a7a93cd..476c0f4c947a 100644
--- a/doc/source/rllib-algorithms.rst
+++ b/doc/source/rllib-algorithms.rst
@@ -13,6 +13,7 @@ Algorithm           Frameworks Discrete Actions        Continuous Actions Multi-
 =================== ========== ======================= ================== =========== =============================================================
 `A2C, A3C`_         tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
 `ARS`_              tf + torch **Yes**                 **Yes**            No
+`BC`_               tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_
 `ES`_               tf + torch **Yes**                 **Yes**            No
 `DDPG`_, `TD3`_     tf + torch No                      **Yes**            **Yes**
 `APEX-DDPG`_        tf + torch No                      **Yes**            **Yes**
@@ -22,6 +23,7 @@ Algorithm           Frameworks Discrete Actions        Continuous Actions Multi-
 `IMPALA`_           tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
 `MAML`_             tf + torch No                      **Yes**            No
 `MARWIL`_           tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_
+`MBMPO`_            torch      No                      **Yes**            No
 `PG`_               tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
 `PPO`_, `APPO`_     tf + torch **Yes** `+parametric`_  **Yes**            **Yes**     `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_
 `SAC`_              tf + torch **Yes**                 **Yes**            **Yes**
@@ -442,6 +444,35 @@ Tuned examples: HalfCheetahRandDirecEnv (`Env <https://github.com/ray-project/ra
    :start-after: __sphinx_doc_begin__
    :end-before: __sphinx_doc_end__
 
+.. _mbmpo:
+
+Model-Based Meta-Policy-Optimization (MB-MPO)
+---------------------------------------------
+|pytorch|
+`[paper] <https://arxiv.org/pdf/1809.05214.pdf>`__ `[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/mbmpo/mbmpo.py>`__
+
+RLlib's MBMPO implementation is a Dyna-styled model-based RL method that learns based on the predictions of an ensemble of transition-dynamics models. Similar to MAML, MBMPO meta-learns an optimal policy by treating each dynamics model as a different task. Code here is adapted from https://github.com/jonasrothfuss/model_ensemble_meta_learning. Similar to the original paper, MBMPO is evaluated on MuJoCo, with the horizon set to 200 instead of the default 1000.
+
+Additional statistics are logged in MBMPO. Each MBMPO iteration corresponds to multiple MAML iterations, and ``MAMLIter$i$_DynaTrajInner_$j$_episode_reward_mean`` measures the agent's returns across the dynamics models at iteration ``i`` of MAML and step ``j`` of inner adaptation. Examples can be seen `here <https://github.com/ray-project/rl-experiments/tree/master/mbmpo>`__.
+
+Tuned examples: `HalfCheetah <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml>`__, `Hopper <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml>`__
+
+**MuJoCo results @100K steps:** `more details <https://github.com/ray-project/rl-experiments>`__
+
+=============  ============  ====================
+MuJoCo env     RLlib MBMPO   Clavera et al MBMPO
+=============  ============  ====================
+HalfCheetah    520           ~550
+Hopper         620           ~650
+=============  ============  ====================
+
+**MBMPO-specific configs** (see also `common configs <rllib-training.html#common-parameters>`__):
+
+.. literalinclude:: ../../rllib/agents/mbmpo/mbmpo.py
+   :language: python
+   :start-after: __sphinx_doc_begin__
+   :end-before: __sphinx_doc_end__
+
 .. _dreamer:
 
 Dreamer
@@ -517,10 +548,15 @@ Tuned examples: `Humanoid-v1 <https://github.com/ray-project/ray/blob/master/rll
 
 .. _marwil:
 
-Advantage Re-Weighted Imitation Learning (MARWIL)
--------------------------------------------------
+Monotonic Advantage Re-Weighted Imitation Learning (MARWIL)
+-----------------------------------------------------------
 |pytorch| |tensorflow|
-`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__ `[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/marwil.py>`__ MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning. MARWIL requires the `offline datasets API <rllib-offline.html>`__ to be used.
+`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__
+`[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/marwil.py>`__
+
+MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data.
+When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning (see `BC`_).
+MARWIL requires the `offline datasets API <rllib-offline.html>`__ to be used.
 
 Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/marwil/cartpole-marwil.yaml>`__
 
@@ -532,6 +568,29 @@ Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rll
    :end-before: __sphinx_doc_end__
 
 
+.. _bc:
+
+Behavior Cloning (BC; derived from MARWIL implementation)
+---------------------------------------------------------
+|pytorch| |tensorflow|
+`[paper] <http://papers.nips.cc/paper/7866-exponentially-weighted-imitation-learning-for-batched-historical-data>`__
+`[implementation] <https://github.com/ray-project/ray/blob/master/rllib/agents/marwil/bc.py>`__
+
+Our behavioral cloning implementation is directly derived from our `MARWIL`_ implementation,
+with the only difference being the ``beta`` parameter force-set to 0.0. This makes
+BC try to match the behavior policy, which generated the offline data, disregarding any resulting rewards.
+BC requires the `offline datasets API <rllib-offline.html>`__ to be used.
+
+Tuned examples: `CartPole-v0 <https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/marwil/cartpole-bc.yaml>`__
+
+**BC-specific configs** (see also `common configs <rllib-training.html#common-parameters>`__):
+
+.. literalinclude:: ../../rllib/agents/marwil/bc.py
+   :language: python
+   :start-after: __sphinx_doc_begin__
+   :end-before: __sphinx_doc_end__
+
+
 Contextual Bandits (contrib/bandits)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -600,9 +659,11 @@ Tuned examples: `SimpleContextualBandit <https://github.com/ray-project/ray/blob
 
 
 .. |tensorflow| image:: tensorflow.png
+    :class: inline-figure
     :width: 24
 
 .. |pytorch| image:: pytorch.png
+    :class: inline-figure
     :width: 24
 
 
diff --git a/doc/source/rllib-toc.rst b/doc/source/rllib-toc.rst
index 1a22fcda0044..bc6b8b630d1d 100644
--- a/doc/source/rllib-toc.rst
+++ b/doc/source/rllib-toc.rst
@@ -110,6 +110,8 @@ Algorithms
 
    -  |pytorch| |tensorflow| :ref:`Model-Agnostic Meta-Learning (MAML) <maml>`
 
+   -  |pytorch| :ref:`Model-Based Meta-Policy-Optimization (MBMPO) <mbmpo>`
+
    -  |pytorch| |tensorflow| :ref:`Policy Gradients <pg>`
 
    -  |pytorch| |tensorflow| :ref:`Proximal Policy Optimization (PPO) <ppo>`
@@ -208,7 +210,9 @@ TensorFlow 2.0
 RLlib currently runs in ``tf.compat.v1`` mode. This means eager execution is disabled by default, and RLlib imports TF with ``import tensorflow.compat.v1 as tf; tf.disable_v2_behaviour()``. Eager execution can be enabled manually by calling ``tf.enable_eager_execution()`` or setting the ``"eager": True`` trainer config.
 
 .. |tensorflow| image:: tensorflow.png
+    :class: inline-figure
     :width: 16
 
 .. |pytorch| image:: pytorch.png
+    :class: inline-figure
     :width: 16
diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst
index 937d4d6f3ef0..79db9095f6ec 100644
--- a/doc/source/rllib.rst
+++ b/doc/source/rllib.rst
@@ -117,7 +117,9 @@ RLlib provides ways to customize almost all aspects of training, including the `
 To learn more, proceed to the `table of contents <rllib-toc.html>`__.
 
 .. |tensorflow| image:: tensorflow.png
+    :class: inline-figure
     :width: 24
 
 .. |pytorch| image:: pytorch.png
+    :class: inline-figure
     :width: 24
diff --git a/doc/source/tune/_tutorials/tune-sklearn.py b/doc/source/tune/_tutorials/tune-sklearn.py
index 7edd15938a21..c21f0ff5a527 100644
--- a/doc/source/tune/_tutorials/tune-sklearn.py
+++ b/doc/source/tune/_tutorials/tune-sklearn.py
@@ -127,7 +127,7 @@
     clf,
     parameter_grid,
     search_optimization="bayesian",
-    n_iter=3,
+    n_trials=3,
     early_stopping=True,
     max_iters=10,
 )
diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst
index 2ec32f686c17..c9468fbab694 100644
--- a/doc/source/tune/api_docs/analysis.rst
+++ b/doc/source/tune/api_docs/analysis.rst
@@ -18,7 +18,7 @@ Here are some example operations for obtaining a summary of your experiment:
 .. code-block:: python
 
     # Get a dataframe for the last reported results of all of the trials
-    df = analysis.dataframe()
+    df = analysis.results_df
 
     # Get a dataframe for the max accuracy seen for each trial
     df = analysis.dataframe(metric="mean_accuracy", mode="max")
diff --git a/doc/source/tune/api_docs/sklearn.rst b/doc/source/tune/api_docs/sklearn.rst
index 0067a952ce77..02a015727bdd 100644
--- a/doc/source/tune/api_docs/sklearn.rst
+++ b/doc/source/tune/api_docs/sklearn.rst
@@ -5,10 +5,16 @@ Scikit-Learn API  (tune.sklearn)
 
 .. _tunegridsearchcv-docs:
 
+TuneGridSearchCV
+----------------
+
 .. autoclass:: ray.tune.sklearn.TuneGridSearchCV
 	:inherited-members:
 
 .. _tunesearchcv-docs:
 
+TuneSearchCV
+------------
+
 .. autoclass:: ray.tune.sklearn.TuneSearchCV
 	:inherited-members:
diff --git a/doc/source/tune/key-concepts.rst b/doc/source/tune/key-concepts.rst
index 11247895bd50..213d680a01c0 100644
--- a/doc/source/tune/key-concepts.rst
+++ b/doc/source/tune/key-concepts.rst
@@ -219,16 +219,24 @@ Analysis
 
     analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20})
 
-    # Get the best hyperparameters
-    best_hyperparameters = analysis.get_best_config()
+    best_trial = analysis.best_trial  # Get best trial
+    best_config = analysis.best_config  # Get best trial's hyperparameters
+    best_logdir = analysis.best_logdir  # Get best trial's logdir
+    best_checkpoint = analysis.best_checkpoint  # Get best trial's best checkpoint
+    best_result = analysis.best_result  # Get best trial's last results
+    best_result_df = analysis.best_result_df  # Get best result as pandas dataframe
 
 This object can also retrieve all training runs as dataframes, allowing you to do ad-hoc data analysis over your results.
 
 .. code-block:: python
 
-    # Get a dataframe for the max score seen for each trial
+    # Get a dataframe with the last results for each trial
+    df_results = analysis.results_df
+
+    # Get a dataframe of results for a specific score or mode
     df = analysis.dataframe(metric="score", mode="max")
 
+
 What's Next?
 -------------
 
diff --git a/java/pom.xml b/java/pom.xml
index 2f9f21c35741..1fb82f909d10 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -25,6 +25,12 @@
     <developerConnection>scm:git:ssh://github.com:ray-project/ray.git</developerConnection>
   </scm>
 
+  <developers>
+    <developer>
+      <organizationUrl>https://ray.io</organizationUrl>
+    </developer>
+  </developers>
+
   <distributionManagement>
     <snapshotRepository>
       <id>ossrh</id>
diff --git a/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java b/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java
index 8c5be8f8fa3e..b6059ce16f59 100644
--- a/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java
+++ b/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java
@@ -69,8 +69,6 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
 
     JniUtils.loadLibrary(BinaryFileUtil.CORE_WORKER_JAVA_LIBRARY, true);
     LOGGER.debug("Native libraries loaded.");
-    // Reset library path at runtime.
-    resetLibraryPath(rayConfig);
     try {
       FileUtils.forceMkdir(new File(rayConfig.logDir));
     } catch (IOException e) {
@@ -78,12 +76,6 @@ public final class RayNativeRuntime extends AbstractRayRuntime {
     }
   }
 
-  private static void resetLibraryPath(RayConfig rayConfig) {
-    String separator = System.getProperty("path.separator");
-    String libraryPath = String.join(separator, rayConfig.libraryPath);
-    JniUtils.resetLibraryPath(libraryPath);
-  }
-
   public RayNativeRuntime(RayConfig rayConfig) {
     super(rayConfig);
     loadConfigFromGcs(rayConfig);
diff --git a/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java b/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java
index 31f6b66d2b8c..df49c008bc49 100644
--- a/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java
+++ b/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java
@@ -1,11 +1,9 @@
 package io.ray.runtime.util;
 
-import com.google.common.base.Strings;
 import com.google.common.collect.Sets;
 import com.sun.jna.NativeLibrary;
 import io.ray.runtime.config.RayConfig;
 import java.io.File;
-import java.lang.reflect.Field;
 import java.util.Set;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -48,38 +46,8 @@ public static synchronized void loadLibrary(String libraryName, boolean exportSy
       }
       System.load(file.getAbsolutePath());
       LOGGER.debug("Native library loaded.");
-      resetLibraryPath(file.getAbsolutePath());
       loadedLibs.add(libraryName);
     }
   }
 
-  /**
-   * This is a hack to reset library path at runtime. Please don't use it outside of ray
-   */
-  public static synchronized void resetLibraryPath(String libPath) {
-    if (Strings.isNullOrEmpty(libPath)) {
-      return;
-    }
-    String path = System.getProperty("java.library.path");
-    String separator = System.getProperty("path.separator");
-    if (Strings.isNullOrEmpty(path)) {
-      path = "";
-    } else {
-      path += separator;
-    }
-    path += String.join(separator, libPath);
-
-    // This is a hack to reset library path at runtime,
-    // see https://stackoverflow.com/questions/15409223/.
-    System.setProperty("java.library.path", path);
-    // Set sys_paths to null so that java.library.path will be re-evaluated next time it is needed.
-    final Field sysPathsField;
-    try {
-      sysPathsField = ClassLoader.class.getDeclaredField("sys_paths");
-      sysPathsField.setAccessible(true);
-      sysPathsField.set(null, null);
-    } catch (NoSuchFieldException | IllegalAccessException e) {
-      LOGGER.error("Failed to set library path.", e);
-    }
-  }
 }
diff --git a/java/test.sh b/java/test.sh
index 70ef6ebbbbfd..36a92f259462 100755
--- a/java/test.sh
+++ b/java/test.sh
@@ -52,7 +52,7 @@ case "${OSTYPE}" in
   darwin*) ip=$(ipconfig getifaddr en0);;
   *) echo "Can't get ip address for ${OSTYPE}"; exit 1;;
 esac
-RAY_BACKEND_LOG_LEVEL=debug ray start --head --redis-port=6379 --redis-password=123456 --include-java --code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar"
+RAY_BACKEND_LOG_LEVEL=debug ray start --head --redis-port=6379 --redis-password=123456 --code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar"
 RAY_BACKEND_LOG_LEVEL=debug java -cp bazel-bin/java/all_tests_deploy.jar -Dray.redis.address="$ip:6379"\
  -Dray.redis.password='123456' -Dray.job.code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar" io.ray.test.MultiDriverTest
 ray stop
diff --git a/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java b/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java
index 2aeb909887a1..bfe8daa4d9f4 100644
--- a/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java
+++ b/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java
@@ -82,7 +82,6 @@ public void setUp() {
         String.format("--raylet-socket-name=%s", RAYLET_SOCKET_NAME),
         String.format("--node-manager-port=%s", nodeManagerPort),
         "--load-code-from-local",
-        "--include-java",
         "--system-config=" + new Gson().toJson(RayConfig.create().rayletConfigParameters),
         "--code-search-path=" + String.join(":", classpath)
     );
diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py
index 8b69714ccbb8..2c905e77ee94 100644
--- a/python/ray/autoscaler/updater.py
+++ b/python/ray/autoscaler/updater.py
@@ -166,6 +166,8 @@ def sync_file_mounts(self, sync_cmd, step_numbers=(0, 2)):
 
         def do_sync(remote_path, local_path, allow_non_existing_paths=False):
             if allow_non_existing_paths and not os.path.exists(local_path):
+                cli_logger.print("sync: {} does not exist. Skipping.",
+                                 local_path)
                 # Ignore missing source files. In the future we should support
                 # the --delete-missing-args command to delete files that have
                 # been removed
@@ -204,7 +206,10 @@ def do_sync(remote_path, local_path, allow_non_existing_paths=False):
             with cli_logger.group(
                     "Processing worker file mounts",
                     _numbered=("[]", previous_steps + 2, total_steps)):
+                cli_logger.print("synced files: {}",
+                                 str(self.cluster_synced_files))
                 for path in self.cluster_synced_files:
+                    path = os.path.expanduser(path)
                     do_sync(path, path, allow_non_existing_paths=True)
         else:
             cli_logger.print(
diff --git a/python/ray/dashboard/dashboard.py b/python/ray/dashboard/dashboard.py
index 76a75d053168..ee82a5bad00f 100644
--- a/python/ray/dashboard/dashboard.py
+++ b/python/ray/dashboard/dashboard.py
@@ -806,7 +806,7 @@ def collect(self):
 
         # search through all the sub_directories in log directory
         analysis = Analysis(str(self._logdir))
-        df = analysis.dataframe()
+        df = analysis.dataframe(metric="episode_reward_mean", mode="max")
 
         if len(df) == 0 or "trial_id" not in df.columns:
             return
diff --git a/python/ray/job_config.py b/python/ray/job_config.py
index ab2c20d4207e..92474a6935e5 100644
--- a/python/ray/job_config.py
+++ b/python/ray/job_config.py
@@ -33,6 +33,8 @@ def __init__(
             self.jvm_options = jvm_options
         if code_search_path is None:
             self.code_search_path = []
+        else:
+            self.code_search_path = code_search_path
 
     def serialize(self):
         job_config = ray.gcs_utils.JobConfig()
diff --git a/python/ray/node.py b/python/ray/node.py
index a65cc1e876a8..248ffadfb47f 100644
--- a/python/ray/node.py
+++ b/python/ray/node.py
@@ -178,8 +178,6 @@ def __init__(self,
         else:
             self._webui_url = (
                 ray.services.get_webui_url_from_redis(redis_client))
-            ray_params.include_java = (
-                ray.services.include_java_from_redis(redis_client))
 
         if head or not connect_only:
             # We need to start a local raylet.
@@ -276,13 +274,14 @@ def merge_resources(env_dict, params_dict):
                                    key, params_dict[key], env_dict[key]))
             return num_cpus, num_gpus, memory, object_store_memory, result
 
-        env_resources = {}
-        env_string = os.getenv(ray_constants.RESOURCES_ENVIRONMENT_VARIABLE)
-        if env_string:
-            env_resources = json.loads(env_string)
-            logger.info(f"Autosaler overriding resources: {env_resources}.")
-
         if not self._resource_spec:
+            env_resources = {}
+            env_string = os.getenv(
+                ray_constants.RESOURCES_ENVIRONMENT_VARIABLE)
+            if env_string:
+                env_resources = json.loads(env_string)
+                logger.info(
+                    f"Autosaler overriding resources: {env_resources}.")
             num_cpus, num_gpus, memory, object_store_memory, resources = \
                 merge_resources(env_resources, self._ray_params.resources)
             self._resource_spec = ResourceSpec(
@@ -576,7 +575,6 @@ def start_redis(self):
              redis_max_clients=self._ray_params.redis_max_clients,
              redirect_worker_output=True,
              password=self._ray_params.redis_password,
-             include_java=self._ray_params.include_java,
              fate_share=self.kernel_fate_share)
         assert (
             ray_constants.PROCESS_TYPE_REDIS_SERVER not in self.all_processes)
@@ -649,16 +647,17 @@ def start_dashboard(self, require_dashboard):
             redis_client = self.create_redis_client()
             redis_client.hmset("webui", {"url": self._webui_url})
 
-    def start_plasma_store(self):
+    def start_plasma_store(self, plasma_directory, object_store_memory):
         """Start the plasma store."""
         stdout_file, stderr_file = self.get_log_file_handles(
             "plasma_store", unique=True)
         process_info = ray.services.start_plasma_store(
             self.get_resource_spec(),
+            plasma_directory,
+            object_store_memory,
             self._plasma_store_socket_name,
             stdout_file=stdout_file,
             stderr_file=stderr_file,
-            plasma_directory=self._ray_params.plasma_directory,
             huge_pages=self._ray_params.huge_pages,
             keep_idle=bool(self._config.get("plasma_store_as_thread")),
             fate_share=self.kernel_fate_share)
@@ -688,7 +687,11 @@ def start_gcs_server(self):
             process_info,
         ]
 
-    def start_raylet(self, use_valgrind=False, use_profiler=False):
+    def start_raylet(self,
+                     plasma_directory,
+                     object_store_memory,
+                     use_valgrind=False,
+                     use_profiler=False):
         """Start the raylet.
 
         Args:
@@ -709,21 +712,21 @@ def start_raylet(self, use_valgrind=False, use_profiler=False):
             self._temp_dir,
             self._session_dir,
             self.get_resource_spec(),
-            self._ray_params.min_worker_port,
-            self._ray_params.max_worker_port,
-            self._ray_params.object_manager_port,
-            self._ray_params.redis_password,
-            self._ray_params.metrics_agent_port,
-            self._metrics_export_port,
+            plasma_directory,
+            object_store_memory,
+            min_worker_port=self._ray_params.min_worker_port,
+            max_worker_port=self._ray_params.max_worker_port,
+            object_manager_port=self._ray_params.object_manager_port,
+            redis_password=self._ray_params.redis_password,
+            metrics_agent_port=self._ray_params.metrics_agent_port,
+            metrics_export_port=self._metrics_export_port,
             use_valgrind=use_valgrind,
             use_profiler=use_profiler,
             stdout_file=stdout_file,
             stderr_file=stderr_file,
             config=self._config,
-            include_java=self._ray_params.include_java,
             java_worker_options=self._ray_params.java_worker_options,
             load_code_from_local=self._ray_params.load_code_from_local,
-            plasma_directory=self._ray_params.plasma_directory,
             huge_pages=self._ray_params.huge_pages,
             fate_share=self.kernel_fate_share,
             socket_to_use=self.socket,
@@ -810,8 +813,17 @@ def start_ray_processes(self):
         logger.debug(f"Process STDOUT and STDERR is being "
                      f"redirected to {self._logs_dir}.")
 
-        self.start_plasma_store()
-        self.start_raylet()
+        # Make sure we don't call `determine_plasma_store_config` multiple
+        # times to avoid printing multiple warnings.
+        resource_spec = self.get_resource_spec()
+        plasma_directory, object_store_memory = \
+            ray.services.determine_plasma_store_config(
+                resource_spec.object_store_memory,
+                plasma_directory=self._ray_params.plasma_directory,
+                huge_pages=self._ray_params.huge_pages
+            )
+        self.start_plasma_store(plasma_directory, object_store_memory)
+        self.start_raylet(plasma_directory, object_store_memory)
         if "RAY_USE_NEW_DASHBOARD" not in os.environ:
             self.start_reporter()
 
diff --git a/python/ray/parameter.py b/python/ray/parameter.py
index 4a4ec4e85d63..811d9539ac35 100644
--- a/python/ray/parameter.py
+++ b/python/ray/parameter.py
@@ -84,8 +84,6 @@ class RayParams:
             monitor the log files for all processes on this node and push their
             contents to Redis.
         autoscaling_config: path to autoscaling config file.
-        include_java (bool): If True, the raylet backend can also support
-            Java worker.
         java_worker_options (list): The command options for Java worker.
         load_code_from_local: Whether load code from local file or from GCS.
         metrics_agent_port(int): The port to bind metrics agent.
@@ -138,7 +136,6 @@ def __init__(self,
                  temp_dir=None,
                  include_log_monitor=None,
                  autoscaling_config=None,
-                 include_java=False,
                  java_worker_options=None,
                  load_code_from_local=False,
                  start_initial_python_workers_for_first_job=False,
@@ -183,7 +180,6 @@ def __init__(self,
         self.temp_dir = temp_dir
         self.include_log_monitor = include_log_monitor
         self.autoscaling_config = autoscaling_config
-        self.include_java = include_java
         self.java_worker_options = java_worker_options
         self.load_code_from_local = load_code_from_local
         self.metrics_agent_port = metrics_agent_port
diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py
index 1d5a0bd053eb..dbd783b6a0bb 100644
--- a/python/ray/scripts/scripts.py
+++ b/python/ray/scripts/scripts.py
@@ -343,11 +343,6 @@ def dashboard(cluster_config_file, cluster_name, port, remote_port):
     "--temp-dir",
     default=None,
     help="manually specify the root temporary dir of the Ray process")
-@click.option(
-    "--include-java",
-    is_flag=True,
-    default=None,
-    help="Enable Java worker support.")
 @click.option(
     "--java-worker-options",
     required=False,
@@ -397,7 +392,7 @@ def start(node_ip_address, redis_address, address, redis_port, port,
           head, include_webui, webui_host, include_dashboard, dashboard_host,
           dashboard_port, block, plasma_directory, huge_pages,
           autoscaling_config, no_redirect_worker_output, no_redirect_output,
-          plasma_store_socket_name, raylet_socket_name, temp_dir, include_java,
+          plasma_store_socket_name, raylet_socket_name, temp_dir,
           java_worker_options, code_search_path, load_code_from_local,
           system_config, lru_evict, enable_object_reconstruction,
           metrics_export_port, log_style, log_color, verbose):
@@ -505,7 +500,6 @@ def start(node_ip_address, redis_address, address, redis_port, port,
         plasma_store_socket_name=plasma_store_socket_name,
         raylet_socket_name=raylet_socket_name,
         temp_dir=temp_dir,
-        include_java=include_java,
         include_dashboard=include_dashboard,
         dashboard_host=dashboard_host,
         dashboard_port=dashboard_port,
@@ -564,7 +558,6 @@ def start(node_ip_address, redis_address, address, redis_port, port,
             num_redis_shards=num_redis_shards,
             redis_max_clients=redis_max_clients,
             autoscaling_config=autoscaling_config,
-            include_java=False,
         )
 
         node = ray.node.Node(
@@ -622,7 +615,7 @@ def start(node_ip_address, redis_address, address, redis_port, port,
             "    ray stop".format(
                 redis_address, " --redis-password='" + redis_password + "'"
                 if redis_password else "",
-                ", redis_password='" + redis_password + "'"
+                ", _redis_password='" + redis_password + "'"
                 if redis_password else ""))
     else:
         # Start Ray on a non-head node.
@@ -671,12 +664,6 @@ def start(node_ip_address, redis_address, address, redis_port, port,
             raise ValueError(
                 "If --head is not passed in, the --include-dashboard"
                 "flag is not relevant.")
-        if include_java is not None:
-            cli_logger.abort("`{}` should not be specified without `{}`.",
-                             cf.bold("--include-java"), cf.bold("--head"))
-
-            raise ValueError("--include-java should only be set for the head "
-                             "node.")
 
         # Wait for the Redis server to be started. And throw an exception if we
         # can't connect to it.
@@ -1472,7 +1459,7 @@ def memory(address, redis_password):
     if not address:
         address = services.find_redis_address_or_die()
     logger.info(f"Connecting to Ray instance at {address}.")
-    ray.init(address=address, redis_password=redis_password)
+    ray.init(address=address, _redis_password=redis_password)
     print(ray.internal.internal_api.memory_summary())
 
 
diff --git a/python/ray/services.py b/python/ray/services.py
index b2b1380a83fa..1283925f811a 100644
--- a/python/ray/services.py
+++ b/python/ray/services.py
@@ -123,18 +123,6 @@ def new_port():
     return random.randint(10000, 65535)
 
 
-def include_java_from_redis(redis_client):
-    """This is used for query include_java bool from redis.
-
-    Args:
-        redis_client (StrictRedis): The redis client to GCS.
-
-    Returns:
-        True if this cluster backend enables Java worker.
-    """
-    return redis_client.get("INCLUDE_JAVA") == b"1"
-
-
 def find_redis_address_or_die():
     pids = psutil.pids()
     redis_addresses = set()
@@ -683,7 +671,6 @@ def start_redis(node_ip_address,
                 redirect_worker_output=False,
                 password=None,
                 use_credis=None,
-                include_java=False,
                 fate_share=None):
     """Start the Redis global state store.
 
@@ -709,8 +696,6 @@ def start_redis(node_ip_address,
         use_credis: If True, additionally load the chain-replicated libraries
             into the redis servers.  Defaults to None, which means its value is
             set by the presence of "RAY_USE_NEW_GCS" in os.environ.
-        include_java (bool): If True, the raylet backend can also support
-            Java worker.
 
     Returns:
         A tuple of the address for the primary Redis shard, a list of
@@ -784,10 +769,6 @@ def start_redis(node_ip_address,
     primary_redis_client.set("RedirectOutput", 1
                              if redirect_worker_output else 0)
 
-    # put the include_java bool to primary redis-server, so that other nodes
-    # can access it and know whether or not to enable cross-languages.
-    primary_redis_client.set("INCLUDE_JAVA", 1 if include_java else 0)
-
     # Init job counter to GCS.
     primary_redis_client.set("JobCounter", 0)
 
@@ -1256,6 +1237,8 @@ def start_raylet(redis_address,
                  temp_dir,
                  session_dir,
                  resource_spec,
+                 plasma_directory,
+                 object_store_memory,
                  min_worker_port=None,
                  max_worker_port=None,
                  object_manager_port=None,
@@ -1267,10 +1250,8 @@ def start_raylet(redis_address,
                  stdout_file=None,
                  stderr_file=None,
                  config=None,
-                 include_java=False,
                  java_worker_options=None,
                  load_code_from_local=False,
-                 plasma_directory=None,
                  huge_pages=False,
                  fate_share=None,
                  socket_to_use=None,
@@ -1312,8 +1293,6 @@ def start_raylet(redis_address,
             no redirection should happen, then this should be None.
         config (dict|None): Optional Raylet configuration that will
             override defaults in RayConfig.
-        include_java (bool): If True, the raylet backend can also support
-            Java worker.
         java_worker_options (list): The command options for Java worker.
         code_search_path (list): Code search path for worker. code_search_path
             is added to worker command in non-multi-tenancy mode and job_config
@@ -1345,6 +1324,26 @@ def start_raylet(redis_address,
 
     gcs_ip_address, gcs_port = redis_address.split(":")
 
+    has_java_command = False
+    try:
+        java_proc = subprocess.run(
+            ["java", "-version"],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE)
+        if java_proc.returncode == 0:
+            has_java_command = True
+    except OSError:
+        pass
+
+    ray_java_installed = False
+    try:
+        jars_dir = get_ray_jars_dir()
+        if os.path.exists(jars_dir):
+            ray_java_installed = True
+    except Exception:
+        pass
+
+    include_java = has_java_command and ray_java_installed
     if include_java is True:
         java_worker_command = build_java_worker_command(
             json.loads(java_worker_options) if java_worker_options else [],
@@ -1457,8 +1456,6 @@ def start_raylet(redis_address,
             subprocess.list2cmdline(agent_command)))
     if config.get("plasma_store_as_thread"):
         # command related to the plasma store
-        plasma_directory, object_store_memory = determine_plasma_store_config(
-            resource_spec.object_store_memory, plasma_directory, huge_pages)
         command += [
             f"--object_store_memory={object_store_memory}",
             f"--plasma_directory={plasma_directory}",
@@ -1653,8 +1650,8 @@ def determine_plasma_store_config(object_store_memory,
                 "than the total available memory.")
     else:
         plasma_directory = os.path.abspath(plasma_directory)
-        logger.warning("WARNING: object_store_memory is not verified when "
-                       "plasma_directory is set.")
+        logger.info("object_store_memory is not verified when "
+                    "plasma_directory is set.")
 
     if not os.path.isdir(plasma_directory):
         raise ValueError(f"The file {plasma_directory} does not "
@@ -1680,10 +1677,11 @@ def determine_plasma_store_config(object_store_memory,
 
 
 def start_plasma_store(resource_spec,
+                       plasma_directory,
+                       object_store_memory,
                        plasma_store_socket_name,
                        stdout_file=None,
                        stderr_file=None,
-                       plasma_directory=None,
                        keep_idle=False,
                        huge_pages=False,
                        fate_share=None,
@@ -1712,8 +1710,6 @@ def start_plasma_store(resource_spec,
         raise ValueError("Cannot use valgrind and profiler at the same time.")
 
     assert resource_spec.resolved()
-    plasma_directory, object_store_memory = determine_plasma_store_config(
-        resource_spec.object_store_memory, plasma_directory, huge_pages)
 
     command = [
         PLASMA_STORE_EXECUTABLE,
diff --git a/python/ray/tests/test_cross_language.py b/python/ray/tests/test_cross_language.py
index 9ba24a980628..3904f63df135 100644
--- a/python/ray/tests/test_cross_language.py
+++ b/python/ray/tests/test_cross_language.py
@@ -6,7 +6,7 @@
 
 
 def test_cross_language_raise_kwargs(shutdown_only):
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
 
     with pytest.raises(Exception, match="kwargs"):
         ray.java_function("a", "b").remote(x="arg1")
@@ -16,7 +16,7 @@ def test_cross_language_raise_kwargs(shutdown_only):
 
 
 def test_cross_language_raise_exception(shutdown_only):
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
 
     class PythonObject(object):
         pass
diff --git a/python/ray/tune/BUILD b/python/ray/tune/BUILD
index d24cf0452663..f85583f7434d 100644
--- a/python/ray/tune/BUILD
+++ b/python/ray/tune/BUILD
@@ -149,7 +149,7 @@ py_test(
 
 py_test(
     name = "test_sample",
-    size = "medium",
+    size = "small",
     srcs = ["tests/test_sample.py"],
     deps = [":tune_lib"],
     tags = ["exclusive"],
diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py
index 2da4c33e8883..afa5de622ceb 100644
--- a/python/ray/tune/analysis/experiment_analysis.py
+++ b/python/ray/tune/analysis/experiment_analysis.py
@@ -1,11 +1,17 @@
 import json
 import logging
 import os
+from typing import Dict
+
+from ray.tune.checkpoint_manager import Checkpoint
+from ray.tune.utils import flatten_dict
 
 try:
     import pandas as pd
+    from pandas import DataFrame
 except ImportError:
     pd = None
+    DataFrame = None
 
 from ray.tune.error import TuneError
 from ray.tune.result import EXPR_PROGRESS_FILE, EXPR_PARAM_FILE,\
@@ -80,6 +86,9 @@ def dataframe(self, metric=None, mode=None):
         Returns:
             pd.DataFrame: Constructed from a result dict of each trial.
         """
+        metric = self._validate_metric(metric)
+        mode = self._validate_mode(mode)
+
         rows = self._retrieve_rows(metric=metric, mode=mode)
         all_configs = self.get_all_configs(prefix=True)
         for path, config in all_configs.items():
@@ -227,6 +236,9 @@ def get_best_checkpoint(self, trial, metric=None, mode=None):
         mode = self._validate_mode(mode)
 
         checkpoint_paths = self.get_trial_checkpoints_paths(trial, metric)
+        if not checkpoint_paths:
+            logger.error(f"No checkpoints have been found for trial {trial}.")
+            return None
         if mode == "max":
             return max(checkpoint_paths, key=lambda x: x[1])[0]
         else:
@@ -316,7 +328,150 @@ def __init__(self,
             os.path.dirname(experiment_checkpoint_path), default_metric,
             default_mode)
 
-    def get_best_trial(self, metric=None, mode=None, scope="all"):
+    @property
+    def best_trial(self) -> Trial:
+        """Get the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_trial(metric, mode, scope)` instead.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_trial`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`. Alternatively, use the "
+                "`get_best_trial(metric, mode)` method to set the metric "
+                "and mode explicitly.")
+        return self.get_best_trial(self.default_metric, self.default_mode)
+
+    @property
+    def best_config(self) -> Dict:
+        """Get the config of the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_config(metric, mode, scope)` instead.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_config`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`. Alternatively, use the "
+                "`get_best_config(metric, mode)` method to set the metric "
+                "and mode explicitly.")
+        return self.get_best_config(self.default_metric, self.default_mode)
+
+    @property
+    def best_checkpoint(self) -> Checkpoint:
+        """Get the checkpoint of the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_checkpoint(trial, metric, mode)` instead.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_checkpoint`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`. Alternatively, use the "
+                "`get_best_checkpoint(trial, metric, mode)` method to set the "
+                "metric and mode explicitly.")
+        best_trial = self.best_trial
+        return self.get_best_checkpoint(best_trial, self.default_metric,
+                                        self.default_mode)
+
+    @property
+    def best_logdir(self) -> str:
+        """Get the logdir of the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_logdir(metric, mode)` instead.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_logdir`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`. Alternatively, use the "
+                "`get_best_logdir(metric, mode, scope)` method to set the "
+                "metric and mode explicitly.")
+        return self.get_best_logdir(self.default_metric, self.default_mode)
+
+    @property
+    def best_dataframe(self) -> DataFrame:
+        """Get the full result dataframe of the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_logdir(metric, mode)` and use it to look for the dataframe
+        in the `self.trial_dataframes` dict.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_dataframe`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`.")
+        best_logdir = self.best_logdir
+        return self.trial_dataframes[best_logdir]
+
+    @property
+    def best_result(self) -> Dict:
+        """Get the last result of the best trial of the experiment.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_trial(metric, mode, scope).last_result` instead.
+        """
+        if not self.default_metric or not self.default_mode:
+            raise ValueError(
+                "To fetch the `best_result`, pass a `metric` and `mode` "
+                "parameter to `tune.run()`. Alternatively, use "
+                "`get_best_trial(metric, mode).last_result` to set "
+                "the metric and mode explicitly and fetch the last result.")
+        return self.best_trial.last_result
+
+    @property
+    def best_result_df(self) -> DataFrame:
+        """Get the last result of the best trial as a pandas dataframe.
+
+        The best trial is determined by comparing the last trial results
+        using the `metric` and `mode` parameters passed to `tune.run()`.
+
+        If you didn't pass these parameters, use
+        `get_best_trial(metric, mode, scope).last_result` instead.
+        """
+        if not pd:
+            raise ValueError("`best_result_df` requires pandas. Install with "
+                             "`pip install pandas`.")
+        best_result = flatten_dict(self.best_result, delimiter=".")
+        return pd.DataFrame.from_records([best_result], index="trial_id")
+
+    @property
+    def results(self) -> Dict[str, Dict]:
+        """Get the last result of all trials of the experiment, by trial ID."""
+        return {trial.trial_id: trial.last_result for trial in self.trials}
+
+    @property
+    def results_df(self) -> DataFrame:
+        """Get the last result of all trials as a pandas dataframe."""
+        if not pd:
+            raise ValueError("`results_df` requires pandas. Install with "
+                             "`pip install pandas`.")
+        rows = [
+            flatten_dict(trial.last_result, delimiter=".")
+            for trial in self.trials
+        ]
+        return pd.DataFrame.from_records(rows, index="trial_id")
+
+    def get_best_trial(self, metric=None, mode=None, scope="last"):
         """Retrieve the best trial object.
 
         Compares all trials' scores on ``metric``.
@@ -380,7 +535,7 @@ def get_best_trial(self, metric=None, mode=None, scope="all"):
                 "parameter?")
         return best_trial
 
-    def get_best_config(self, metric=None, mode=None, scope="all"):
+    def get_best_config(self, metric=None, mode=None, scope="last"):
         """Retrieve the best config corresponding to the trial.
 
         Compares all trials' scores on `metric`.
@@ -407,7 +562,7 @@ def get_best_config(self, metric=None, mode=None, scope="all"):
         best_trial = self.get_best_trial(metric, mode, scope)
         return best_trial.config if best_trial else None
 
-    def get_best_logdir(self, metric=None, mode=None, scope="all"):
+    def get_best_logdir(self, metric=None, mode=None, scope="last"):
         """Retrieve the logdir corresponding to the best trial.
 
         Compares all trials' scores on `metric`.
diff --git a/python/ray/tune/commands.py b/python/ray/tune/commands.py
index 2ab17e609906..7fbbe9776bde 100644
--- a/python/ray/tune/commands.py
+++ b/python/ray/tune/commands.py
@@ -116,7 +116,8 @@ def list_trials(experiment_path,
     _check_tabulate()
 
     try:
-        checkpoints_df = Analysis(experiment_path).dataframe()
+        checkpoints_df = Analysis(experiment_path).dataframe(
+            metric="episode_reward_mean", mode="max")
     except TuneError:
         raise click.ClickException("No trial data found!")
 
diff --git a/python/ray/tune/examples/bayesopt_example.py b/python/ray/tune/examples/bayesopt_example.py
index d9f552658af2..1d0e112121f5 100644
--- a/python/ray/tune/examples/bayesopt_example.py
+++ b/python/ray/tune/examples/bayesopt_example.py
@@ -7,6 +7,7 @@
 import ray
 from ray import tune
 from ray.tune.schedulers import AsyncHyperBandScheduler
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.suggest.bayesopt import BayesOptSearch
 
 
@@ -43,18 +44,18 @@ def easy_objective(config):
             "height": tune.uniform(-100, 100)
         }
     }
-    algo = BayesOptSearch(
-        metric="mean_loss",
-        mode="min",
-        utility_kwargs={
-            "kind": "ucb",
-            "kappa": 2.5,
-            "xi": 0.0
-        })
-    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
+    algo = BayesOptSearch(utility_kwargs={
+        "kind": "ucb",
+        "kappa": 2.5,
+        "xi": 0.0
+    })
+    algo = ConcurrencyLimiter(algo, max_concurrent=4)
+    scheduler = AsyncHyperBandScheduler()
     tune.run(
         easy_objective,
         name="my_exp",
+        metric="mean_loss",
+        mode="min",
         search_alg=algo,
         scheduler=scheduler,
         **tune_kwargs)
diff --git a/python/ray/tune/examples/dragonfly_example.py b/python/ray/tune/examples/dragonfly_example.py
index 53b8c3f84266..38c83083eac0 100644
--- a/python/ray/tune/examples/dragonfly_example.py
+++ b/python/ray/tune/examples/dragonfly_example.py
@@ -11,6 +11,7 @@
 
 import ray
 from ray import tune
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.dragonfly import DragonflySearch
 
@@ -70,12 +71,14 @@ def objective(config):
         optimizer="bandit",
         domain="euclidean",
         # space=space,  # If you want to set the space manually
-        metric="objective",
-        mode="max")
+    )
+    df_search = ConcurrencyLimiter(df_search, max_concurrent=4)
 
-    scheduler = AsyncHyperBandScheduler(metric="objective", mode="max")
+    scheduler = AsyncHyperBandScheduler()
     tune.run(
         objective,
+        metric="objective",
+        mode="max",
         name="dragonfly_search",
         search_alg=df_search,
         scheduler=scheduler,
diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py
index 77ec56040a99..c2aff71aec96 100755
--- a/python/ray/tune/examples/hyperband_example.py
+++ b/python/ray/tune/examples/hyperband_example.py
@@ -3,16 +3,15 @@
 import argparse
 import json
 import os
-import random
 
 import numpy as np
 
 import ray
-from ray.tune import Trainable, run, sample_from
+from ray import tune
 from ray.tune.schedulers import HyperBandScheduler
 
 
-class MyTrainableClass(Trainable):
+class MyTrainableClass(tune.Trainable):
     """Example agent whose learning curve is a random sigmoid.
 
     The dummy hyperparameters "width" and "height" determine the slope and
@@ -58,13 +57,14 @@ def load_checkpoint(self, checkpoint_path):
         mode="max",
         max_t=200)
 
-    run(MyTrainableClass,
+    tune.run(
+        MyTrainableClass,
         name="hyperband_test",
         num_samples=20,
         stop={"training_iteration": 1 if args.smoke_test else 99999},
         config={
-            "width": sample_from(lambda spec: 10 + int(90 * random.random())),
-            "height": sample_from(lambda spec: int(100 * random.random()))
+            "width": tune.randint(10, 100),
+            "height": tune.randint(0, 100)
         },
         scheduler=hyperband,
         fail_fast=True)
diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py
index 3385376b6a55..d28f059ecb00 100644
--- a/python/ray/tune/examples/hyperopt_example.py
+++ b/python/ray/tune/examples/hyperopt_example.py
@@ -6,6 +6,7 @@
 
 import ray
 from ray import tune
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.hyperopt import HyperOptSearch
 
@@ -58,8 +59,14 @@ def easy_objective(config):
             "activation": tune.choice(["relu", "tanh"])
         }
     }
-    algo = HyperOptSearch(
-        metric="mean_loss", mode="min", points_to_evaluate=current_best_params)
-    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
+    algo = HyperOptSearch(points_to_evaluate=current_best_params)
+    algo = ConcurrencyLimiter(algo, max_concurrent=4)
+
+    scheduler = AsyncHyperBandScheduler()
     tune.run(
-        easy_objective, search_alg=algo, scheduler=scheduler, **tune_kwargs)
+        easy_objective,
+        search_alg=algo,
+        scheduler=scheduler,
+        metric="mean_loss",
+        mode="min",
+        **tune_kwargs)
diff --git a/python/ray/tune/examples/lightgbm_example.py b/python/ray/tune/examples/lightgbm_example.py
index a0385372842d..9ca41fa8e7b3 100644
--- a/python/ray/tune/examples/lightgbm_example.py
+++ b/python/ray/tune/examples/lightgbm_example.py
@@ -44,6 +44,8 @@ def train_breast_cancer(config):
     from ray.tune.schedulers import ASHAScheduler
     tune.run(
         train_breast_cancer,
+        metric="binary_error",
+        mode="min",
         config=config,
         num_samples=2,
-        scheduler=ASHAScheduler(metric="binary_error", mode="min"))
+        scheduler=ASHAScheduler())
diff --git a/python/ray/tune/examples/mlflow_example.py b/python/ray/tune/examples/mlflow_example.py
index 368726c0b6ad..875c7837bbbf 100644
--- a/python/ray/tune/examples/mlflow_example.py
+++ b/python/ray/tune/examples/mlflow_example.py
@@ -9,7 +9,6 @@
 import mlflow
 from mlflow.tracking import MlflowClient
 import time
-import random
 
 from ray import tune
 from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS
@@ -44,9 +43,8 @@ def easy_objective(config):
             "logger_config": {
                 "mlflow_experiment_id": experiment_id,
             },
-            "width": tune.sample_from(
-                lambda spec: 10 + int(90 * random.random())),
-            "height": tune.sample_from(lambda spec: int(100 * random.random()))
+            "width": tune.randint(10, 100),
+            "height": tune.randint(0, 100),
         })
 
     df = mlflow.search_runs([experiment_id])
diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py
index 5a2c3677079c..d1e4fdf69fb8 100644
--- a/python/ray/tune/examples/mnist_pytorch.py
+++ b/python/ray/tune/examples/mnist_pytorch.py
@@ -1,7 +1,6 @@
 # Original Code here:
 # https://github.com/pytorch/examples/blob/master/mnist/main.py
 import os
-import numpy as np
 import argparse
 from filelock import FileLock
 import torch
@@ -89,7 +88,7 @@ def get_data_loaders():
 
 
 def train_mnist(config):
-    use_cuda = config.get("use_gpu") and torch.cuda.is_available()
+    use_cuda = torch.cuda.is_available()
     device = torch.device("cuda" if use_cuda else "cpu")
     train_loader, test_loader = get_data_loaders()
     model = ConvNet().to(device)
@@ -100,6 +99,7 @@ def train_mnist(config):
     while True:
         train(model, optimizer, train_loader, device)
         acc = test(model, test_loader, device)
+        # Report the current accuracy back to Tune.
         tune.report(mean_accuracy=acc)
 
 
@@ -120,10 +120,14 @@ def train_mnist(config):
         ray.init(address=args.ray_address)
     else:
         ray.init(num_cpus=2 if args.smoke_test else None)
-    sched = AsyncHyperBandScheduler(
-        time_attr="training_iteration", metric="mean_accuracy")
+
+    # for early stopping
+    sched = AsyncHyperBandScheduler()
+
     analysis = tune.run(
         train_mnist,
+        metric="mean_accuracy",
+        mode="max",
         name="exp",
         scheduler=sched,
         stop={
@@ -132,14 +136,12 @@ def train_mnist(config):
         },
         resources_per_trial={
             "cpu": 2,
-            "gpu": int(args.cuda)
+            "gpu": int(args.cuda)  # set this for GPUs
         },
         num_samples=1 if args.smoke_test else 50,
         config={
-            "lr": tune.sample_from(lambda spec: 10**(-10 * np.random.rand())),
+            "lr": tune.loguniform(1e-4, 1e-2),
             "momentum": tune.uniform(0.1, 0.9),
-            "use_gpu": int(args.cuda)
         })
 
-    print("Best config is:",
-          analysis.get_best_config(metric="mean_accuracy", mode="max"))
+    print("Best config is:", analysis.best_config)
diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py
index c623111daf83..c31b81968375 100644
--- a/python/ray/tune/examples/mnist_pytorch_trainable.py
+++ b/python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -65,9 +65,11 @@ def load_checkpoint(self, checkpoint_path):
 if __name__ == "__main__":
     args = parser.parse_args()
     ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None)
-    sched = ASHAScheduler(metric="mean_accuracy")
+    sched = ASHAScheduler()
     analysis = tune.run(
         TrainMNIST,
+        metric="mean_accuracy",
+        mode="max",
         scheduler=sched,
         stop={
             "mean_accuracy": 0.95,
diff --git a/python/ray/tune/examples/mxnet_example.py b/python/ray/tune/examples/mxnet_example.py
index b128c121d298..dd959e481ff0 100644
--- a/python/ray/tune/examples/mxnet_example.py
+++ b/python/ray/tune/examples/mxnet_example.py
@@ -66,8 +66,7 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10):
         reduction_factor=2)
 
     reporter = CLIReporter(
-        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"],
-        metric_columns=["loss", "mean_accuracy", "training_iteration"])
+        parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"])
 
     tune.run(
         partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs),
diff --git a/python/ray/tune/examples/nevergrad_example.py b/python/ray/tune/examples/nevergrad_example.py
index 0dbd01e6a082..7eae59bc12f6 100644
--- a/python/ray/tune/examples/nevergrad_example.py
+++ b/python/ray/tune/examples/nevergrad_example.py
@@ -6,6 +6,7 @@
 
 import ray
 from ray import tune
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.nevergrad import NevergradSearch
 
@@ -57,13 +58,15 @@ def easy_objective(config):
     algo = NevergradSearch(
         optimizer=ng.optimizers.OnePlusOne,
         # space=space,  # If you want to set the space manually
-        metric="mean_loss",
-        mode="min")
+    )
+    algo = ConcurrencyLimiter(algo, max_concurrent=4)
 
-    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
+    scheduler = AsyncHyperBandScheduler()
 
     tune.run(
         easy_objective,
+        metric="mean_loss",
+        mode="min",
         name="nevergrad",
         search_alg=algo,
         scheduler=scheduler,
diff --git a/python/ray/tune/examples/optuna_example.py b/python/ray/tune/examples/optuna_example.py
index ded76a425bff..ab7e68d383f0 100644
--- a/python/ray/tune/examples/optuna_example.py
+++ b/python/ray/tune/examples/optuna_example.py
@@ -6,6 +6,7 @@
 
 import ray
 from ray import tune
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.optuna import OptunaSearch
 
@@ -45,7 +46,13 @@ def easy_objective(config):
             "activation": tune.choice(["relu", "tanh"])
         }
     }
-    algo = OptunaSearch(metric="mean_loss", mode="min")
-    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
+    algo = OptunaSearch()
+    algo = ConcurrencyLimiter(algo, max_concurrent=4)
+    scheduler = AsyncHyperBandScheduler()
     tune.run(
-        easy_objective, search_alg=algo, scheduler=scheduler, **tune_kwargs)
+        easy_objective,
+        metric="mean_loss",
+        mode="min",
+        search_alg=algo,
+        scheduler=scheduler,
+        **tune_kwargs)
diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py
index 1d6b3b7e3822..8dea4fbcdeeb 100644
--- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py
+++ b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py
@@ -160,6 +160,6 @@ def _export_model(self, export_formats, export_dir):
 
     # demo of the trained Generators
     if not args.smoke_test:
-        logdirs = analysis.dataframe()["logdir"].tolist()
+        logdirs = analysis.results_df["logdir"].tolist()
         model_paths = [os.path.join(d, "exported_models") for d in logdirs]
         demo_gan(analysis, model_paths)
diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py
index bc6ca9fbb237..ec0d891a2ee6 100644
--- a/python/ray/tune/examples/skopt_example.py
+++ b/python/ray/tune/examples/skopt_example.py
@@ -6,6 +6,7 @@
 
 import ray
 from ray import tune
+from ray.tune.suggest import ConcurrencyLimiter
 from ray.tune.schedulers import AsyncHyperBandScheduler
 from ray.tune.suggest.skopt import SkOptSearch
 
@@ -59,15 +60,16 @@ def easy_objective(config):
     algo = SkOptSearch(
         # parameter_names=space.keys(),  # If you want to set the space
         # parameter_ranges=space.values(), # If you want to set the space
-        metric="mean_loss",
-        mode="min",
         points_to_evaluate=previously_run_params,
         evaluated_rewards=known_rewards)
+    algo = ConcurrencyLimiter(algo, max_concurrent=4)
 
-    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
+    scheduler = AsyncHyperBandScheduler()
 
     tune.run(
         easy_objective,
+        metric="mean_loss",
+        mode="min",
         name="skopt_exp_with_warmstart",
         search_alg=algo,
         scheduler=scheduler,
diff --git a/python/ray/tune/examples/tune_cifar10_gluon.py b/python/ray/tune/examples/tune_cifar10_gluon.py
index 49d14574aeaf..ac0fa90d52fd 100644
--- a/python/ray/tune/examples/tune_cifar10_gluon.py
+++ b/python/ray/tune/examples/tune_cifar10_gluon.py
@@ -154,8 +154,8 @@ def train(epoch):
             with ag.record():
                 outputs = [finetune_net(X) for X in data]
                 loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
-            for l in loss:
-                l.backward()
+            for ls in loss:
+                ls.backward()
 
             trainer.step(batch_size)
         mx.nd.waitall()
@@ -170,7 +170,7 @@ def test():
             outputs = [finetune_net(X) for X in data]
             loss = [L(yhat, y) for yhat, y in zip(outputs, label)]
 
-            test_loss += sum(l.mean().asscalar() for l in loss) / len(loss)
+            test_loss += sum(ls.mean().asscalar() for ls in loss) / len(loss)
             metric.update(label, outputs)
 
         _, test_acc = metric.get()
@@ -194,11 +194,7 @@ def test():
         sched = FIFOScheduler()
     elif args.scheduler == "asynchyperband":
         sched = AsyncHyperBandScheduler(
-            time_attr="training_iteration",
-            metric="mean_loss",
-            mode="min",
-            max_t=400,
-            grace_period=60)
+            metric="mean_loss", mode="min", max_t=400, grace_period=60)
     else:
         raise NotImplementedError
     tune.run(
diff --git a/python/ray/tune/progress_reporter.py b/python/ray/tune/progress_reporter.py
index c1325f1021ec..ca60adf29936 100644
--- a/python/ray/tune/progress_reporter.py
+++ b/python/ray/tune/progress_reporter.py
@@ -1,10 +1,12 @@
 from __future__ import print_function
 
 import collections
+import numpy as np
 import time
 
 from ray.tune.result import (EPISODE_REWARD_MEAN, MEAN_ACCURACY, MEAN_LOSS,
-                             TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL)
+                             TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL,
+                             AUTO_RESULT_KEYS)
 from ray.tune.utils import unflattened_lookup
 
 try:
@@ -51,6 +53,10 @@ def report(self, trials, done, *sys_info):
 class TuneReporterBase(ProgressReporter):
     """Abstract base class for the default Tune reporters.
 
+    If metric_columns is not overridden, Tune will attempt to automatically
+    infer the metrics being reported, up to 'infer_limit' number of
+    metrics.
+
     Args:
         metric_columns (dict[str, str]|list[str]): Names of metrics to
             include in progress table. If this is a dict, the keys should
@@ -80,17 +86,25 @@ class TuneReporterBase(ProgressReporter):
         TIMESTEPS_TOTAL: "ts",
         EPISODE_REWARD_MEAN: "reward",
     })
+    VALID_SUMMARY_TYPES = {
+        int, float, np.float32, np.float64, np.int32, np.int64,
+        type(None)
+    }
 
     def __init__(self,
                  metric_columns=None,
                  parameter_columns=None,
                  max_progress_rows=20,
                  max_error_rows=20,
-                 max_report_frequency=5):
+                 max_report_frequency=5,
+                 infer_limit=3):
+        self._metrics_override = metric_columns is not None
+        self._inferred_metrics = {}
         self._metric_columns = metric_columns or self.DEFAULT_COLUMNS.copy()
         self._parameter_columns = parameter_columns or []
         self._max_progress_rows = max_progress_rows
         self._max_error_rows = max_error_rows
+        self._infer_limit = infer_limit
 
         self._max_report_freqency = max_report_frequency
         self._last_report_time = 0
@@ -110,6 +124,7 @@ def add_metric_column(self, metric, representation=None):
             representation (str): Representation to use in table. Defaults to
                 `metric`.
         """
+        self._metrics_override = True
         if metric in self._metric_columns:
             raise ValueError("Column {} already exists.".format(metric))
 
@@ -161,6 +176,9 @@ def _progress_str(self, trials, done, *sys_info, fmt="psql", delim="\n"):
             fmt (str): Table format. See `tablefmt` in tabulate API.
             delim (str): Delimiter between messages.
         """
+        if not self._metrics_override:
+            user_metrics = self._infer_user_metrics(trials, self._infer_limit)
+            self._metric_columns.update(user_metrics)
         messages = ["== Status ==", memory_debug_str(), *sys_info]
         if done:
             max_progress = None
@@ -178,6 +196,24 @@ def _progress_str(self, trials, done, *sys_info, fmt="psql", delim="\n"):
         messages.append(trial_errors_str(trials, fmt=fmt, max_rows=max_error))
         return delim.join(messages) + delim
 
+    def _infer_user_metrics(self, trials, limit=4):
+        """Try to infer the metrics to print out."""
+        if len(self._inferred_metrics) >= limit:
+            return self._inferred_metrics
+        self._inferred_metrics = {}
+        for t in trials:
+            if not t.last_result:
+                continue
+            for metric, value in t.last_result.items():
+                if metric not in self.DEFAULT_COLUMNS:
+                    if metric not in AUTO_RESULT_KEYS:
+                        if type(value) in self.VALID_SUMMARY_TYPES:
+                            self._inferred_metrics[metric] = metric
+
+                if len(self._inferred_metrics) >= limit:
+                    return self._inferred_metrics
+        return self._inferred_metrics
+
 
 class JupyterNotebookReporter(TuneReporterBase):
     """Jupyter notebook-friendly Reporter that can update display in-place.
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py
index 70b311bf7f91..8fab799e916b 100644
--- a/python/ray/tune/result.py
+++ b/python/ray/tune/result.py
@@ -29,6 +29,9 @@
 # (Optional) Mean loss for training iteration
 MEAN_LOSS = "mean_loss"
 
+# (Optional) Negative mean loss for training iteration
+NEG_MEAN_LOSS = "neg_mean_loss"
+
 # (Optional) Mean accuracy for training iteration
 MEAN_ACCURACY = "mean_accuracy"
 
@@ -61,6 +64,26 @@
 DEFAULT_RESULT_KEYS = (TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL,
                        MEAN_ACCURACY, MEAN_LOSS)
 
+# Result keys skipped when the progress reporter infers user metrics.
+# Make sure this doesn't regress.
+AUTO_RESULT_KEYS = (
+    TRAINING_ITERATION,
+    TIME_TOTAL_S,
+    EPISODES_TOTAL,
+    TIMESTEPS_TOTAL,
+    NODE_IP,
+    HOSTNAME,
+    PID,
+    TIME_THIS_ITER_S,
+    "timestamp",
+    "experiment_id",
+    "date",
+    "time_since_restore",
+    "iterations_since_restore",
+    "timesteps_since_restore",
+    "config",
+)
+
 # __duplicate__ is a magic keyword used internally to
 # avoid double-logging results when using the Function API.
 RESULT_DUPLICATE = "__duplicate__"
diff --git a/python/ray/tune/schedulers/__init__.py b/python/ray/tune/schedulers/__init__.py
index 54b88ca9ecb0..5e51bdab24b3 100644
--- a/python/ray/tune/schedulers/__init__.py
+++ b/python/ray/tune/schedulers/__init__.py
@@ -10,8 +10,8 @@
 
 def create_scheduler(
         scheduler,
-        metric="episode_reward_mean",
-        mode="max",
+        metric=None,
+        mode=None,
         **kwargs,
 ):
     """Instantiate a scheduler based on the given string.
diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py
index 29cf481eb784..02e59453de6d 100644
--- a/python/ray/tune/schedulers/async_hyperband.py
+++ b/python/ray/tune/schedulers/async_hyperband.py
@@ -38,8 +38,8 @@ class AsyncHyperBandScheduler(FIFOScheduler):
     def __init__(self,
                  time_attr="training_iteration",
                  reward_attr=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  max_t=100,
                  grace_period=1,
                  reduction_factor=4,
@@ -49,7 +49,8 @@ def __init__(self,
         assert grace_period > 0, "grace_period must be positive!"
         assert reduction_factor > 1, "Reduction Factor not valid!"
         assert brackets > 0, "brackets must be positive!"
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
 
         if reward_attr is not None:
             mode = "max"
@@ -73,13 +74,41 @@ def __init__(self,
         self._counter = 0  # for
         self._num_stopped = 0
         self._metric = metric
-        if mode == "max":
+        self._mode = mode
+        self._metric_op = None
+        if self._mode == "max":
             self._metric_op = 1.
-        elif mode == "min":
+        elif self._mode == "min":
             self._metric_op = -1.
         self._time_attr = time_attr
 
+    def set_search_properties(self, metric, mode):
+        if self._metric and metric:
+            return False
+        if self._mode and mode:
+            return False
+
+        if metric:
+            self._metric = metric
+        if mode:
+            self._mode = mode
+
+        if self._mode == "max":
+            self._metric_op = 1.
+        elif self._mode == "min":
+            self._metric_op = -1.
+
+        return True
+
     def on_trial_add(self, trial_runner, trial):
+        if not self._metric or not self._metric_op:
+            raise ValueError(
+                "{} has been instantiated without a valid `metric` ({}) or "
+                "`mode` ({}) parameter. Either pass these parameters when "
+                "instantiating the scheduler, or pass them as parameters "
+                "to `tune.run()`".format(self.__class__.__name__, self._metric,
+                                         self._mode))
+
         sizes = np.array([len(b._rungs) for b in self._brackets])
         probs = np.e**(sizes - sizes.max())
         normalized = probs / probs.sum()
@@ -162,6 +191,7 @@ def on_result(self, trial, cur_iter, cur_rew):
         return action
 
     def debug_str(self):
+        # TODO: fix up the output for this
         iters = " | ".join([
             "Iter {:.3f}: {}".format(milestone, self.cutoff(recorded))
             for milestone, recorded in self._rungs
diff --git a/python/ray/tune/schedulers/hb_bohb.py b/python/ray/tune/schedulers/hb_bohb.py
index 7204e71e361a..c8c061034631 100644
--- a/python/ray/tune/schedulers/hb_bohb.py
+++ b/python/ray/tune/schedulers/hb_bohb.py
@@ -30,6 +30,13 @@ def on_trial_add(self, trial_runner, trial):
         to current bracket. Else, create new iteration, create new bracket,
         add to bracket.
         """
+        if not self._metric or not self._metric_op:
+            raise ValueError(
+                "{} has been instantiated without a valid `metric` ({}) or "
+                "`mode` ({}) parameter. Either pass these parameters when "
+                "instantiating the scheduler, or pass them as parameters "
+                "to `tune.run()`".format(self.__class__.__name__, self._metric,
+                                         self._mode))
 
         cur_bracket = self._state["bracket"]
         cur_band = self._hyperbands[self._state["band_idx"]]
diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py
index a2fe3ad91ed3..3066cf80b3dc 100644
--- a/python/ray/tune/schedulers/hyperband.py
+++ b/python/ray/tune/schedulers/hyperband.py
@@ -76,12 +76,13 @@ class HyperBandScheduler(FIFOScheduler):
     def __init__(self,
                  time_attr="training_iteration",
                  reward_attr=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  max_t=81,
                  reduction_factor=3):
         assert max_t > 0, "Max (time_attr) not valid!"
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
 
         if reward_attr is not None:
             mode = "max"
@@ -108,12 +109,33 @@ def __init__(self,
         self._state = {"bracket": None, "band_idx": 0}
         self._num_stopped = 0
         self._metric = metric
-        if mode == "max":
+        self._mode = mode
+        self._metric_op = None
+
+        if self._mode == "max":
             self._metric_op = 1.
-        elif mode == "min":
+        elif self._mode == "min":
             self._metric_op = -1.
         self._time_attr = time_attr
 
+    def set_search_properties(self, metric, mode):
+        if self._metric and metric:
+            return False
+        if self._mode and mode:
+            return False
+
+        if metric:
+            self._metric = metric
+        if mode:
+            self._mode = mode
+
+        if self._mode == "max":
+            self._metric_op = 1.
+        elif self._mode == "min":
+            self._metric_op = -1.
+
+        return True
+
     def on_trial_add(self, trial_runner, trial):
         """Adds new trial.
 
@@ -121,6 +143,13 @@ def on_trial_add(self, trial_runner, trial):
         add to current bracket. Else, if current band is not filled,
         create new bracket, add to current bracket.
         Else, create new iteration, create new bracket, add to bracket."""
+        if not self._metric or not self._metric_op:
+            raise ValueError(
+                "{} has been instantiated without a valid `metric` ({}) or "
+                "`mode` ({}) parameter. Either pass these parameters when "
+                "instantiating the scheduler, or pass them as parameters "
+                "to `tune.run()`".format(self.__class__.__name__, self._metric,
+                                         self._mode))
 
         cur_bracket = self._state["bracket"]
         cur_band = self._hyperbands[self._state["band_idx"]]
diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py
index 2389f166e32e..497c62915ac6 100644
--- a/python/ray/tune/schedulers/median_stopping_rule.py
+++ b/python/ray/tune/schedulers/median_stopping_rule.py
@@ -40,13 +40,12 @@ class MedianStoppingRule(FIFOScheduler):
     def __init__(self,
                  time_attr="time_total_s",
                  reward_attr=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  grace_period=60.0,
                  min_samples_required=3,
                  min_time_slice=0,
                  hard_stop=True):
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
         if reward_attr is not None:
             mode = "max"
             metric = reward_attr
@@ -60,15 +59,70 @@ def __init__(self,
         self._min_samples_required = min_samples_required
         self._min_time_slice = min_time_slice
         self._metric = metric
-        assert mode in {"min", "max"}, "`mode` must be 'min' or 'max'."
-        self._worst = float("-inf") if mode == "max" else float("inf")
-        self._compare_op = max if mode == "max" else min
+        self._worst = None
+        self._compare_op = None
+
+        self._mode = mode
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
+            self._worst = float("-inf") if self._mode == "max" else float(
+                "inf")
+            self._compare_op = max if self._mode == "max" else min
+
         self._time_attr = time_attr
         self._hard_stop = hard_stop
         self._trial_state = {}
         self._last_pause = collections.defaultdict(lambda: float("-inf"))
         self._results = collections.defaultdict(list)
 
+    def set_search_properties(self, metric, mode):
+        """Fill in `metric`/`mode` when they were not given to `__init__`.
+
+        Args:
+            metric (str): Metric to optimize.
+            mode (str): One of ["min", "max"]. Direction to optimize.
+
+        Returns:
+            False if `metric` or `mode` is passed but already set on this
+            scheduler (nothing is changed in that case), True otherwise.
+        """
+        if self._metric and metric:
+            return False
+        if self._mode and mode:
+            return False
+
+        if metric:
+            self._metric = metric
+        if mode:
+            self._mode = mode
+
+        # Only derive the comparison state from a valid mode. Otherwise it
+        # stays unset, so `on_trial_add` reports the missing `mode` instead
+        # of the scheduler silently minimizing.
+        if self._mode in ("min", "max"):
+            self._worst = float("-inf") if self._mode == "max" else float(
+                "inf")
+            self._compare_op = max if self._mode == "max" else min
+
+        return True
+
+    def on_trial_add(self, trial_runner, trial):
+        """Validate that `metric` and `mode` are set, then add the trial.
+
+        Raises:
+            ValueError: If the scheduler has no metric, or no comparison
+                state derived from `mode`.
+        """
+        if not self._metric or not self._worst or not self._compare_op:
+            raise ValueError(
+                "{} has been instantiated without a valid `metric` ({}) or "
+                "`mode` ({}) parameter. Either pass these parameters when "
+                "instantiating the scheduler, or pass them as parameters "
+                "to `tune.run()`".format(self.__class__.__name__, self._metric,
+                                         self._mode))
+
+        super(MedianStoppingRule, self).on_trial_add(trial_runner, trial)
+
     def on_trial_result(self, trial_runner, trial, result):
         """Callback for early stopping.
 
diff --git a/python/ray/tune/schedulers/pbt.py b/python/ray/tune/schedulers/pbt.py
index 70137e8de3f6..6e6396097028 100644
--- a/python/ray/tune/schedulers/pbt.py
+++ b/python/ray/tune/schedulers/pbt.py
@@ -216,8 +216,8 @@ class PopulationBasedTraining(FIFOScheduler):
     def __init__(self,
                  time_attr="time_total_s",
                  reward_attr=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  perturbation_interval=60.0,
                  hyperparam_mutations={},
                  quantile_fraction=0.25,
@@ -253,7 +253,8 @@ def __init__(self,
                 "perturbation_interval must be a positive number greater "
                 "than 0. Current value: '{}'".format(perturbation_interval))
 
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
 
         if reward_attr is not None:
             mode = "max"
@@ -265,9 +266,11 @@ def __init__(self,
 
         FIFOScheduler.__init__(self)
         self._metric = metric
-        if mode == "max":
+        self._mode = mode
+        self._metric_op = None
+        if self._mode == "max":
             self._metric_op = 1.
-        elif mode == "min":
+        elif self._mode == "min":
             self._metric_op = -1.
         self._time_attr = time_attr
         self._perturbation_interval = perturbation_interval
@@ -285,7 +288,33 @@ def __init__(self,
         self._num_checkpoints = 0
         self._num_perturbations = 0
 
+    def set_search_properties(self, metric, mode):
+        if self._metric and metric:
+            return False
+        if self._mode and mode:
+            return False
+
+        if metric:
+            self._metric = metric
+        if mode:
+            self._mode = mode
+
+        if self._mode == "max":
+            self._metric_op = 1.
+        elif self._mode == "min":
+            self._metric_op = -1.
+
+        return True
+
     def on_trial_add(self, trial_runner, trial):
+        if not self._metric or not self._metric_op:
+            raise ValueError(
+                "{} has been instantiated without a valid `metric` ({}) or "
+                "`mode` ({}) parameter. Either pass these parameters when "
+                "instantiating the scheduler, or pass them as parameters "
+                "to `tune.run()`".format(self.__class__.__name__, self._metric,
+                                         self._mode))
+
         self._trial_state[trial] = PBTTrialState(trial)
 
         for attr in self._hyperparam_mutations.keys():
diff --git a/python/ray/tune/schedulers/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py
index 6fe7284cf655..66ba25904379 100644
--- a/python/ray/tune/schedulers/trial_scheduler.py
+++ b/python/ray/tune/schedulers/trial_scheduler.py
@@ -8,6 +8,18 @@ class TrialScheduler:
     PAUSE = "PAUSE"  #: Status for pausing trial execution
     STOP = "STOP"  #: Status for stopping trial execution
 
+    def set_search_properties(self, metric, mode):
+        """Pass search properties to scheduler.
+
+        This method acts as an alternative to instantiating schedulers
+        that react to metrics with their own `metric` and `mode` parameters.
+
+        Args:
+            metric (str): Metric to optimize
+            mode (str): One of ["min", "max"]. Direction to optimize.
+        """
+        return True
+
     def on_trial_add(self, trial_runner, trial):
         """Called when a new trial is added to the trial runner."""
 
diff --git a/python/ray/tune/suggest/__init__.py b/python/ray/tune/suggest/__init__.py
index a9b5582a9088..f3f332f1ff4e 100644
--- a/python/ray/tune/suggest/__init__.py
+++ b/python/ray/tune/suggest/__init__.py
@@ -8,8 +8,8 @@
 
 def create_searcher(
         search_alg,
-        metric="episode_reward_mean",
-        mode="max",
+        metric=None,
+        mode=None,
         **kwargs,
 ):
     """Instantiate a search algorithm based on the given string.
diff --git a/python/ray/tune/suggest/ax.py b/python/ray/tune/suggest/ax.py
index 9574f80ce398..28b52a9c6816 100644
--- a/python/ray/tune/suggest/ax.py
+++ b/python/ray/tune/suggest/ax.py
@@ -104,15 +104,16 @@ def easy_objective(config):
 
     def __init__(self,
                  space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  parameter_constraints=None,
                  outcome_constraints=None,
                  ax_client=None,
                  use_early_stopped_trials=None,
                  max_concurrent=None):
         assert ax is not None, "Ax must be installed!"
-        assert mode in ["min", "max"], "`mode` must be one of ['min', 'max']"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
 
         super(AxSearch, self).__init__(
             metric=metric,
diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py
index 340f200a5ecb..d5c7684c1ab4 100644
--- a/python/ray/tune/suggest/bayesopt.py
+++ b/python/ray/tune/suggest/bayesopt.py
@@ -101,8 +101,8 @@ class BayesOptSearch(Searcher):
 
     def __init__(self,
                  space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  utility_kwargs=None,
                  random_state=42,
                  random_search_steps=10,
@@ -144,7 +144,8 @@ def __init__(self,
         assert byo is not None, (
             "BayesOpt must be installed!. You can install BayesOpt with"
             " the command: `pip install bayesian-optimization`.")
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
         self.max_concurrent = max_concurrent
         self._config_counter = defaultdict(int)
         self._patience = patience
@@ -284,8 +285,10 @@ def register_analysis(self, analysis):
             analysis (ExperimentAnalysis): Optionally, the previous analysis
                 to integrate.
         """
-        for (_, report), params in zip(analysis.dataframe().iterrows(),
-                                       analysis.get_all_configs().values()):
+        for (_, report), params in zip(
+                analysis.dataframe(metric=self._metric,
+                                   mode=self._mode).iterrows(),
+                analysis.get_all_configs().values()):
             # We add the obtained results to the
             # gaussian process optimizer
             self._register_result(params, report)
diff --git a/python/ray/tune/suggest/bohb.py b/python/ray/tune/suggest/bohb.py
index b545656106e7..318e582e0717 100644
--- a/python/ray/tune/suggest/bohb.py
+++ b/python/ray/tune/suggest/bohb.py
@@ -95,11 +95,12 @@ def __init__(self,
                  space=None,
                  bohb_config=None,
                  max_concurrent=10,
-                 metric="neg_mean_loss",
-                 mode="max"):
+                 metric=None,
+                 mode=None):
         from hpbandster.optimizers.config_generators.bohb import BOHB
         assert BOHB is not None, "HpBandSter must be installed!"
-        assert mode in ["min", "max"], "`mode` must be in [min, max]!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
         self._max_concurrent = max_concurrent
         self.trial_to_params = {}
         self.running = set()
diff --git a/python/ray/tune/suggest/dragonfly.py b/python/ray/tune/suggest/dragonfly.py
index 051301b62135..b2da186b04b5 100644
--- a/python/ray/tune/suggest/dragonfly.py
+++ b/python/ray/tune/suggest/dragonfly.py
@@ -130,15 +130,16 @@ def __init__(self,
                  optimizer=None,
                  domain=None,
                  space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  points_to_evaluate=None,
                  evaluated_rewards=None,
                  **kwargs):
         assert dragonfly is not None, """dragonfly must be installed!
             You can install Dragonfly with the command:
             `pip install dragonfly-opt`."""
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
 
         super(DragonflySearch, self).__init__(
             metric=metric, mode=mode, **kwargs)
diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py
index b05cc3cc2314..b097cc29f275 100644
--- a/python/ray/tune/suggest/hyperopt.py
+++ b/python/ray/tune/suggest/hyperopt.py
@@ -118,8 +118,8 @@ class HyperOptSearch(Searcher):
     def __init__(
             self,
             space=None,
-            metric="episode_reward_mean",
-            mode="max",
+            metric=None,
+            mode=None,
             points_to_evaluate=None,
             n_initial_points=20,
             random_state_seed=None,
@@ -129,6 +129,8 @@ def __init__(
     ):
         assert hpo is not None, (
             "HyperOpt must be installed! Run `pip install hyperopt`.")
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
         from hyperopt.fmin import generate_trials_to_calculate
         super(HyperOptSearch, self).__init__(
             metric=metric,
diff --git a/python/ray/tune/suggest/nevergrad.py b/python/ray/tune/suggest/nevergrad.py
index e46935907387..bee20c814564 100644
--- a/python/ray/tune/suggest/nevergrad.py
+++ b/python/ray/tune/suggest/nevergrad.py
@@ -87,12 +87,13 @@ class NevergradSearch(Searcher):
     def __init__(self,
                  optimizer=None,
                  space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  max_concurrent=None,
                  **kwargs):
         assert ng is not None, "Nevergrad must be installed!"
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
 
         super(NevergradSearch, self).__init__(
             metric=metric, mode=mode, max_concurrent=max_concurrent, **kwargs)
diff --git a/python/ray/tune/suggest/optuna.py b/python/ray/tune/suggest/optuna.py
index 792df0fc3b53..ae3f1aadbe48 100644
--- a/python/ray/tune/suggest/optuna.py
+++ b/python/ray/tune/suggest/optuna.py
@@ -100,11 +100,7 @@ class OptunaSearch(Searcher):
 
     """
 
-    def __init__(self,
-                 space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
-                 sampler=None):
+    def __init__(self, space=None, metric=None, mode=None, sampler=None):
         assert ot is not None, (
             "Optuna must be installed! Run `pip install optuna`.")
         super(OptunaSearch, self).__init__(
diff --git a/python/ray/tune/suggest/repeater.py b/python/ray/tune/suggest/repeater.py
index 05c5a2b397dd..647b6cdaf17f 100644
--- a/python/ray/tune/suggest/repeater.py
+++ b/python/ray/tune/suggest/repeater.py
@@ -167,3 +167,6 @@ def get_state(self):
 
     def set_state(self, state):
         self.__dict__.update(state)
+
+    def set_search_properties(self, metric, mode, config):
+        return self.searcher.set_search_properties(metric, mode, config)
diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py
index ff26ed24f598..67dec2bde9e7 100644
--- a/python/ray/tune/suggest/skopt.py
+++ b/python/ray/tune/suggest/skopt.py
@@ -127,8 +127,8 @@ class SkOptSearch(Searcher):
     def __init__(self,
                  optimizer=None,
                  space=None,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  points_to_evaluate=None,
                  evaluated_rewards=None,
                  max_concurrent=None,
@@ -137,7 +137,8 @@ def __init__(self,
             You can install Skopt with the command:
             `pip install scikit-optimize`."""
 
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
         self.max_concurrent = max_concurrent
         super(SkOptSearch, self).__init__(
             metric=metric,
diff --git a/python/ray/tune/suggest/suggestion.py b/python/ray/tune/suggest/suggestion.py
index 633fe33718e1..2a9793ceed28 100644
--- a/python/ray/tune/suggest/suggestion.py
+++ b/python/ray/tune/suggest/suggestion.py
@@ -56,8 +56,8 @@ def on_trial_complete(self, trial_id, result, **kwargs):
     CKPT_FILE_TMPL = "searcher-state-{}.pkl"
 
     def __init__(self,
-                 metric="episode_reward_mean",
-                 mode="max",
+                 metric=None,
+                 mode=None,
                  max_concurrent=None,
                  use_early_stopped_trials=None):
         if use_early_stopped_trials is False:
@@ -70,6 +70,13 @@ def __init__(self,
                 "search algorithm. Use tune.suggest.ConcurrencyLimiter() "
                 "instead. This will raise an error in future versions of Ray.")
 
+        self._metric = metric
+        self._mode = mode
+
+        if not mode or not metric:
+            # Early return to avoid assertions
+            return
+
         assert isinstance(
             metric, type(mode)), "metric and mode must be of the same type"
         if isinstance(mode, str):
@@ -83,9 +90,6 @@ def __init__(self,
         else:
             raise ValueError("Mode most either be a list or string")
 
-        self._metric = metric
-        self._mode = mode
-
     def set_search_properties(self, metric, mode, config):
         """Pass search properties to searcher.
 
@@ -362,3 +366,6 @@ def on_pause(self, trial_id):
 
     def on_unpause(self, trial_id):
         self.searcher.on_unpause(trial_id)
+
+    def set_search_properties(self, metric, mode, config):
+        return self.searcher.set_search_properties(metric, mode, config)
diff --git a/python/ray/tune/suggest/zoopt.py b/python/ray/tune/suggest/zoopt.py
index 950a8a68745e..8f3b2453181d 100644
--- a/python/ray/tune/suggest/zoopt.py
+++ b/python/ray/tune/suggest/zoopt.py
@@ -109,12 +109,13 @@ def __init__(self,
                  algo="asracos",
                  budget=None,
                  dim_dict=None,
-                 metric="episode_reward_mean",
-                 mode="min",
+                 metric=None,
+                 mode=None,
                  **kwargs):
         assert zoopt is not None, "Zoopt not found - please install zoopt."
         assert budget is not None, "`budget` should not be None!"
-        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+        if mode:
+            assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
         _algo = algo.lower()
         assert _algo in ["asracos", "sracos"
                          ], "`algo` must be in ['asracos', 'sracos'] currently"
diff --git a/python/ray/tune/tests/example.py b/python/ray/tune/tests/example.py
index 69d1f854b577..383dd5ecb72a 100644
--- a/python/ray/tune/tests/example.py
+++ b/python/ray/tune/tests/example.py
@@ -39,5 +39,5 @@ def training_function(config):
     metric="mean_loss", mode="min"))
 
 # Get a dataframe for analyzing trial results.
-df = analysis.dataframe()
+df = analysis.results_df
 # __quick_start_end__
diff --git a/python/ray/tune/tests/test_api.py b/python/ray/tune/tests/test_api.py
index fa0213dd8571..3dc3d9fb25bd 100644
--- a/python/ray/tune/tests/test_api.py
+++ b/python/ray/tune/tests/test_api.py
@@ -520,7 +520,8 @@ def train(config, reporter):
         analysis = tune.run(train, num_samples=10, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= top)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= top)
 
         patience = 5
         stopper = EarlyStopping("test", top=top, mode="min", patience=patience)
@@ -528,14 +529,16 @@ def train(config, reporter):
         analysis = tune.run(train, num_samples=20, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= patience)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= patience)
 
         stopper = EarlyStopping("test", top=top, mode="min")
 
         analysis = tune.run(train, num_samples=10, stop=stopper)
         self.assertTrue(
             all(t.status == Trial.TERMINATED for t in analysis.trials))
-        self.assertTrue(len(analysis.dataframe()) <= top)
+        self.assertTrue(
+            len(analysis.dataframe(metric="test", mode="max")) <= top)
 
     def testBadStoppingFunction(self):
         def train(config, reporter):
diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py
index 5195c7825264..bac891cc965c 100644
--- a/python/ray/tune/tests/test_experiment_analysis.py
+++ b/python/ray/tune/tests/test_experiment_analysis.py
@@ -7,7 +7,7 @@
 from numpy import nan
 
 import ray
-from ray.tune import run, sample_from
+from ray import tune
 from ray.tune.examples.async_hyperband_example import MyTrainableClass
 
 
@@ -26,7 +26,7 @@ def tearDown(self):
         ray.shutdown()
 
     def run_test_exp(self):
-        self.ea = run(
+        self.ea = tune.run(
             MyTrainableClass,
             name=self.test_name,
             local_dir=self.test_dir,
@@ -34,13 +34,14 @@ def run_test_exp(self):
             checkpoint_freq=1,
             num_samples=self.num_samples,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
 
     def nan_test_exp(self):
-        nan_ea = run(
+        nan_ea = tune.run(
             lambda x: nan,
             name="testing_nan",
             local_dir=self.test_dir,
@@ -48,14 +49,15 @@ def nan_test_exp(self):
             checkpoint_freq=1,
             num_samples=self.num_samples,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
         return nan_ea
 
     def testDataframe(self):
-        df = self.ea.dataframe()
+        df = self.ea.dataframe(self.metric, mode="max")
 
         self.assertTrue(isinstance(df, pd.DataFrame))
         self.assertEquals(df.shape[0], self.num_samples)
@@ -143,21 +145,50 @@ def testAllDataframes(self):
             self.assertEqual(df.training_iteration.max(), 1)
 
     def testIgnoreOtherExperiment(self):
-        analysis = run(
+        analysis = tune.run(
             MyTrainableClass,
             name="test_example",
             local_dir=self.test_dir,
             stop={"training_iteration": 1},
             num_samples=1,
             config={
-                "width": sample_from(
+                "width": tune.sample_from(
                     lambda spec: 10 + int(90 * random.random())),
-                "height": sample_from(lambda spec: int(100 * random.random())),
+                "height": tune.sample_from(
+                    lambda spec: int(100 * random.random())),
             })
-        df = analysis.dataframe()
+        df = analysis.dataframe(self.metric, mode="max")
         self.assertEquals(df.shape[0], 1)
 
 
+class ExperimentAnalysisPropertySuite(unittest.TestCase):
+    def testBestProperties(self):
+        """Check the `best_*` and `results_df` analysis shortcuts."""
+        def train(config):
+            for i in range(10):
+                with tune.checkpoint_dir(i):
+                    pass
+                tune.report(res=config["base"] + i)
+
+        ea = tune.run(
+            train,
+            config={"base": tune.grid_search([100, 200, 300])},
+            metric="res",
+            mode="max")
+
+        # `train` reports res = base + i for i in 0..9; the test expects
+        # ea.trials[2] to be the base=300 trial, which finishes on 309.
+        best = ea.trials[2]
+        self.assertEqual(ea.best_trial, best)
+        self.assertEqual(ea.best_config, best.config)
+        self.assertEqual(ea.best_logdir, best.logdir)
+        self.assertEqual(ea.best_checkpoint, best.checkpoint.value)
+        self.assertTrue(all(ea.best_dataframe["trial_id"] == best.trial_id))
+        self.assertEqual(ea.results_df.loc[best.trial_id, "res"], 309)
+        self.assertEqual(ea.best_result["res"], 309)
+        self.assertEqual(ea.best_result_df.loc[best.trial_id, "res"], 309)
+
+
 if __name__ == "__main__":
     import pytest
     import sys
diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py
index 4e299a758855..4ef9a51f8fd3 100644
--- a/python/ray/tune/tests/test_experiment_analysis_mem.py
+++ b/python/ray/tune/tests/test_experiment_analysis_mem.py
@@ -83,10 +83,10 @@ def testCompareTrials(self):
             num_samples=1,
             config={"id": grid_search(list(range(5)))})
 
-        max_all = ea.get_best_trial("score",
-                                    "max").metric_analysis["score"]["max"]
-        min_all = ea.get_best_trial("score",
-                                    "min").metric_analysis["score"]["min"]
+        max_all = ea.get_best_trial("score", "max",
+                                    "all").metric_analysis["score"]["max"]
+        min_all = ea.get_best_trial("score", "min",
+                                    "all").metric_analysis["score"]["min"]
         max_last = ea.get_best_trial("score", "max",
                                      "last").metric_analysis["score"]["last"]
         max_avg = ea.get_best_trial("score", "max",
@@ -149,7 +149,7 @@ def tearDown(self):
 
     def testDataframe(self):
         analysis = Analysis(self.test_dir)
-        df = analysis.dataframe()
+        df = analysis.dataframe(self.metric, mode="max")
         self.assertTrue(isinstance(df, pd.DataFrame))
         self.assertEqual(df.shape[0], self.num_samples * 2)
 
diff --git a/python/ray/tune/tests/test_progress_reporter.py b/python/ray/tune/tests/test_progress_reporter.py
index 144f59ede1aa..7b5db73656fc 100644
--- a/python/ray/tune/tests/test_progress_reporter.py
+++ b/python/ray/tune/tests/test_progress_reporter.py
@@ -3,9 +3,10 @@
 import os
 import unittest
 from unittest.mock import MagicMock, Mock
-
+from ray import tune
 from ray.test_utils import run_string_as_driver
 from ray.tune.trial import Trial
+from ray.tune.result import AUTO_RESULT_KEYS
 from ray.tune.progress_reporter import (CLIReporter, _fair_filter_trials,
                                         trial_progress_str)
 
@@ -233,6 +234,43 @@ def testAddMetricColumn(self):
         reporter.add_metric_column("foo", "bar")
         self.assertIn("foo", reporter._metric_columns)
 
+    def testInfer(self):
+        """Int metrics appear in the progress output; a str one does not."""
+        reporter = CLIReporter()
+        test_result = dict(foo_result=1, baz_result=4123, bar_result="testme")
+
+        def test(config):
+            for i in range(3):
+                tune.report(**test_result)
+
+        analysis = tune.run(test, num_samples=3)
+        inferred_results = reporter._infer_user_metrics(analysis.trials)
+        for metric in inferred_results:
+            self.assertNotIn(metric, AUTO_RESULT_KEYS)
+            self.assertIn(metric, test_result)
+
+        # Reporter that records each progress string in `_output`.
+        class TestReporter(CLIReporter):
+            _output = []
+
+            def __init__(self, *args, **kwargs):
+                super().__init__(*args, **kwargs)
+                self._max_report_freqency = 0
+
+            def report(self, *args, **kwargs):
+                self._output.append(self._progress_str(*args, **kwargs))
+
+        reporter = TestReporter()
+        tune.run(test, num_samples=3, progress_reporter=reporter)
+        # A key counts as found if any recorded progress string mentions it.
+        found = {
+            key: any(key in output for output in reporter._output)
+            for key in test_result
+        }
+        self.assertTrue(found["foo_result"])
+        self.assertTrue(found["baz_result"])
+        self.assertFalse(found["bar_result"])
+
     def testProgressStr(self):
         trials = []
         for i in range(5):
@@ -285,7 +323,6 @@ def testProgressStr(self):
             }, {"a": "A"},
             fmt="psql",
             max_rows=3)
-        print(prog3)
         assert prog3 == EXPECTED_RESULT_3
 
     def testEndToEndReporting(self):
diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py
index 320e76af39d6..507ae81f0aee 100644
--- a/python/ray/tune/tests/test_trial_scheduler.py
+++ b/python/ray/tune/tests/test_trial_scheduler.py
@@ -60,7 +60,11 @@ def basicSetup(self, rule):
         return t1, t2
 
     def testMedianStoppingConstantPerf(self):
-        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
+        rule = MedianStoppingRule(
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=0,
+            min_samples_required=1)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         rule.on_trial_complete(runner, t1, result(10, 1000))
@@ -75,7 +79,11 @@ def testMedianStoppingConstantPerf(self):
             TrialScheduler.STOP)
 
     def testMedianStoppingOnCompleteOnly(self):
-        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
+        rule = MedianStoppingRule(
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=0,
+            min_samples_required=1)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         self.assertEqual(
@@ -87,7 +95,11 @@ def testMedianStoppingOnCompleteOnly(self):
             TrialScheduler.STOP)
 
     def testMedianStoppingGracePeriod(self):
-        rule = MedianStoppingRule(grace_period=2.5, min_samples_required=1)
+        rule = MedianStoppingRule(
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=2.5,
+            min_samples_required=1)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         rule.on_trial_complete(runner, t1, result(10, 1000))
@@ -104,7 +116,11 @@ def testMedianStoppingGracePeriod(self):
             TrialScheduler.STOP)
 
     def testMedianStoppingMinSamples(self):
-        rule = MedianStoppingRule(grace_period=0, min_samples_required=2)
+        rule = MedianStoppingRule(
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=0,
+            min_samples_required=2)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         rule.on_trial_complete(runner, t1, result(10, 1000))
@@ -120,7 +136,11 @@ def testMedianStoppingMinSamples(self):
             TrialScheduler.STOP)
 
     def testMedianStoppingUsesMedian(self):
-        rule = MedianStoppingRule(grace_period=0, min_samples_required=1)
+        rule = MedianStoppingRule(
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=0,
+            min_samples_required=1)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         rule.on_trial_complete(runner, t1, result(10, 1000))
@@ -135,7 +155,11 @@ def testMedianStoppingUsesMedian(self):
 
     def testMedianStoppingSoftStop(self):
         rule = MedianStoppingRule(
-            grace_period=0, min_samples_required=1, hard_stop=False)
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=0,
+            min_samples_required=1,
+            hard_stop=False)
         t1, t2 = self.basicSetup(rule)
         runner = mock_trial_runner()
         rule.on_trial_complete(runner, t1, result(10, 1000))
@@ -265,7 +289,8 @@ def schedulerSetup(self, num_trials, max_t=81):
         (15, 9) -> (5, 27) -> (2, 45);
         (34, 3) -> (12, 9) -> (4, 27) -> (2, 42);
         (81, 1) -> (27, 3) -> (9, 9) -> (3, 27) -> (1, 41);"""
-        sched = HyperBandScheduler(max_t=max_t)
+        sched = HyperBandScheduler(
+            metric="episode_reward_mean", mode="max", max_t=max_t)
         for i in range(num_trials):
             t = Trial("__fake")
             sched.on_trial_add(None, t)
@@ -321,7 +346,7 @@ def advancedSetup(self):
         return sched
 
     def testConfigSameEta(self):
-        sched = HyperBandScheduler()
+        sched = HyperBandScheduler(metric="episode_reward_mean", mode="max")
         i = 0
         while not sched._cur_band_filled():
             t = Trial("__fake")
@@ -335,7 +360,10 @@ def testConfigSameEta(self):
 
         reduction_factor = 10
         sched = HyperBandScheduler(
-            max_t=1000, reduction_factor=reduction_factor)
+            metric="episode_reward_mean",
+            mode="max",
+            max_t=1000,
+            reduction_factor=reduction_factor)
         i = 0
         while not sched._cur_band_filled():
             t = Trial("__fake")
@@ -348,7 +376,8 @@ def testConfigSameEta(self):
         self.assertEqual(sched._hyperbands[0][-1]._r, 1)
 
     def testConfigSameEtaSmall(self):
-        sched = HyperBandScheduler(max_t=1)
+        sched = HyperBandScheduler(
+            metric="episode_reward_mean", mode="max", max_t=1)
         i = 0
         while len(sched._hyperbands) < 2:
             t = Trial("__fake")
@@ -627,7 +656,11 @@ def tearDown(self):
         _register_all()  # re-register the evicted objects
 
     def testLargestBracketFirst(self):
-        sched = HyperBandForBOHB(max_t=3, reduction_factor=3)
+        sched = HyperBandForBOHB(
+            metric="episode_reward_mean",
+            mode="max",
+            max_t=3,
+            reduction_factor=3)
         runner = _MockTrialRunner(sched)
         for i in range(3):
             t = Trial("__fake")
@@ -642,7 +675,11 @@ def testCheckTrialInfoUpdate(self):
         def result(score, ts):
             return {"episode_reward_mean": score, TRAINING_ITERATION: ts}
 
-        sched = HyperBandForBOHB(max_t=3, reduction_factor=3)
+        sched = HyperBandForBOHB(
+            metric="episode_reward_mean",
+            mode="max",
+            max_t=3,
+            reduction_factor=3)
         runner = _MockTrialRunner(sched)
         runner._search_alg = MagicMock()
         runner._search_alg.searcher = MagicMock()
@@ -668,7 +705,11 @@ def testCheckTrialInfoUpdateMin(self):
         def result(score, ts):
             return {"episode_reward_mean": score, TRAINING_ITERATION: ts}
 
-        sched = HyperBandForBOHB(max_t=3, reduction_factor=3, mode="min")
+        sched = HyperBandForBOHB(
+            metric="episode_reward_mean",
+            mode="min",
+            max_t=3,
+            reduction_factor=3)
         runner = _MockTrialRunner(sched)
         runner._search_alg = MagicMock()
         runner._search_alg.searcher = MagicMock()
@@ -693,7 +734,11 @@ def testPauseResumeChooseTrial(self):
         def result(score, ts):
             return {"episode_reward_mean": score, TRAINING_ITERATION: ts}
 
-        sched = HyperBandForBOHB(max_t=10, reduction_factor=3, mode="min")
+        sched = HyperBandForBOHB(
+            metric="episode_reward_mean",
+            mode="min",
+            max_t=10,
+            reduction_factor=3)
         runner = _MockTrialRunner(sched)
         runner._search_alg = MagicMock()
         runner._search_alg.searcher = MagicMock()
@@ -761,6 +806,8 @@ def basicSetup(self,
         }
         pbt = PopulationBasedTraining(
             time_attr="training_iteration",
+            metric="episode_reward_mean",
+            mode="max",
             perturbation_interval=perturbation_interval,
             resample_probability=resample_prob,
             quantile_fraction=0.25,
@@ -1675,6 +1722,7 @@ def basicSetup(self,
         }
         pbt = PopulationBasedTraining(
             metric="mean_accuracy",
+            mode="max",
             time_attr="training_iteration",
             perturbation_interval=perturbation_interval,
             resample_probability=resample_prob,
@@ -1791,7 +1839,8 @@ def nanSetup(self, scheduler):
         return t1, t2
 
     def testAsyncHBOnComplete(self):
-        scheduler = AsyncHyperBandScheduler(max_t=10, brackets=1)
+        scheduler = AsyncHyperBandScheduler(
+            metric="episode_reward_mean", mode="max", max_t=10, brackets=1)
         t1, t2 = self.basicSetup(scheduler)
         t3 = Trial("PPO")
         scheduler.on_trial_add(None, t3)
@@ -1802,7 +1851,11 @@ def testAsyncHBOnComplete(self):
 
     def testAsyncHBGracePeriod(self):
         scheduler = AsyncHyperBandScheduler(
-            grace_period=2.5, reduction_factor=3, brackets=1)
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=2.5,
+            reduction_factor=3,
+            brackets=1)
         t1, t2 = self.basicSetup(scheduler)
         scheduler.on_trial_complete(None, t1, result(10, 1000))
         scheduler.on_trial_complete(None, t2, result(10, 1000))
@@ -1819,7 +1872,8 @@ def testAsyncHBGracePeriod(self):
             TrialScheduler.STOP)
 
     def testAsyncHBAllCompletes(self):
-        scheduler = AsyncHyperBandScheduler(max_t=10, brackets=10)
+        scheduler = AsyncHyperBandScheduler(
+            metric="episode_reward_mean", mode="max", max_t=10, brackets=10)
         trials = [Trial("PPO") for i in range(10)]
         for t in trials:
             scheduler.on_trial_add(None, t)
@@ -1831,7 +1885,12 @@ def testAsyncHBAllCompletes(self):
 
     def testAsyncHBUsesPercentile(self):
         scheduler = AsyncHyperBandScheduler(
-            grace_period=1, max_t=10, reduction_factor=2, brackets=1)
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=1,
+            max_t=10,
+            reduction_factor=2,
+            brackets=1)
         t1, t2 = self.basicSetup(scheduler)
         scheduler.on_trial_complete(None, t1, result(10, 1000))
         scheduler.on_trial_complete(None, t2, result(10, 1000))
@@ -1846,7 +1905,12 @@ def testAsyncHBUsesPercentile(self):
 
     def testAsyncHBNanPercentile(self):
         scheduler = AsyncHyperBandScheduler(
-            grace_period=1, max_t=10, reduction_factor=2, brackets=1)
+            metric="episode_reward_mean",
+            mode="max",
+            grace_period=1,
+            max_t=10,
+            reduction_factor=2,
+            brackets=1)
         t1, t2 = self.nanSetup(scheduler)
         scheduler.on_trial_complete(None, t1, result(10, 450))
         scheduler.on_trial_complete(None, t2, result(10, np.nan))
diff --git a/python/ray/tune/tests/test_trial_scheduler_pbt.py b/python/ray/tune/tests/test_trial_scheduler_pbt.py
index 740616e8ce4d..5af7cb46724a 100644
--- a/python/ray/tune/tests/test_trial_scheduler_pbt.py
+++ b/python/ray/tune/tests/test_trial_scheduler_pbt.py
@@ -82,15 +82,24 @@ def synchSetup(self, synch, param=[10, 20, 30]):
 
     def testAsynchFail(self):
         analysis = self.synchSetup(False)
-        self.assertTrue(any(analysis.dataframe()["mean_accuracy"] != 33))
+        self.assertTrue(
+            any(
+                analysis.dataframe(metric="mean_accuracy", mode="max")
+                ["mean_accuracy"] != 33))
 
     def testSynchPass(self):
         analysis = self.synchSetup(True)
-        self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33))
+        self.assertTrue(
+            all(
+                analysis.dataframe(metric="mean_accuracy", mode="max")[
+                    "mean_accuracy"] == 33))
 
     def testSynchPassLast(self):
         analysis = self.synchSetup(True, param=[30, 20, 10])
-        self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33))
+        self.assertTrue(
+            all(
+                analysis.dataframe(metric="mean_accuracy", mode="max")[
+                    "mean_accuracy"] == 33))
 
 
 class PopulationBasedTrainingConfigTest(unittest.TestCase):
diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py
index f0e5fa5af1d2..2aa4422798c6 100644
--- a/python/ray/tune/tests/tutorial.py
+++ b/python/ray/tune/tests/tutorial.py
@@ -166,7 +166,7 @@ def train_mnist(config):
 # __run_analysis_begin__
 import os
 
-df = analysis.dataframe()
+df = analysis.results_df
 logdir = analysis.get_best_logdir("mean_accuracy", mode="max")
 state_dict = torch.load(os.path.join(logdir, "model.pth"))
 
diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py
index f331bebec2b3..075ba5c6910b 100644
--- a/python/ray/tune/tune.py
+++ b/python/ray/tune/tune.py
@@ -68,6 +68,8 @@ def _report_progress(runner, reporter, done=False):
 def run(
         run_or_experiment,
         name=None,
+        metric=None,
+        mode=None,
         stop=None,
         time_budget_s=None,
         config=None,
@@ -147,6 +149,12 @@ def run(
             will need to first register the function:
             ``tune.register_trainable("lambda_id", lambda x: ...)``. You can
             then use ``tune.run("lambda_id")``.
+        metric (str): Metric to optimize. This metric should be reported
+            with `tune.report()`. If set, will be passed to the search
+            algorithm and scheduler.
+        mode (str): Must be one of [min, max]. Determines whether objective is
+            minimizing or maximizing the metric attribute. If set, will be
+            passed to the search algorithm and scheduler.
         name (str): Name of experiment.
         stop (dict | callable | :class:`Stopper`): Stopping criteria. If dict,
             the keys may be any field in the return result of 'train()',
@@ -276,6 +284,11 @@ def run(
             "sync_config=SyncConfig(...)`. See `ray.tune.SyncConfig` for "
             "more details.")
 
+    if mode and mode not in ["min", "max"]:
+        raise ValueError(
+            "The `mode` parameter passed to `tune.run()` has to be one of "
+            "['min', 'max']")
+
     config = config or {}
     sync_config = sync_config or SyncConfig()
     set_sync_periods(sync_config)
@@ -329,8 +342,7 @@ def run(
     if not search_alg:
         search_alg = BasicVariantGenerator()
 
-    # TODO (krfricke): Introduce metric/mode as top level API
-    if config and not search_alg.set_search_properties(None, None, config):
+    if config and not search_alg.set_search_properties(metric, mode, config):
         if has_unresolved_values(config):
             raise ValueError(
                 "You passed a `config` parameter to `tune.run()` with "
@@ -339,9 +351,17 @@ def run(
                 "does not contain any more parameter definitions - include "
                 "them in the search algorithm's search space if necessary.")
 
+    scheduler = scheduler or FIFOScheduler()
+    if not scheduler.set_search_properties(metric, mode):
+        raise ValueError(
+            "You passed a `metric` or `mode` argument to `tune.run()`, but "
+            "the scheduler you are using was already instantiated with their "
+            "own `metric` and `mode` parameters. Either remove the arguments "
+            "from your scheduler or from your call to `tune.run()`")
+
     runner = TrialRunner(
         search_alg=search_alg,
-        scheduler=scheduler or FIFOScheduler(),
+        scheduler=scheduler,
         local_checkpoint_dir=experiments[0].checkpoint_dir,
         remote_checkpoint_dir=experiments[0].remote_checkpoint_dir,
         sync_to_cloud=sync_config.sync_to_cloud,
@@ -413,8 +433,8 @@ def run(
     return ExperimentAnalysis(
         runner.checkpoint_file,
         trials=trials,
-        default_metric=None,
-        default_mode=None)
+        default_metric=metric,
+        default_mode=mode)
 
 
 def run_experiments(experiments,
diff --git a/python/ray/worker.py b/python/ray/worker.py
index 851f4933e655..536f9d7a13ad 100644
--- a/python/ray/worker.py
+++ b/python/ray/worker.py
@@ -491,7 +491,6 @@ def init(
         _driver_object_store_memory=None,
         _memory=None,
         _redis_password=ray_constants.REDIS_DEFAULT_PASSWORD,
-        _include_java=False,
         _java_worker_options=None,
         _code_search_path=None,
         _temp_dir=None,
@@ -580,8 +579,6 @@ def init(
         _memory: Amount of reservable memory resource to create.
         _redis_password (str): Prevents external clients without the password
             from connecting to Redis if provided.
-        _include_java: Boolean flag indicating whether or not to enable java
-            workers.
         _temp_dir (str): If provided, specifies the root temporary
             directory for the Ray process. Defaults to an OS-specific
             conventional location, e.g., "/tmp/ray".
@@ -673,7 +670,6 @@ def init(
             redis_password=_redis_password,
             plasma_directory=None,
             huge_pages=None,
-            include_java=_include_java,
             include_dashboard=include_dashboard,
             dashboard_host=dashboard_host,
             dashboard_port=dashboard_port,
diff --git a/python/requirements_tune.txt b/python/requirements_tune.txt
index d06b3e2a6534..880bc8d2ba89 100644
--- a/python/requirements_tune.txt
+++ b/python/requirements_tune.txt
@@ -26,7 +26,7 @@ timm
 torch>=1.5.0
 torchvision>=0.6.0
 transformers
-tune-sklearn==0.0.5
+git+https://github.com/ray-project/tune-sklearn@master#egg=tune-sklearn
 wandb
 xgboost
 zoopt>=0.4.0
diff --git a/rllib/BUILD b/rllib/BUILD
index 7087beb20d81..21be09591995 100644
--- a/rllib/BUILD
+++ b/rllib/BUILD
@@ -468,6 +468,16 @@ py_test(
     srcs = ["agents/marwil/tests/test_marwil.py"]
 )
 
+# BCTrainer (sub-type of MARWIL)
+py_test(
+    name = "test_bc",
+    tags = ["agents_dir"],
+    size = "medium",
+    # Include the json data file.
+    data = ["tests/data/cartpole/large.json"],
+    srcs = ["agents/marwil/tests/test_bc.py"]
+)
+
 # MAMLTrainer
 py_test(
     name = "test_maml",
diff --git a/rllib/agents/dqn/dqn.py b/rllib/agents/dqn/dqn.py
index d07e7e9a77a3..6a4ba288b506 100644
--- a/rllib/agents/dqn/dqn.py
+++ b/rllib/agents/dqn/dqn.py
@@ -1,16 +1,20 @@
 import logging
+from typing import Type
 
-from ray.rllib.agents.trainer import with_common_config
-from ray.rllib.agents.trainer_template import build_trainer
 from ray.rllib.agents.dqn.dqn_tf_policy import DQNTFPolicy
 from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy
-from ray.rllib.policy.policy import LEARNER_STATS_KEY
+from ray.rllib.agents.trainer import with_common_config
+from ray.rllib.agents.trainer_template import build_trainer
+from ray.rllib.evaluation.worker_set import WorkerSet
+from ray.rllib.execution.concurrency_ops import Concurrently
+from ray.rllib.execution.metric_ops import StandardMetricsReporting
 from ray.rllib.execution.replay_buffer import LocalReplayBuffer
+from ray.rllib.execution.replay_ops import Replay, StoreToReplayBuffer
 from ray.rllib.execution.rollout_ops import ParallelRollouts
-from ray.rllib.execution.concurrency_ops import Concurrently
-from ray.rllib.execution.replay_ops import StoreToReplayBuffer, Replay
 from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork
-from ray.rllib.execution.metric_ops import StandardMetricsReporting
+from ray.rllib.policy.policy import LEARNER_STATS_KEY, Policy
+from ray.rllib.utils.typing import TrainerConfigDict
+from ray.util.iter import LocalIterator
 
 logger = logging.getLogger(__name__)
 
@@ -122,7 +126,7 @@
 # yapf: enable
 
 
-def validate_config(config):
+def validate_config(config: TrainerConfigDict) -> None:
     """Checks and updates the config based on settings.
 
     Rewrites rollout_fragment_length to take into account n_step truncation.
@@ -152,7 +156,8 @@ def validate_config(config):
                              "replay_sequence_length > 1.")
 
 
-def execution_plan(workers, config):
+def execution_plan(workers: WorkerSet,
+                   config: TrainerConfigDict) -> LocalIterator[dict]:
     if config.get("prioritized_replay"):
         prio_args = {
             "prioritized_replay_alpha": config["prioritized_replay_alpha"],
@@ -217,7 +222,7 @@ def update_prio(item):
     return StandardMetricsReporting(train_op, workers, config)
 
 
-def calculate_rr_weights(config):
+def calculate_rr_weights(config: TrainerConfigDict):
     if not config["training_intensity"]:
         return [1, 1]
     # e.g., 32 / 4 -> native ratio of 8.0
@@ -229,7 +234,7 @@ def calculate_rr_weights(config):
     return weights
 
 
-def get_policy_class(config):
+def get_policy_class(config: TrainerConfigDict) -> Type[Policy]:
     if config["framework"] == "torch":
         from ray.rllib.agents.dqn.dqn_torch_policy import DQNTorchPolicy
         return DQNTorchPolicy
@@ -237,7 +242,7 @@ def get_policy_class(config):
         return DQNTFPolicy
 
 
-def get_simple_policy_class(config):
+def get_simple_policy_class(config: TrainerConfigDict) -> Type[Policy]:
     if config["framework"] == "torch":
         from ray.rllib.agents.dqn.simple_q_torch_policy import \
             SimpleQTorchPolicy
diff --git a/rllib/agents/dqn/dqn_tf_policy.py b/rllib/agents/dqn/dqn_tf_policy.py
index ddce5b332353..177129f209fc 100644
--- a/rllib/agents/dqn/dqn_tf_policy.py
+++ b/rllib/agents/dqn/dqn_tf_policy.py
@@ -1,22 +1,26 @@
-from gym.spaces import Discrete
-import numpy as np
+from typing import Dict
 
+import gym
+import numpy as np
 import ray
 from ray.rllib.agents.dqn.distributional_q_tf_model import \
     DistributionalQTFModel
 from ray.rllib.agents.dqn.simple_q_tf_policy import TargetNetworkMixin
 from ray.rllib.models import ModelCatalog
+from ray.rllib.models.modelv2 import ModelV2
 from ray.rllib.models.tf.tf_action_dist import Categorical
+from ray.rllib.policy.policy import Policy
 from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.tf_policy import LearningRateSchedule
 from ray.rllib.policy.tf_policy_template import build_tf_policy
 from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.utils.exploration import ParameterNoise
-from ray.rllib.utils.numpy import convert_to_numpy
 from ray.rllib.utils.framework import try_import_tf
-from ray.rllib.utils.tf_ops import huber_loss, reduce_mean_ignore_inf, \
-    minimize_and_clip
-from ray.rllib.utils.tf_ops import make_tf_callable
+from ray.rllib.utils.numpy import convert_to_numpy
+from ray.rllib.utils.tf_ops import (huber_loss, make_tf_callable,
+                                    minimize_and_clip, reduce_mean_ignore_inf)
+from ray.rllib.utils.typing import (ModelGradients, TensorType,
+                                    TrainerConfigDict)
 
 tf1, tf, tfv = try_import_tf()
 
@@ -126,9 +130,11 @@ def compute_td_error(obs_t, act_t, rew_t, obs_tp1, done_mask,
         self.compute_td_error = compute_td_error
 
 
-def build_q_model(policy, obs_space, action_space, config):
+def build_q_model(policy: Policy, obs_space: gym.Space,
+                  action_space: gym.Space,
+                  config: TrainerConfigDict) -> ModelV2:
 
-    if not isinstance(action_space, Discrete):
+    if not isinstance(action_space, gym.spaces.Discrete):
         raise UnsupportedSpaceException(
             "Action space {} is not supported for DQN.".format(action_space))
 
@@ -184,9 +190,9 @@ def build_q_model(policy, obs_space, action_space, config):
     return policy.q_model
 
 
-def get_distribution_inputs_and_class(policy,
-                                      model,
-                                      obs_batch,
+def get_distribution_inputs_and_class(policy: Policy,
+                                      model: ModelV2,
+                                      obs_batch: TensorType,
                                       *,
                                       explore=True,
                                       **kwargs):
@@ -198,7 +204,8 @@ def get_distribution_inputs_and_class(policy,
     return policy.q_values, Categorical, []  # state-out
 
 
-def build_q_losses(policy, model, _, train_batch):
+def build_q_losses(policy: Policy, model, _,
+                   train_batch: SampleBatch) -> TensorType:
     config = policy.config
     # q network evaluation
     q_t, q_logits_t, q_dist_t = compute_q_values(
@@ -253,7 +260,8 @@ def build_q_losses(policy, model, _, train_batch):
     return policy.q_loss.loss
 
 
-def adam_optimizer(policy, config):
+def adam_optimizer(policy: Policy, config: TrainerConfigDict
+                   ) -> "tf.keras.optimizers.Optimizer":
     if policy.config["framework"] in ["tf2", "tfe"]:
         return tf.keras.optimizers.Adam(
             learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
@@ -262,7 +270,8 @@ def adam_optimizer(policy, config):
             learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"])
 
 
-def clip_gradients(policy, optimizer, loss):
+def clip_gradients(policy: Policy, optimizer: "tf.keras.optimizers.Optimizer",
+                   loss: TensorType) -> ModelGradients:
     if policy.config["grad_clip"] is not None:
         grads_and_vars = minimize_and_clip(
             optimizer,
@@ -276,25 +285,28 @@ def clip_gradients(policy, optimizer, loss):
     return grads_and_vars
 
 
-def build_q_stats(policy, batch):
+def build_q_stats(policy: Policy, batch) -> Dict[str, TensorType]:
     return dict({
         "cur_lr": tf.cast(policy.cur_lr, tf.float64),
     }, **policy.q_loss.stats)
 
 
-def setup_early_mixins(policy, obs_space, action_space, config):
+def setup_early_mixins(policy: Policy, obs_space, action_space,
+                       config: TrainerConfigDict) -> None:
     LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
 
 
-def setup_mid_mixins(policy, obs_space, action_space, config):
+def setup_mid_mixins(policy: Policy, obs_space, action_space, config) -> None:
     ComputeTDErrorMixin.__init__(policy)
 
 
-def setup_late_mixins(policy, obs_space, action_space, config):
+def setup_late_mixins(policy: Policy, obs_space: gym.Space,
+                      action_space: gym.Space,
+                      config: TrainerConfigDict) -> None:
     TargetNetworkMixin.__init__(policy, obs_space, action_space, config)
 
 
-def compute_q_values(policy, model, obs, explore):
+def compute_q_values(policy: Policy, model: ModelV2, obs: TensorType, explore):
     config = policy.config
 
     model_out, state = model({
@@ -361,7 +373,10 @@ def _adjust_nstep(n_step, gamma, obs, actions, rewards, new_obs, dones):
                 rewards[i] += gamma**j * rewards[i + j]
 
 
-def postprocess_nstep_and_prio(policy, batch, other_agent=None, episode=None):
+def postprocess_nstep_and_prio(policy: Policy,
+                               batch: SampleBatch,
+                               other_agent=None,
+                               episode=None) -> SampleBatch:
     # N-step Q adjustments.
     if policy.config["n_step"] > 1:
         _adjust_nstep(policy.config["n_step"], policy.config["gamma"],
diff --git a/rllib/agents/dqn/dqn_torch_policy.py b/rllib/agents/dqn/dqn_torch_policy.py
index cb6cf77ad409..e400f6b243c2 100644
--- a/rllib/agents/dqn/dqn_torch_policy.py
+++ b/rllib/agents/dqn/dqn_torch_policy.py
@@ -1,21 +1,27 @@
-from gym.spaces import Discrete
+from typing import Dict, List, Tuple
 
+import gym
 import ray
-from ray.rllib.agents.dqn.dqn_tf_policy import postprocess_nstep_and_prio, \
-    PRIO_WEIGHTS, Q_SCOPE, Q_TARGET_SCOPE
 from ray.rllib.agents.a3c.a3c_torch_policy import apply_grad_clipping
+from ray.rllib.agents.dqn.dqn_tf_policy import (
+    PRIO_WEIGHTS, Q_SCOPE, Q_TARGET_SCOPE, postprocess_nstep_and_prio)
 from ray.rllib.agents.dqn.dqn_torch_model import DQNTorchModel
 from ray.rllib.agents.dqn.simple_q_torch_policy import TargetNetworkMixin
-from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.models.catalog import ModelCatalog
-from ray.rllib.models.torch.torch_action_dist import TorchCategorical
+from ray.rllib.models.modelv2 import ModelV2
+from ray.rllib.models.torch.torch_action_dist import (TorchCategorical,
+                                                      TorchDistributionWrapper)
+from ray.rllib.policy.policy import Policy
+from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.torch_policy import LearningRateSchedule
 from ray.rllib.policy.torch_policy_template import build_torch_policy
 from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.utils.exploration.parameter_noise import ParameterNoise
 from ray.rllib.utils.framework import try_import_torch
-from ray.rllib.utils.torch_ops import huber_loss, reduce_mean_ignore_inf, \
-    softmax_cross_entropy_with_logits, FLOAT_MIN
+from ray.rllib.utils.torch_ops import (FLOAT_MIN, huber_loss,
+                                       reduce_mean_ignore_inf,
+                                       softmax_cross_entropy_with_logits)
+from ray.rllib.utils.typing import TensorType, TrainerConfigDict
 
 torch, nn = try_import_torch()
 F = None
@@ -115,9 +121,11 @@ def compute_td_error(obs_t, act_t, rew_t, obs_tp1, done_mask,
         self.compute_td_error = compute_td_error
 
 
-def build_q_model_and_distribution(policy, obs_space, action_space, config):
+def build_q_model_and_distribution(
+        policy: Policy, obs_space: gym.Space, action_space: gym.Space,
+        config: TrainerConfigDict) -> Tuple[ModelV2, TorchDistributionWrapper]:
 
-    if not isinstance(action_space, Discrete):
+    if not isinstance(action_space, gym.spaces.Discrete):
         raise UnsupportedSpaceException(
             "Action space {} is not supported for DQN.".format(action_space))
 
@@ -179,13 +187,14 @@ def build_q_model_and_distribution(policy, obs_space, action_space, config):
     return policy.q_model, TorchCategorical
 
 
-def get_distribution_inputs_and_class(policy,
-                                      model,
-                                      obs_batch,
-                                      *,
-                                      explore=True,
-                                      is_training=False,
-                                      **kwargs):
+def get_distribution_inputs_and_class(
+        policy: Policy,
+        model: ModelV2,
+        obs_batch: TensorType,
+        *,
+        explore: bool = True,
+        is_training: bool = False,
+        **kwargs) -> Tuple[TensorType, type, List[TensorType]]:
     q_vals = compute_q_values(policy, model, obs_batch, explore, is_training)
     q_vals = q_vals[0] if isinstance(q_vals, tuple) else q_vals
 
@@ -193,7 +202,8 @@ def get_distribution_inputs_and_class(policy,
     return policy.q_values, TorchCategorical, []  # state-out
 
 
-def build_q_losses(policy, model, _, train_batch):
+def build_q_losses(policy: Policy, model, _,
+                   train_batch: SampleBatch) -> TensorType:
     config = policy.config
     # Q-network evaluation.
     q_t, q_logits_t, q_probs_t = compute_q_values(
@@ -259,22 +269,25 @@ def build_q_losses(policy, model, _, train_batch):
     return policy.q_loss.loss
 
 
-def adam_optimizer(policy, config):
+def adam_optimizer(policy: Policy,
+                   config: TrainerConfigDict) -> "torch.optim.Optimizer":
     return torch.optim.Adam(
         policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"])
 
 
-def build_q_stats(policy, batch):
+def build_q_stats(policy: Policy, batch) -> Dict[str, TensorType]:
     return dict({
         "cur_lr": policy.cur_lr,
     }, **policy.q_loss.stats)
 
 
-def setup_early_mixins(policy, obs_space, action_space, config):
+def setup_early_mixins(policy: Policy, obs_space, action_space,
+                       config: TrainerConfigDict) -> None:
     LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
 
 
-def after_init(policy, obs_space, action_space, config):
+def after_init(policy: Policy, obs_space: gym.Space, action_space: gym.Space,
+               config: TrainerConfigDict) -> None:
     ComputeTDErrorMixin.__init__(policy)
     TargetNetworkMixin.__init__(policy, obs_space, action_space, config)
     # Move target net to device (this is done autoatically for the
@@ -282,7 +295,11 @@ def after_init(policy, obs_space, action_space, config):
     policy.target_q_model = policy.target_q_model.to(policy.device)
 
 
-def compute_q_values(policy, model, obs, explore, is_training=False):
+def compute_q_values(policy: Policy,
+                     model: ModelV2,
+                     obs: TensorType,
+                     explore,
+                     is_training: bool = False):
     config = policy.config
 
     model_out, state = model({
@@ -323,12 +340,15 @@ def compute_q_values(policy, model, obs, explore, is_training=False):
     return value, logits, probs_or_logits
 
 
-def grad_process_and_td_error_fn(policy, optimizer, loss):
+def grad_process_and_td_error_fn(policy: Policy,
+                                 optimizer: "torch.optim.Optimizer",
+                                 loss: TensorType) -> Dict[str, TensorType]:
     # Clip grads if configured.
     return apply_grad_clipping(policy, optimizer, loss)
 
 
-def extra_action_out_fn(policy, input_dict, state_batches, model, action_dist):
+def extra_action_out_fn(policy: Policy, input_dict, state_batches, model,
+                        action_dist) -> Dict[str, TensorType]:
     return {"q_values": policy.q_values}
 
 
diff --git a/rllib/agents/dqn/simple_q.py b/rllib/agents/dqn/simple_q.py
index d24bb786aa4d..443daf7f810a 100644
--- a/rllib/agents/dqn/simple_q.py
+++ b/rllib/agents/dqn/simple_q.py
@@ -1,14 +1,28 @@
+"""
+Simple Q (simple_q)
+===================
+
+This file defines the distributed Trainer class for simple Q learning.
+See `simple_q_[tf|torch]_policy.py` for the definition of the policy loss.
+"""
+
 import logging
+from typing import Optional, Type
 
-from ray.rllib.agents.trainer import with_common_config
-from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy
 from ray.rllib.agents.dqn.dqn import DQNTrainer
+from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy
+from ray.rllib.agents.dqn.simple_q_torch_policy import SimpleQTorchPolicy
+from ray.rllib.agents.trainer import with_common_config
+from ray.rllib.evaluation.worker_set import WorkerSet
 from ray.rllib.execution.concurrency_ops import Concurrently
-from ray.rllib.execution.replay_ops import StoreToReplayBuffer, Replay
-from ray.rllib.execution.rollout_ops import ParallelRollouts
-from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork
 from ray.rllib.execution.metric_ops import StandardMetricsReporting
 from ray.rllib.execution.replay_buffer import LocalReplayBuffer
+from ray.rllib.execution.replay_ops import Replay, StoreToReplayBuffer
+from ray.rllib.execution.rollout_ops import ParallelRollouts
+from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork
+from ray.rllib.policy.policy import Policy
+from ray.rllib.utils.typing import TrainerConfigDict
+from ray.util.iter import LocalIterator
 
 logger = logging.getLogger(__name__)
 
@@ -78,16 +92,22 @@
 # yapf: enable
 
 
-def get_policy_class(config):
+def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]:
+    """Policy class picker function. Class is chosen based on DL-framework.
+
+    Args:
+        config (TrainerConfigDict): The trainer's configuration dict.
+
+    Returns:
+        Optional[Type[Policy]]: The Policy class to use with SimpleQTrainer.
+            If None, use `default_policy` provided in build_trainer().
+    """
     if config["framework"] == "torch":
-        from ray.rllib.agents.dqn.simple_q_torch_policy import \
-            SimpleQTorchPolicy
         return SimpleQTorchPolicy
-    else:
-        return SimpleQTFPolicy
 
 
-def execution_plan(workers, config):
+def execution_plan(workers: WorkerSet,
+                   config: TrainerConfigDict) -> LocalIterator[dict]:
     local_replay_buffer = LocalReplayBuffer(
         num_shards=1,
         learning_starts=config["learning_starts"],
diff --git a/rllib/agents/dqn/simple_q_tf_policy.py b/rllib/agents/dqn/simple_q_tf_policy.py
index c6a70615b300..526980c1a7f7 100644
--- a/rllib/agents/dqn/simple_q_tf_policy.py
+++ b/rllib/agents/dqn/simple_q_tf_policy.py
@@ -1,19 +1,24 @@
 """Basic example of a DQN policy without any optimizations."""
 
-from gym.spaces import Discrete
 import logging
+from typing import List, Tuple, Type
 
+import gym
 import ray
-from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.models import ModelCatalog
+from ray.rllib.models.modelv2 import ModelV2
+from ray.rllib.models.tf.tf_action_dist import (Categorical,
+                                                TFActionDistribution)
 from ray.rllib.models.torch.torch_action_dist import TorchCategorical
-from ray.rllib.models.tf.tf_action_dist import Categorical
-from ray.rllib.utils.annotations import override
-from ray.rllib.utils.error import UnsupportedSpaceException
+from ray.rllib.policy import Policy
+from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.tf_policy import TFPolicy
 from ray.rllib.policy.tf_policy_template import build_tf_policy
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.error import UnsupportedSpaceException
 from ray.rllib.utils.framework import try_import_tf
 from ray.rllib.utils.tf_ops import huber_loss, make_tf_callable
+from ray.rllib.utils.typing import TensorType, TrainerConfigDict
 
 tf1, tf, tfv = try_import_tf()
 logger = logging.getLogger(__name__)
@@ -23,7 +28,8 @@
 
 
 class TargetNetworkMixin:
-    def __init__(self, obs_space, action_space, config):
+    def __init__(self, obs_space: gym.Space, action_space: gym.Space,
+                 config: TrainerConfigDict):
         @make_tf_callable(self.get_session())
         def do_update():
             # update_target_fn will be called periodically to copy Q network to
@@ -44,9 +50,11 @@ def variables(self):
         return self.q_func_vars + self.target_q_func_vars
 
 
-def build_q_models(policy, obs_space, action_space, config):
+def build_q_models(policy: Policy, obs_space: gym.Space,
+                   action_space: gym.Space,
+                   config: TrainerConfigDict) -> ModelV2:
 
-    if not isinstance(action_space, Discrete):
+    if not isinstance(action_space, gym.spaces.Discrete):
         raise UnsupportedSpaceException(
             "Action space {} is not supported for DQN.".format(action_space))
 
@@ -72,13 +80,14 @@ def build_q_models(policy, obs_space, action_space, config):
     return policy.q_model
 
 
-def get_distribution_inputs_and_class(policy,
-                                      q_model,
-                                      obs_batch,
-                                      *,
-                                      explore=True,
-                                      is_training=True,
-                                      **kwargs):
+def get_distribution_inputs_and_class(
+        policy: Policy,
+        q_model: ModelV2,
+        obs_batch: TensorType,
+        *,
+        explore=True,
+        is_training=True,
+        **kwargs) -> Tuple[TensorType, type, List[TensorType]]:
     q_vals = compute_q_values(policy, q_model, obs_batch, explore, is_training)
     q_vals = q_vals[0] if isinstance(q_vals, tuple) else q_vals
 
@@ -88,7 +97,9 @@ def get_distribution_inputs_and_class(policy,
                              Categorical), []  # state-outs
 
 
-def build_q_losses(policy, model, dist_class, train_batch):
+def build_q_losses(policy: Policy, model: ModelV2,
+                   dist_class: Type[TFActionDistribution],
+                   train_batch: SampleBatch) -> TensorType:
     # q network evaluation
     q_t = compute_q_values(
         policy,
@@ -131,7 +142,11 @@ def build_q_losses(policy, model, dist_class, train_batch):
     return loss
 
 
-def compute_q_values(policy, model, obs, explore, is_training=None):
+def compute_q_values(policy: Policy,
+                     model: ModelV2,
+                     obs: TensorType,
+                     explore,
+                     is_training=None) -> TensorType:
     model_out, _ = model({
         SampleBatch.CUR_OBS: obs,
         "is_training": is_training
@@ -141,7 +156,9 @@ def compute_q_values(policy, model, obs, explore, is_training=None):
     return model_out
 
 
-def setup_late_mixins(policy, obs_space, action_space, config):
+def setup_late_mixins(policy: Policy, obs_space: gym.Space,
+                      action_space: gym.Space,
+                      config: TrainerConfigDict) -> None:
     TargetNetworkMixin.__init__(policy, obs_space, action_space, config)
 
 
diff --git a/rllib/agents/dqn/simple_q_torch_policy.py b/rllib/agents/dqn/simple_q_torch_policy.py
index 941bacb0e1ef..fbdcc05ae676 100644
--- a/rllib/agents/dqn/simple_q_torch_policy.py
+++ b/rllib/agents/dqn/simple_q_torch_policy.py
@@ -1,15 +1,20 @@
 """Basic example of a DQN policy without any optimizations."""
 
 import logging
+from typing import Dict, Tuple
 
+import gym
 import ray
-from ray.rllib.agents.dqn.simple_q_tf_policy import build_q_models, \
-    get_distribution_inputs_and_class, compute_q_values
-from ray.rllib.policy.sample_batch import SampleBatch
+from ray.rllib.agents.dqn.simple_q_tf_policy import (
+    build_q_models, compute_q_values, get_distribution_inputs_and_class)
+from ray.rllib.models.modelv2 import ModelV2
 from ray.rllib.models.torch.torch_action_dist import TorchCategorical
+from ray.rllib.policy import Policy
+from ray.rllib.policy.sample_batch import SampleBatch
 from ray.rllib.policy.torch_policy_template import build_torch_policy
 from ray.rllib.utils.framework import try_import_torch
 from ray.rllib.utils.torch_ops import huber_loss
+from ray.rllib.utils.typing import TensorType, TrainerConfigDict
 
 torch, nn = try_import_torch()
 F = None
@@ -19,7 +24,8 @@
 
 
 class TargetNetworkMixin:
-    def __init__(self, obs_space, action_space, config):
+    def __init__(self, obs_space: gym.Space, action_space: gym.Space,
+                 config: TrainerConfigDict):
         def do_update():
             # Update_target_fn will be called periodically to copy Q network to
             # target Q network.
@@ -30,12 +36,15 @@ def do_update():
         self.update_target = do_update
 
 
-def build_q_model_and_distribution(policy, obs_space, action_space, config):
+def build_q_model_and_distribution(
+        policy: Policy, obs_space: gym.Space, action_space: gym.Space,
+        config: TrainerConfigDict) -> Tuple[ModelV2, type]:
     return build_q_models(policy, obs_space, action_space, config), \
         TorchCategorical
 
 
-def build_q_losses(policy, model, dist_class, train_batch):
+def build_q_losses(policy: Policy, model, dist_class,
+                   train_batch: SampleBatch) -> TensorType:
     # q network evaluation
     q_t = compute_q_values(
         policy,
@@ -78,12 +87,15 @@ def build_q_losses(policy, model, dist_class, train_batch):
     return loss
 
 
-def extra_action_out_fn(policy, input_dict, state_batches, model, action_dist):
+def extra_action_out_fn(policy: Policy, input_dict, state_batches, model,
+                        action_dist) -> Dict[str, TensorType]:
     """Adds q-values to action out dict."""
     return {"q_values": policy.q_values}
 
 
-def setup_late_mixins(policy, obs_space, action_space, config):
+def setup_late_mixins(policy: Policy, obs_space: gym.Space,
+                      action_space: gym.Space,
+                      config: TrainerConfigDict) -> None:
     TargetNetworkMixin.__init__(policy, obs_space, action_space, config)
 
 
diff --git a/rllib/agents/maml/maml_torch_policy.py b/rllib/agents/maml/maml_torch_policy.py
index cf378a4ba681..8a143b455e65 100644
--- a/rllib/agents/maml/maml_torch_policy.py
+++ b/rllib/agents/maml/maml_torch_policy.py
@@ -199,10 +199,9 @@ def __init__(self,
                 current_policy_vars[i] = adapted_policy_vars
                 kls.append(kl_loss)
                 inner_ppo_loss.append(ppo_loss)
-            inner_kls.append(kls)
+            inner_kls.extend(kls)
 
-        mean_inner_kl = [torch.mean(torch.stack(kls)) for kls in inner_kls]
-        self.mean_inner_kl = mean_inner_kl
+        self.mean_inner_kl = inner_kls
 
         ppo_obj = []
         for i in range(self.num_tasks):
@@ -230,10 +229,10 @@ def __init__(self,
         self.mean_entropy = entropy_loss
 
         self.inner_kl_loss = torch.mean(
-            torch.stack(
-                [a * b for a, b in zip(self.cur_kl_coeff, mean_inner_kl)]))
+            torch.stack([
+                a * b for a, b in zip(self.cur_kl_coeff, self.mean_inner_kl)
+            ]))
         self.loss = torch.mean(torch.stack(ppo_obj)) + self.inner_kl_loss
-        print("Meta-Loss: ", self.loss, ", Inner KL:", self.inner_kl_loss)
 
     def feed_forward(self, obs, policy_vars, policy_config):
         # Hacky for now, reconstruct FC network with adapted weights
@@ -298,7 +297,6 @@ def fc_network(inp, network_vars, hidden_nonlinearity,
         return pi_new_logits, torch.squeeze(value_fn)
 
     def compute_updated_variables(self, loss, network_vars, model):
-
         grad = torch.autograd.grad(
             loss,
             inputs=model.parameters(),
@@ -389,8 +387,9 @@ def maml_stats(policy, train_batch):
 
 class KLCoeffMixin:
     def __init__(self, config):
-        self.kl_coeff_val = [config["kl_coeff"]
-                             ] * config["inner_adaptation_steps"]
+        self.kl_coeff_val = [
+            config["kl_coeff"]
+        ] * config["inner_adaptation_steps"] * config["num_workers"]
         self.kl_target = self.config["kl_target"]
 
     def update_kls(self, sampled_kls):
diff --git a/rllib/agents/marwil/__init__.py b/rllib/agents/marwil/__init__.py
index f901cf07269e..5b66c96f172c 100644
--- a/rllib/agents/marwil/__init__.py
+++ b/rllib/agents/marwil/__init__.py
@@ -1,8 +1,11 @@
+from ray.rllib.agents.marwil.bc import BCTrainer, BC_DEFAULT_CONFIG
 from ray.rllib.agents.marwil.marwil import MARWILTrainer, DEFAULT_CONFIG
 from ray.rllib.agents.marwil.marwil_tf_policy import MARWILTFPolicy
 from ray.rllib.agents.marwil.marwil_torch_policy import MARWILTorchPolicy
 
 __all__ = [
+    "BCTrainer",
+    "BC_DEFAULT_CONFIG",
     "DEFAULT_CONFIG",
     "MARWILTFPolicy",
     "MARWILTorchPolicy",
diff --git a/rllib/agents/marwil/bc.py b/rllib/agents/marwil/bc.py
new file mode 100644
index 000000000000..81f8afce5970
--- /dev/null
+++ b/rllib/agents/marwil/bc.py
@@ -0,0 +1,29 @@
+"""Behavioral Cloning (derived from MARWIL).
+
+Simply uses the MARWIL agent with beta force-set to 0.0.
+"""
+from ray.rllib.agents.marwil.marwil import MARWILTrainer, \
+    DEFAULT_CONFIG as MARWIL_CONFIG
+from ray.rllib.utils.typing import TrainerConfigDict
+
+# yapf: disable
+# __sphinx_doc_begin__
+BC_DEFAULT_CONFIG = MARWILTrainer.merge_trainer_configs(
+    MARWIL_CONFIG, {
+        "beta": 0.0,
+    })
+# __sphinx_doc_end__
+# yapf: enable
+
+
+def validate_config(config: TrainerConfigDict):
+    if config["beta"] != 0.0:
+        raise ValueError(
+            "For behavioral cloning, `beta` parameter must be 0.0!")
+
+
+BCTrainer = MARWILTrainer.with_updates(
+    name="BC",
+    default_config=BC_DEFAULT_CONFIG,
+    validate_config=validate_config,
+)
diff --git a/rllib/agents/marwil/marwil.py b/rllib/agents/marwil/marwil.py
index a196015e02e8..e68c61dc931e 100644
--- a/rllib/agents/marwil/marwil.py
+++ b/rllib/agents/marwil/marwil.py
@@ -16,22 +16,22 @@
     # Use importance sampling estimators for reward
     "input_evaluation": ["is", "wis"],
 
-    # Scaling of advantages in exponential terms
-    # When beta is 0, MARWIL is reduced to imitation learning
+    # Scaling of advantages in exponential terms.
+    # When beta is 0.0, MARWIL is reduced to imitation learning.
     "beta": 1.0,
-    # Balancing value estimation loss and policy optimization loss
+    # Balancing value estimation loss and policy optimization loss.
     "vf_coeff": 1.0,
-    # Whether to calculate cumulative rewards
+    # Whether to calculate cumulative rewards.
     "postprocess_inputs": True,
-    # Whether to rollout "complete_episodes" or "truncate_episodes"
+    # Whether to rollout "complete_episodes" or "truncate_episodes".
     "batch_mode": "complete_episodes",
-    # Learning rate for adam optimizer
+    # Learning rate for adam optimizer.
     "lr": 1e-4,
-    # Number of timesteps collected for each SGD round
+    # Number of timesteps collected for each SGD round.
     "train_batch_size": 2000,
-    # Number of steps max to keep in the batch replay buffer
+    # Number of steps max to keep in the batch replay buffer.
     "replay_buffer_size": 100000,
-    # Number of steps to read before learning starts
+    # Number of steps to read before learning starts.
     "learning_starts": 0,
     # === Parallelism ===
     "num_workers": 0,
@@ -45,8 +45,6 @@ def get_policy_class(config):
         from ray.rllib.agents.marwil.marwil_torch_policy import \
             MARWILTorchPolicy
         return MARWILTorchPolicy
-    else:
-        return MARWILTFPolicy
 
 
 def execution_plan(workers, config):
diff --git a/rllib/agents/marwil/tests/test_bc.py b/rllib/agents/marwil/tests/test_bc.py
new file mode 100644
index 000000000000..31a9b3818618
--- /dev/null
+++ b/rllib/agents/marwil/tests/test_bc.py
@@ -0,0 +1,65 @@
+import os
+from pathlib import Path
+import unittest
+
+import ray
+import ray.rllib.agents.marwil as marwil
+from ray.rllib.utils.framework import try_import_tf
+from ray.rllib.utils.test_utils import check_compute_single_action, \
+    framework_iterator
+
+tf1, tf, tfv = try_import_tf()
+
+
+class TestBC(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        ray.init()
+
+    @classmethod
+    def tearDownClass(cls):
+        ray.shutdown()
+
+    def test_bc_compilation_and_learning_from_offline_file(self):
+        """Test that BCTrainer builds and learns from offline data."""
+        rllib_dir = Path(__file__).parent.parent.parent.parent
+        print("rllib dir={}".format(rllib_dir))
+        data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json")
+        print("data_file={} exists={}".format(data_file,
+                                              os.path.isfile(data_file)))
+
+        config = marwil.BC_DEFAULT_CONFIG.copy()
+        config["num_workers"] = 0  # Run locally.
+        config["evaluation_num_workers"] = 1
+        config["evaluation_interval"] = 1
+        # Evaluate on actual environment.
+        config["evaluation_config"] = {"input": "sampler"}
+        # Learn from offline data.
+        config["input"] = [data_file]
+        num_iterations = 300
+
+        # Test for all frameworks.
+        for _ in framework_iterator(config, frameworks=("tf", "torch")):
+            trainer = marwil.BCTrainer(config=config, env="CartPole-v0")
+            for i in range(num_iterations):
+                eval_results = trainer.train()["evaluation"]
+                print("iter={} R={}".format(
+                    i, eval_results["episode_reward_mean"]))
+                # Learn until some reward is reached on an actual live env.
+                if eval_results["episode_reward_mean"] > 60.0:
+                    print("learnt!")
+                    break
+            else:
+                # No `break`: the reward threshold was never reached.
+                self.fail("BCTrainer did not reach a mean reward of 60.0!")
+
+            check_compute_single_action(
+                trainer, include_prev_action_reward=True)
+
+            trainer.stop()
+
+
+if __name__ == "__main__":
+    import pytest
+    import sys
+    sys.exit(pytest.main(["-v", __file__]))
diff --git a/rllib/agents/marwil/tests/test_marwil.py b/rllib/agents/marwil/tests/test_marwil.py
index 49a223da1d06..b390ada22d09 100644
--- a/rllib/agents/marwil/tests/test_marwil.py
+++ b/rllib/agents/marwil/tests/test_marwil.py
@@ -35,7 +35,9 @@ def test_marwil_compilation_and_learning_from_offline_file(self):
         config["num_workers"] = 0  # Run locally.
         config["evaluation_num_workers"] = 1
         config["evaluation_interval"] = 1
+        # Evaluate on actual environment.
         config["evaluation_config"] = {"input": "sampler"}
+        # Learn from offline data.
         config["input"] = [data_file]
         num_iterations = 300
 
diff --git a/rllib/agents/mbmpo/mbmpo.py b/rllib/agents/mbmpo/mbmpo.py
index cf24f8a7821d..eebbb1dcd3db 100644
--- a/rllib/agents/mbmpo/mbmpo.py
+++ b/rllib/agents/mbmpo/mbmpo.py
@@ -18,7 +18,8 @@
 from ray.rllib.evaluation.metrics import collect_episodes
 from ray.rllib.agents.mbmpo.model_vector_env import custom_model_vector_env
 from ray.rllib.evaluation.metrics import collect_metrics
-from ray.rllib.agents.mbmpo.utils import calculate_gae_advantages
+from ray.rllib.agents.mbmpo.utils import calculate_gae_advantages, \
+    MBMPOExploration
 
 logger = logging.getLogger(__name__)
 
@@ -69,7 +70,7 @@
         # Number of Transition-Dynamics Models for Ensemble
         "ensemble_size": 5,
         # Hidden Layers for Model Ensemble
-        "fcnet_hiddens": [512, 512],
+        "fcnet_hiddens": [512, 512, 512],
         # Model Learning Rate
         "lr": 1e-3,
         # Max number of training epochs per MBMPO iter
@@ -81,10 +82,11 @@
         # Normalize Data (obs, action, and deltas)
         "normalize_data": True,
     },
+    "exploration_config": {
+        "type": MBMPOExploration,
+    },
     # Workers sample from dynamics models
     "custom_vector_env": custom_model_vector_env,
-    # How many enviornments there are per worker (vectorized)
-    "num_worker_envs": 20,
     # How many iterations through MAML per MBMPO iteration
     "num_maml_steps": 10,
 })
@@ -152,7 +154,7 @@ def update(pi, pi_id):
         metrics.info[LEARNER_INFO] = fetches
         metrics.counters[STEPS_TRAINED_COUNTER] += samples.count
 
-        if self.step_counter == self.num_steps:
+        if self.step_counter == self.num_steps - 1:
             td_metric = self.workers.local_worker().foreach_policy(
                 fit_dynamics)[0]
 
diff --git a/rllib/agents/mbmpo/model_ensemble.py b/rllib/agents/mbmpo/model_ensemble.py
index c252e046449f..1c8d03562070 100644
--- a/rllib/agents/mbmpo/model_ensemble.py
+++ b/rllib/agents/mbmpo/model_ensemble.py
@@ -158,7 +158,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config,
 
         for i in range(self.num_models):
             self.add_module("TD-model-" + str(i), self.dynamics_ensemble[i])
-        self.replay_buffer_max = 100000
+        self.replay_buffer_max = 10000
         self.replay_buffer = None
         self.optimizers = [
             torch.optim.Adam(
@@ -170,7 +170,8 @@ def __init__(self, obs_space, action_space, num_outputs, model_config,
         self.metrics[STEPS_SAMPLED_COUNTER] = 0
 
         # For each worker, choose a random model to choose trajectories from
-        self.sample_index = np.random.randint(self.num_models)
+        worker_index = get_global_worker().worker_index
+        self.sample_index = int((worker_index - 1) / self.num_models)
         self.global_itr = 0
         self.device = (torch.device("cuda")
                        if torch.cuda.is_available() else torch.device("cpu"))
@@ -195,9 +196,10 @@ def fit(self):
         # Add env samples to Replay Buffer
         local_worker = get_global_worker()
         new_samples = local_worker.sample()
+        # Initial Exploration of 8000 timesteps
         if not self.global_itr:
-            tmp = local_worker.sample()
-            new_samples.concat(tmp)
+            extra = local_worker.sample()
+            new_samples.concat(extra)
 
         # Process Samples
         new_samples = process_samples(new_samples)
@@ -257,9 +259,6 @@ def convert_to_str(lst):
                     train_losses[ind] = train_losses[
                         ind].detach().cpu().numpy()
 
-                del x
-                del y
-
             # Validation
             val_lists = []
             for data in zip(*val_loaders):
@@ -273,8 +272,6 @@ def convert_to_str(lst):
 
                 for ind in range(self.num_models):
                     val_losses[ind] = val_losses[ind].detach().cpu().numpy()
-                del x
-                del y
 
             val_lists = np.array(val_lists)
             avg_val_losses = np.mean(val_lists, axis=0)
diff --git a/rllib/agents/mbmpo/model_vector_env.py b/rllib/agents/mbmpo/model_vector_env.py
index 655169e0613e..4a0b56836a3e 100644
--- a/rllib/agents/mbmpo/model_vector_env.py
+++ b/rllib/agents/mbmpo/model_vector_env.py
@@ -81,7 +81,7 @@ def vector_step(self, actions):
         next_obs_batch = self.model.predict_model_batches(
             obs_batch, action_batch, device=self.device)
 
-        next_obs_batch = np.clip(next_obs_batch, -50, 50)
+        next_obs_batch = np.clip(next_obs_batch, -1000, 1000)
 
         rew_batch = self.envs[0].reward(obs_batch, action_batch,
                                         next_obs_batch)
@@ -95,7 +95,8 @@ def vector_step(self, actions):
 
         self.cur_obs = next_obs_batch
 
-        return list(obs_batch), list(rew_batch), list(dones_batch), info_batch
+        return list(next_obs_batch), list(rew_batch), list(
+            dones_batch), info_batch
 
     @override(VectorEnv)
     def get_unwrapped(self):
diff --git a/rllib/agents/mbmpo/utils.py b/rllib/agents/mbmpo/utils.py
index 16bb922da74f..b6efcdb477ba 100644
--- a/rllib/agents/mbmpo/utils.py
+++ b/rllib/agents/mbmpo/utils.py
@@ -1,5 +1,16 @@
 import numpy as np
 import scipy
+from typing import Union
+
+from ray.rllib.models.action_dist import ActionDistribution
+from ray.rllib.models.modelv2 import ModelV2
+from ray.rllib.utils.annotations import override
+from ray.rllib.utils.exploration.exploration import Exploration
+from ray.rllib.utils.framework import try_import_tf, try_import_torch, \
+    TensorType
+
+tf1, tf, tfv = try_import_tf()
+torch, _ = try_import_torch()
 
 
 class LinearFeatureBaseline():
@@ -66,3 +77,50 @@ def discount_cumsum(x, discount):
         """
     return scipy.signal.lfilter(
         [1], [1, float(-discount)], x[::-1], axis=0)[::-1]
+
+
+class MBMPOExploration(Exploration):
+    """Exploration for MBMPO with an initial random-action phase.
+
+    Samples actions from the policy's action distribution, except that
+    the worker with index 0 takes random `action_space.sample()` actions
+    for its first 8000 timesteps. `explore` is ignored; torch only.
+    """
+
+    def __init__(self, action_space, *, framework: str, model: ModelV2,
+                 **kwargs):
+        """Initializes an MBMPOExploration object.
+
+        Args:
+            action_space (Space): The gym action space used by the environment.
+            framework (str): The framework to use; must not be None.
+            model (ModelV2): Forwarded to the Exploration base class.
+            **kwargs: Forwarded to Exploration; must hold "worker_index".
+        """
+        assert framework is not None
+        self.timestep = 0
+        self.worker_index = kwargs["worker_index"]
+        super().__init__(
+            action_space, model=model, framework=framework, **kwargs)
+
+    @override(Exploration)
+    def get_exploration_action(self,
+                               *,
+                               action_distribution: ActionDistribution,
+                               timestep: Union[int, TensorType],
+                               explore: bool = True):
+        assert self.framework == "torch"
+        return self._get_torch_exploration_action(action_distribution, explore)
+
+    def _get_torch_exploration_action(self, action_dist, explore):
+        action = action_dist.sample()
+        logp = action_dist.sampled_action_logp()
+
+        batch_size = action.size()[0]
+
+        # Initial Random Exploration for Real Env Interaction
+        if self.worker_index == 0 and self.timestep < 8000:
+            action = [self.action_space.sample() for _ in range(batch_size)]
+            logp = [0.0 for _ in range(batch_size)]
+        self.timestep += batch_size
+        return action, logp
diff --git a/rllib/agents/registry.py b/rllib/agents/registry.py
index 2f46106c7850..001f921d4d0a 100644
--- a/rllib/agents/registry.py
+++ b/rllib/agents/registry.py
@@ -5,9 +5,24 @@
 from ray.rllib.contrib.registry import CONTRIBUTED_ALGORITHMS
 
 
-def _import_sac():
-    from ray.rllib.agents import sac
-    return sac.SACTrainer
+def _import_a2c():
+    from ray.rllib.agents import a3c
+    return a3c.A2CTrainer
+
+
+def _import_a3c():
+    from ray.rllib.agents import a3c
+    return a3c.A3CTrainer
+
+
+def _import_apex():
+    from ray.rllib.agents import dqn
+    return dqn.ApexTrainer
+
+
+def _import_apex_ddpg():
+    from ray.rllib.agents import ddpg
+    return ddpg.ApexDDPGTrainer
 
 
 def _import_appo():
@@ -15,14 +30,14 @@ def _import_appo():
     return ppo.APPOTrainer
 
 
-def _import_ddppo():
-    from ray.rllib.agents import ppo
-    return ppo.DDPPOTrainer
+def _import_ars():
+    from ray.rllib.agents import ars
+    return ars.ARSTrainer
 
 
-def _import_qmix():
-    from ray.rllib.agents import qmix
-    return qmix.QMixTrainer
+def _import_bc():
+    from ray.rllib.agents import marwil
+    return marwil.BCTrainer
 
 
 def _import_ddpg():
@@ -30,19 +45,19 @@ def _import_ddpg():
     return ddpg.DDPGTrainer
 
 
-def _import_apex_ddpg():
-    from ray.rllib.agents import ddpg
-    return ddpg.ApexDDPGTrainer
+def _import_ddppo():
+    from ray.rllib.agents import ppo
+    return ppo.DDPPOTrainer
 
 
-def _import_td3():
-    from ray.rllib.agents import ddpg
-    return ddpg.TD3Trainer
+def _import_dqn():
+    from ray.rllib.agents import dqn
+    return dqn.DQNTrainer
 
 
-def _import_ppo():
-    from ray.rllib.agents import ppo
-    return ppo.PPOTrainer
+def _import_dreamer():
+    from ray.rllib.agents import dreamer
+    return dreamer.DREAMERTrainer
 
 
 def _import_es():
@@ -50,34 +65,24 @@ def _import_es():
     return es.ESTrainer
 
 
-def _import_ars():
-    from ray.rllib.agents import ars
-    return ars.ARSTrainer
-
-
-def _import_dqn():
-    from ray.rllib.agents import dqn
-    return dqn.DQNTrainer
-
-
-def _import_simple_q():
-    from ray.rllib.agents import dqn
-    return dqn.SimpleQTrainer
+def _import_impala():
+    from ray.rllib.agents import impala
+    return impala.ImpalaTrainer
 
 
-def _import_apex():
-    from ray.rllib.agents import dqn
-    return dqn.ApexTrainer
+def _import_maml():
+    from ray.rllib.agents import maml
+    return maml.MAMLTrainer
 
 
-def _import_a3c():
-    from ray.rllib.agents import a3c
-    return a3c.A3CTrainer
+def _import_marwil():
+    from ray.rllib.agents import marwil
+    return marwil.MARWILTrainer
 
 
-def _import_a2c():
-    from ray.rllib.agents import a3c
-    return a3c.A2CTrainer
+def _import_mbmpo():
+    from ray.rllib.agents import mbmpo
+    return mbmpo.MBMPOTrainer
 
 
 def _import_pg():
@@ -85,53 +90,54 @@ def _import_pg():
     return pg.PGTrainer
 
 
-def _import_impala():
-    from ray.rllib.agents import impala
-    return impala.ImpalaTrainer
+def _import_ppo():
+    from ray.rllib.agents import ppo
+    return ppo.PPOTrainer
 
 
-def _import_marwil():
-    from ray.rllib.agents import marwil
-    return marwil.MARWILTrainer
+def _import_qmix():
+    from ray.rllib.agents import qmix
+    return qmix.QMixTrainer
 
 
-def _import_maml():
-    from ray.rllib.agents import maml
-    return maml.MAMLTrainer
+def _import_sac():
+    from ray.rllib.agents import sac
+    return sac.SACTrainer
 
 
-def _import_mbmpo():
-    from ray.rllib.agents import mbmpo
-    return mbmpo.MBMPOTrainer
+def _import_simple_q():
+    from ray.rllib.agents import dqn
+    return dqn.SimpleQTrainer
 
 
-def _import_dreamer():
-    from ray.rllib.agents import dreamer
-    return dreamer.DREAMERTrainer
+def _import_td3():
+    from ray.rllib.agents import ddpg
+    return ddpg.TD3Trainer
 
 
 ALGORITHMS = {
-    "SAC": _import_sac,
-    "DDPG": _import_ddpg,
+    "A2C": _import_a2c,
+    "A3C": _import_a3c,
+    "APEX": _import_apex,
     "APEX_DDPG": _import_apex_ddpg,
-    "TD3": _import_td3,
-    "PPO": _import_ppo,
-    "ES": _import_es,
+    "APPO": _import_appo,
     "ARS": _import_ars,
+    "BC": _import_bc,
+    "ES": _import_es,
+    "DDPG": _import_ddpg,
+    "DDPPO": _import_ddppo,
     "DQN": _import_dqn,
-    "SimpleQ": _import_simple_q,
-    "APEX": _import_apex,
-    "A3C": _import_a3c,
-    "A2C": _import_a2c,
-    "PG": _import_pg,
+    "DREAMER": _import_dreamer,
     "IMPALA": _import_impala,
-    "QMIX": _import_qmix,
-    "APPO": _import_appo,
-    "DDPPO": _import_ddppo,
-    "MARWIL": _import_marwil,
     "MAML": _import_maml,
+    "MARWIL": _import_marwil,
     "MBMPO": _import_mbmpo,
-    "DREAMER": _import_dreamer,
+    "PG": _import_pg,
+    "PPO": _import_ppo,
+    "QMIX": _import_qmix,
+    "SAC": _import_sac,
+    "SimpleQ": _import_simple_q,
+    "TD3": _import_td3,
 }
 
 
diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py
index 40107668cc3a..92e252e52181 100644
--- a/rllib/agents/trainer.py
+++ b/rllib/agents/trainer.py
@@ -1160,6 +1160,13 @@ def __setstate__(self, state: dict):
         if "optimizer" in state:
             self.optimizer.restore(state["optimizer"])
 
+    @staticmethod
+    def with_updates(**overrides) -> Type["Trainer"]:
+        raise NotImplementedError(
+            "`with_updates` may only be called on Trainer sub-classes "
+            "that were generated via the `ray.rllib.agents.trainer_template."
+            "build_trainer()` function!")
+
     def _register_if_needed(self, env_object: Union[str, EnvType]):
         if isinstance(env_object, str):
             return env_object
diff --git a/rllib/agents/trainer_template.py b/rllib/agents/trainer_template.py
index 1461d46b9583..2241621d9a0b 100644
--- a/rllib/agents/trainer_template.py
+++ b/rllib/agents/trainer_template.py
@@ -139,27 +139,30 @@ def _before_evaluate(self):
             if before_evaluate_fn:
                 before_evaluate_fn(self)
 
+        @override(Trainer)
         def __getstate__(self):
             state = Trainer.__getstate__(self)
             state["train_exec_impl"] = (
                 self.train_exec_impl.shared_metrics.get().save())
             return state
 
+        @override(Trainer)
         def __setstate__(self, state):
             Trainer.__setstate__(self, state)
             self.train_exec_impl.shared_metrics.get().restore(
                 state["train_exec_impl"])
 
-    def with_updates(**overrides):
-        """Build a copy of this trainer with the specified overrides.
+        @staticmethod
+        @override(Trainer)
+        def with_updates(**overrides) -> Type[Trainer]:
+            """Build a copy of this trainer with the specified overrides.
 
-        Arguments:
-            overrides (dict): use this to override any of the arguments
-                originally passed to build_trainer() for this policy.
-        """
-        return build_trainer(**dict(original_kwargs, **overrides))
+            Keyword Args:
+                overrides (dict): use this to override any of the arguments
+                    originally passed to build_trainer() for this policy.
+            """
+            return build_trainer(**dict(original_kwargs, **overrides))
 
-    trainer_cls.with_updates = staticmethod(with_updates)
     trainer_cls.__name__ = name
     trainer_cls.__qualname__ = name
     return trainer_cls
diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py
index be957e1fecca..b3755d8e371d 100644
--- a/rllib/evaluation/sampler.py
+++ b/rllib/evaluation/sampler.py
@@ -1093,7 +1093,10 @@ def _process_observations_w_trajectory_view_api(
 
         # Invoke the step callback after the step is logged to the episode
         callbacks.on_episode_step(
-            worker=worker, base_env=base_env, episode=episode)
+            worker=worker,
+            base_env=base_env,
+            episode=episode,
+            env_index=env_id)
 
         # Cut the batch if ...
         # - all-agents-done and not packing multiple episodes into one
diff --git a/rllib/examples/env/halfcheetah.py b/rllib/examples/env/mbmpo_env.py
similarity index 57%
rename from rllib/examples/env/halfcheetah.py
rename to rllib/examples/env/mbmpo_env.py
index 70f946468a29..22315e547036 100644
--- a/rllib/examples/env/halfcheetah.py
+++ b/rllib/examples/env/mbmpo_env.py
@@ -1,21 +1,5 @@
 import numpy as np
-from gym.envs.mujoco import HalfCheetahEnv
-import inspect
-
-
-def get_all_function_arguments(function, locals):
-    kwargs_dict = {}
-    for arg in inspect.getfullargspec(function).kwonlyargs:
-        if arg not in ["args", "kwargs"]:
-            kwargs_dict[arg] = locals[arg]
-    args = [locals[arg] for arg in inspect.getfullargspec(function).args]
-
-    if "args" in locals:
-        args += locals["args"]
-
-    if "kwargs" in locals:
-        kwargs_dict.update(locals["kwargs"])
-    return args, kwargs_dict
+from gym.envs.mujoco import HalfCheetahEnv, HopperEnv
 
 
 class HalfCheetahWrapper(HalfCheetahEnv):
@@ -42,8 +26,28 @@ def reward(self, obs, action, obs_next):
             return np.minimum(np.maximum(-1000.0, reward), 1000.0)
 
 
+class HopperWrapper(HopperEnv):
+    """Hopper Wrapper that wraps Mujoco Hopper-v2 env
+    with an additional defined reward function for model-based RL.
+
+    This is currently used for MBMPO.
+    """
+
+    def __init__(self, *args, **kwargs):
+        HopperEnv.__init__(self, *args, **kwargs)
+
+    def reward(self, obs, action, obs_next):
+        alive_bonus = 1.0
+        assert obs.ndim == 2 and action.ndim == 2
+        assert obs.shape == obs_next.shape and action.shape[0] == obs.shape[0]
+        vel = obs_next[:, 5]
+        ctrl_cost = 1e-3 * np.sum(np.square(action), axis=1)
+        reward = vel + alive_bonus - ctrl_cost
+        return np.minimum(np.maximum(-1000.0, reward), 1000.0)
+
+
 if __name__ == "__main__":
-    env = HalfCheetahWrapper()
+    env = HopperWrapper()
     env.reset()
     for _ in range(1000):
         env.step(env.action_space.sample())
diff --git a/rllib/examples/unity3d_env_local.py b/rllib/examples/unity3d_env_local.py
index 60eace4a8a90..1c4c7c45b410 100644
--- a/rllib/examples/unity3d_env_local.py
+++ b/rllib/examples/unity3d_env_local.py
@@ -117,18 +117,18 @@
         config["exploration_config"] = {
             "type": "Curiosity",
             "eta": 0.1,
-            "lr": tune.grid_search([0.0003, 0.001]),
+            "lr": 0.001,
             # No actual feature net: map directly from observations to feature
             # vector (linearly).
             "feature_net_config": {
-                "fcnet_hiddens": tune.grid_search([[], [256]]),
+                "fcnet_hiddens": [],
                 "fcnet_activation": "relu",
             },
             "sub_exploration": {
                 "type": "StochasticSampling",
             },
-            "forward_net_activation": tune.grid_search(["relu", "swish"]),
-            "inverse_net_activation": tune.grid_search(["relu", "swish"]),
+            "forward_net_activation": "relu",
+            "inverse_net_activation": "relu",
         }
 
     stop = {
diff --git a/rllib/offline/json_reader.py b/rllib/offline/json_reader.py
index 1229bdd07f24..e6315f561720 100644
--- a/rllib/offline/json_reader.py
+++ b/rllib/offline/json_reader.py
@@ -93,7 +93,7 @@ def _postprocess_if_needed(self,
             return SampleBatch.concat_samples(out)
         else:
             # TODO(ekl) this is trickier since the alignments between agent
-            # trajectories in the episode are not available any more.
+            #  trajectories in the episode are not available any more.
             raise NotImplementedError(
                 "Postprocessing of multi-agent data not implemented yet.")
 
diff --git a/rllib/policy/torch_policy_template.py b/rllib/policy/torch_policy_template.py
index 1e1fd480665e..ec91a58d13ae 100644
--- a/rllib/policy/torch_policy_template.py
+++ b/rllib/policy/torch_policy_template.py
@@ -85,7 +85,7 @@ def build_torch_policy(
             values given the policy and training batch. If None,
             will use `TorchPolicy.extra_grad_info()` instead. The stats dict is
             used for logging (e.g. in TensorBoard).
-        extra_action_out_fn (Optional[Callable[[Policy, Dict[str, TensorType,
+        extra_action_out_fn (Optional[Callable[[Policy, Dict[str, TensorType],
             List[TensorType], ModelV2, TorchDistributionWrapper]], Dict[str,
             TensorType]]]): Optional callable that returns a dict of extra
             values to include in experiences. If None, no extra computations
diff --git a/rllib/tuned_examples/marwil/cartpole-bc.yaml b/rllib/tuned_examples/marwil/cartpole-bc.yaml
new file mode 100644
index 000000000000..c0c0af0da741
--- /dev/null
+++ b/rllib/tuned_examples/marwil/cartpole-bc.yaml
@@ -0,0 +1,20 @@
+# To generate training data, first run:
+# $ ./train.py --run=PPO --env=CartPole-v0 \
+#      --stop='{"timesteps_total": 50000}' \
+#      --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
+cartpole-bc:
+    env: CartPole-v0
+    run: BC
+    stop:
+        timesteps_total: 500000
+    config:
+        # Works for both torch and tf.
+        framework: tf
+        # In order to evaluate on an actual environment, use these following
+        # settings:
+        evaluation_num_workers: 1
+        evaluation_interval: 1
+        evaluation_config:
+            input: sampler
+        # The historic (offline) data file from the PPO run (at the top).
+        input: /tmp/out
diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/tuned_examples/marwil/cartpole-marwil.yaml
index 6e9643778a58..06759ef9510b 100644
--- a/rllib/tuned_examples/marwil/cartpole-marwil.yaml
+++ b/rllib/tuned_examples/marwil/cartpole-marwil.yaml
@@ -16,8 +16,6 @@ cartpole-marwil:
         evaluation_interval: 1
         evaluation_config:
             input: sampler
-        # Compare IL (beta=0) vs MARWIL.
-        beta:
-            grid_search: [0, 1]
+        beta: 1.0  # Compare to behavior cloning (beta=0.0).
         # The historic (offline) data file from the PPO run (at the top).
         input: /tmp/out
diff --git a/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml b/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml
index 9e69fde03ffb..7980894aff08 100644
--- a/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml
+++ b/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml
@@ -1,11 +1,12 @@
-halfcheetah-mb-mpo:
-    env: ray.rllib.examples.env.halfcheetah.HalfCheetahWrapper
+halfcheetah-mbmpo:
+    env: ray.rllib.examples.env.mbmpo_env.HalfCheetahWrapper
     run: MBMPO
     stop:
         training_iteration: 500
     config:
          # Only supported in torch right now
         framework: torch
+        # 200 in paper, 1000 will take forever
         horizon: 200
         num_envs_per_worker: 20
         inner_adaptation_steps: 1
@@ -14,12 +15,13 @@ halfcheetah-mb-mpo:
         lambda: 1.0
         lr: 0.001
         clip_param: 0.5
-        kl_target: 0.01
+        kl_target: 0.003
         kl_coeff: 0.0000000001
         num_workers: 20
         num_gpus: 1
         inner_lr: 0.001
         clip_actions: False
+        num_maml_steps: 15
         model:
             fcnet_hiddens: [32, 32]
             free_log_std: True
diff --git a/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml b/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml
new file mode 100644
index 000000000000..28d6a0b54d2e
--- /dev/null
+++ b/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml
@@ -0,0 +1,27 @@
+hopper-mbmpo:
+    env: ray.rllib.examples.env.mbmpo_env.HopperWrapper
+    run: MBMPO
+    stop:
+        training_iteration: 500
+    config:
+        # Only supported in torch right now
+        framework: torch
+        # 200 in paper, 1000 will take forever
+        horizon: 200
+        num_envs_per_worker: 20
+        inner_adaptation_steps: 1
+        maml_optimizer_steps: 8
+        gamma: 0.99
+        lambda: 1.0
+        lr: 0.001
+        clip_param: 0.5
+        kl_target: 0.003
+        kl_coeff: 0.0000000001
+        num_workers: 20
+        num_gpus: 1
+        inner_lr: 0.001
+        clip_actions: False
+        num_maml_steps: 15
+        model:
+            fcnet_hiddens: [32, 32]
+            free_log_std: True
diff --git a/src/ray/core_worker/task_manager.cc b/src/ray/core_worker/task_manager.cc
index 9ed0ce0e0cd6..8d1fc2eeb435 100644
--- a/src/ray/core_worker/task_manager.cc
+++ b/src/ray/core_worker/task_manager.cc
@@ -301,8 +301,8 @@ bool TaskManager::PendingTaskFailed(const TaskID &task_id, rpc::ErrorType error_
   if (num_retries_left != 0) {
     auto retries_str =
         num_retries_left == -1 ? "infinite" : std::to_string(num_retries_left);
-    RAY_LOG(ERROR) << retries_str << " retries left for task " << spec.TaskId()
-                   << ", attempting to resubmit.";
+    RAY_LOG(INFO) << retries_str << " retries left for task " << spec.TaskId()
+                  << ", attempting to resubmit.";
     retry_task_callback_(spec, /*delay=*/true);
     will_retry = true;
   } else {
@@ -315,8 +315,8 @@ bool TaskManager::PendingTaskFailed(const TaskID &task_id, rpc::ErrorType error_
            (current_time_ms() - last_log_time_ms_) >
                kTaskFailureLoggingFrequencyMillis)) {
         if (num_failure_logs_++ == kTaskFailureThrottlingThreshold) {
-          RAY_LOG(ERROR) << "Too many failure logs, throttling to once every "
-                         << kTaskFailureLoggingFrequencyMillis << " millis.";
+          RAY_LOG(WARNING) << "Too many failure logs, throttling to once every "
+                           << kTaskFailureLoggingFrequencyMillis << " millis.";
         }
         last_log_time_ms_ = current_time_ms();
         if (status != nullptr) {
diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc
index 7d4fa0961dfd..3e5514bf47a6 100644
--- a/src/ray/raylet/node_manager.cc
+++ b/src/ray/raylet/node_manager.cc
@@ -2242,11 +2242,11 @@ void NodeManager::MarkObjectsAsFailed(
       // If we failed to save the error code, log a warning and push an error message
       // to the driver.
       std::ostringstream stream;
-      stream << "An plasma error (" << status.ToString() << ") occurred while saving"
+      stream << "A plasma error (" << status.ToString() << ") occurred while saving"
              << " error code to object " << object_id << ". Anyone who's getting this"
              << " object may hang forever.";
       std::string error_message = stream.str();
-      RAY_LOG(WARNING) << error_message;
+      RAY_LOG(ERROR) << error_message;
       auto error_data_ptr =
           gcs::CreateErrorTableData("task", error_message, current_time_ms(), job_id);
       RAY_CHECK_OK(gcs_client_->Errors().AsyncReportJobError(error_data_ptr, nullptr));
diff --git a/src/ray/raylet/worker_pool.cc b/src/ray/raylet/worker_pool.cc
index caf4f67e8d9c..6ade2367d735 100644
--- a/src/ray/raylet/worker_pool.cc
+++ b/src/ray/raylet/worker_pool.cc
@@ -133,6 +133,10 @@ WorkerPool::WorkerPool(boost::asio::io_service &io_service, int num_workers,
 void WorkerPool::Start(int num_workers) {
   RAY_CHECK(!RayConfig::instance().enable_multi_tenancy());
   for (auto &entry : states_by_lang_) {
+    if (entry.first == Language::JAVA) {
+      // Disable initial workers for Java.
+      continue;
+    }
     auto &state = entry.second;
     int num_worker_processes = static_cast<int>(
         std::ceil(static_cast<double>(num_workers) / state.num_workers_per_process));
@@ -387,9 +391,15 @@ Process WorkerPool::StartProcess(const std::vector<std::string> &worker_command_
   argv.push_back(NULL);
   Process child(argv.data(), io_service_, ec, /*decouple=*/false, env);
   if (!child.IsValid() || ec) {
-    // The worker failed to start. This is a fatal error.
-    RAY_LOG(FATAL) << "Failed to start worker with return value " << ec << ": "
-                   << ec.message();
+    // Error code 24 is EMFILE (too many open files), caused by the `ulimit -n` limit.
+    if (ec.value() == 24) {
+      RAY_LOG(FATAL) << "Too many workers, failed to create a file. Try setting "
+                     << "`ulimit -n <num_files>` then restart Ray.";
+    } else {
+      // The worker failed to start. This is a fatal error.
+      RAY_LOG(FATAL) << "Failed to start worker with return value " << ec << ": "
+                     << ec.message();
+    }
   }
   return child;
 }
diff --git a/src/ray/raylet/worker_pool_test.cc b/src/ray/raylet/worker_pool_test.cc
index 56b251e66069..2d1a831d389f 100644
--- a/src/ray/raylet/worker_pool_test.cc
+++ b/src/ray/raylet/worker_pool_test.cc
@@ -273,13 +273,10 @@ TEST_P(WorkerPoolTest, StartupJavaWorkerProcessCount) {
 TEST_P(WorkerPoolTest, InitialWorkerProcessCount) {
   if (!RayConfig::instance().enable_multi_tenancy()) {
     worker_pool_->Start(1);
-    // Here we try to start only 1 worker for each worker language. But since each Java
-    // worker process contains exactly NUM_WORKERS_PER_PROCESS_JAVA (3) workers here,
-    // it's expected to see 3 workers for Java and 1 worker for Python, instead of 1 for
-    // each worker language.
-    ASSERT_NE(worker_pool_->NumWorkersStarting(), 1 * LANGUAGES.size());
-    ASSERT_EQ(worker_pool_->NumWorkersStarting(), 1 + NUM_WORKERS_PER_PROCESS_JAVA);
-    ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), LANGUAGES.size());
+    // Here we try to start only 1 worker for each worker language. But since we disabled
+    // initial workers for Java, we expect to see only 1 worker, which is a Python worker.
+    ASSERT_EQ(worker_pool_->NumWorkersStarting(), 1);
+    ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), 1);
   } else {
     ASSERT_EQ(worker_pool_->NumWorkersStarting(), 0);
     ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), 0);
diff --git a/streaming/BUILD.bazel b/streaming/BUILD.bazel
index 83d3c3a5e168..175643069a28 100644
--- a/streaming/BUILD.bazel
+++ b/streaming/BUILD.bazel
@@ -150,6 +150,7 @@ cc_library(
         "@bazel_tools//src/conditions:windows": [
             # TODO(mehrdadn): This is to resolve symbols on Windows for now. Should remove this later. (See d7f8d18.)
             "//:core_worker_lib",
+            "//:exported_streaming_internal",
         ],
         "//conditions:default": [
             "core_worker_lib.so",
diff --git a/streaming/java/pom.xml b/streaming/java/pom.xml
index 003c7670ada6..91c4186816c0 100644
--- a/streaming/java/pom.xml
+++ b/streaming/java/pom.xml
@@ -26,6 +26,12 @@
     <developerConnection>scm:git:ssh://github.com:ray-project/ray.git</developerConnection>
   </scm>
 
+  <developers>
+    <developer>
+      <organizationUrl>https://ray.io</organizationUrl>
+    </developer>
+  </developers>
+
   <distributionManagement>
     <snapshotRepository>
       <id>ossrh</id>
diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java
index 1c820e6f23c6..e331208247ed 100644
--- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java
+++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java
@@ -68,7 +68,6 @@ static synchronized void startCluster(boolean isCrossLanguage, boolean isLocal)
         String.format("--raylet-socket-name=%s", RAYLET_SOCKET_NAME),
         String.format("--node-manager-port=%s", nodeManagerPort),
         "--load-code-from-local",
-        "--include-java",
         "--java-worker-options=" + workerOptions,
         "--system-config=" + new Gson().toJson(config)
     );
diff --git a/streaming/python/examples/wordcount.py b/streaming/python/examples/wordcount.py
index 66b1a811272d..2f62b19dad54 100644
--- a/streaming/python/examples/wordcount.py
+++ b/streaming/python/examples/wordcount.py
@@ -65,7 +65,7 @@ def splitter(line):
     args = parser.parse_args()
     titles_file = str(args.titles_file)
 
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
 
     ctx = StreamingContext.Builder() \
         .option(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL) \
diff --git a/streaming/python/tests/test_failover.py b/streaming/python/tests/test_failover.py
index def93f43edc2..adab217e09b3 100644
--- a/streaming/python/tests/test_failover.py
+++ b/streaming/python/tests/test_failover.py
@@ -8,7 +8,7 @@
 
 def test_word_count():
     try:
-        ray.init(_load_code_from_local=True, _include_java=True)
+        ray.init(_load_code_from_local=True)
         # time.sleep(10) # for gdb to attach
         ctx = StreamingContext.Builder() \
             .option("streaming.context-backend.type", "local_file") \
diff --git a/streaming/python/tests/test_hybrid_stream.py b/streaming/python/tests/test_hybrid_stream.py
index 7d79b9a0ef4d..e257f0d9fd5a 100644
--- a/streaming/python/tests/test_hybrid_stream.py
+++ b/streaming/python/tests/test_hybrid_stream.py
@@ -35,7 +35,6 @@ def test_hybrid_stream():
     assert not ray.is_initialized()
     ray.init(
         _load_code_from_local=True,
-        _include_java=True,
         _java_worker_options=java_worker_options,
         _system_config={"num_workers_per_process_java": 1})
 
diff --git a/streaming/python/tests/test_stream.py b/streaming/python/tests/test_stream.py
index 06dbeba850a5..f99033d19959 100644
--- a/streaming/python/tests/test_stream.py
+++ b/streaming/python/tests/test_stream.py
@@ -3,7 +3,7 @@
 
 
 def test_data_stream():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder().build()
     stream = ctx.from_values(1, 2, 3)
     java_stream = stream.as_java_stream()
@@ -17,7 +17,7 @@ def test_data_stream():
 
 
 def test_key_data_stream():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder().build()
     key_stream = ctx.from_values(
         "a", "b", "c").map(lambda x: (x, 1)).key_by(lambda x: x[0])
@@ -32,7 +32,7 @@ def test_key_data_stream():
 
 
 def test_stream_config():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder().build()
     stream = ctx.from_values(1, 2, 3)
     stream.with_config("k1", "v1")
diff --git a/streaming/python/tests/test_union_stream.py b/streaming/python/tests/test_union_stream.py
index 4f24226c4b9f..0c655b1d03d7 100644
--- a/streaming/python/tests/test_union_stream.py
+++ b/streaming/python/tests/test_union_stream.py
@@ -5,7 +5,7 @@
 
 
 def test_union_stream():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder() \
         .option("streaming.metrics.reporters", "") \
         .build()
diff --git a/streaming/python/tests/test_word_count.py b/streaming/python/tests/test_word_count.py
index 07127b96ed10..372ae3e1e44e 100644
--- a/streaming/python/tests/test_word_count.py
+++ b/streaming/python/tests/test_word_count.py
@@ -4,7 +4,7 @@
 
 
 def test_word_count():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder() \
         .build()
     ctx.read_text_file(__file__) \
@@ -23,7 +23,7 @@ def test_word_count():
 
 
 def test_simple_word_count():
-    ray.init(_load_code_from_local=True, _include_java=True)
+    ray.init(_load_code_from_local=True)
     ctx = StreamingContext.Builder() \
         .build()
     sink_file = "/tmp/ray_streaming_test_simple_word_count.txt"