diff --git a/README.rst b/README.rst index 1888eb37a68c..b4b48da3bbf3 100644 --- a/README.rst +++ b/README.rst @@ -120,7 +120,7 @@ This example runs a parallel grid search to optimize an example objective functi print("Best config: ", analysis.get_best_config(metric="mean_loss")) # Get a dataframe for analyzing trial results. - df = analysis.dataframe() + df = analysis.results_df If TensorBoard is installed, automatically visualize all trial results: diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index 83e0979e5db3..6317b37e88f2 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -24,9 +24,10 @@ sphinx-gallery sphinx-jsonschema sphinx-tabs sphinx-version-warning -sphinx_rtd_theme +# TODO(simon): Use sphinx book theme released version +git+https://github.com/executablebooks/sphinx-book-theme.git@0a87d26e214c419d2e6efcadddab4be8ae7b2c21 tabulate uvicorn werkzeug -tune-sklearn==0.0.5 +git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn scikit-optimize diff --git a/doc/requirements-rtd.txt b/doc/requirements-rtd.txt index 5245aa83a68f..1c6cd5322c80 100644 --- a/doc/requirements-rtd.txt +++ b/doc/requirements-rtd.txt @@ -7,5 +7,4 @@ alabaster>=0.7,<0.8,!=0.7.5 commonmark==0.8.1 recommonmark==0.5.0 sphinx<2 -sphinx-rtd-theme<0.5 readthedocs-sphinx-ext<1.1 diff --git a/doc/source/_static/css/custom.css b/doc/source/_static/css/custom.css index 68935539b552..089d5c7e0448 100644 --- a/doc/source/_static/css/custom.css +++ b/doc/source/_static/css/custom.css @@ -1,32 +1,54 @@ /*Extends the docstring signature box.*/ .rst-content dl:not(.docutils) dt { - display: block; - padding: 10px; - word-wrap: break-word; - padding-right: 100px; + display: block; + padding: 10px; + word-wrap: break-word; + padding-right: 100px; } /*Lists in an admonition note do not have awkward whitespace below.*/ .rst-content .admonition-note .section ul { - margin-bottom: 0px + margin-bottom: 0px; } /*Properties become blue (classmethod, 
staticmethod, property)*/ .rst-content dl dt em.property { - color: #2980B9; - text-transform: uppercase + color: #2980b9; + text-transform: uppercase; } -.rst-content .section ol p, .rst-content .section ul p { - margin-bottom: 0px; +.rst-content .section ol p, +.rst-content .section ul p { + margin-bottom: 0px; } div.sphx-glr-bigcontainer { - display: inline-block; - width: 100% + display: inline-block; + width: 100%; } - -td.tune-colab, th.tune-colab { +td.tune-colab, +th.tune-colab { border: 1px solid #dddddd; text-align: left; padding: 8px; } + +/* Adjustment to Sphinx Book Theme */ +.table td { + /* Remove row spacing */ + padding: 0; +} + +table { + /* Force full width for all table */ + width: 136% !important; +} + +img.inline-figure { + /* Override the display: block for img */ + display: inherit !important; +} + +#version-warning-banner { + /* Make version warning clickable */ + z-index: 1; +} diff --git a/doc/source/_static/favicon.ico b/doc/source/_static/favicon.ico new file mode 100644 index 000000000000..04e72e7643eb Binary files /dev/null and b/doc/source/_static/favicon.ico differ diff --git a/doc/source/conf.py b/doc/source/conf.py index 8ff9e7f274cc..dca1d11efc13 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -114,6 +114,8 @@ def __getattr__(cls, name): 'versionwarning.extension', ] +versionwarning_admonition_type = "tip" + versionwarning_messages = { "master": ( "This document is for the master branch. " @@ -125,7 +127,7 @@ def __getattr__(cls, name): ), } -versionwarning_body_selector = "div.document" +versionwarning_body_selector = "#main-content" sphinx_gallery_conf = { "examples_dirs": ["../examples", "tune/_tutorials"], # path to example scripts @@ -233,33 +235,38 @@ def __getattr__(cls, name): # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-import sphinx_rtd_theme -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_theme = "sphinx_book_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = { + "repository_url": "https://github.com/ray-project/ray", + "use_repository_button": True, + "use_issues_button": True, + "use_edit_page_button": True, + "path_to_docs": "doc/source", + "home_page_in_toc": True, +} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +html_title = f"Ray v{release}" # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +html_logo = "images/ray_logo.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +html_favicon = "_static/favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -280,7 +287,7 @@ def __getattr__(cls, name): #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {'**': ['index.html']} +# html_sidebars = {'**': ['index.html']} # Additional templates that should be rendered to pages, maps page names to # template names. 
diff --git a/doc/source/ray-overview/basics.rst b/doc/source/ray-overview/basics.rst index b61fc2ae87a9..963c2ed2e0a3 100644 --- a/doc/source/ray-overview/basics.rst +++ b/doc/source/ray-overview/basics.rst @@ -1,10 +1,4 @@ -.. raw:: html - - - Fork me on GitHub - - .. image:: https://github.com/ray-project/ray/raw/master/doc/source/images/ray_header_logo.png **Ray provides a simple, universal API for building distributed applications.** @@ -13,7 +7,7 @@ Ray accomplishes this mission by: 1. Providing simple primitives for building and running distributed applications. 2. Enabling end users to parallelize single machine code, with little to zero code changes. -3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications. +3. Including a large ecosystem of applications, libraries, and tools on top of the core Ray to enable complex applications. **Ray Core** provides the simple primitives for application building. diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst index 32bc0a7a93cd..476c0f4c947a 100644 --- a/doc/source/rllib-algorithms.rst +++ b/doc/source/rllib-algorithms.rst @@ -13,6 +13,7 @@ Algorithm Frameworks Discrete Actions Continuous Actions Multi- =================== ========== ======================= ================== =========== ============================================================= `A2C, A3C`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_ `ARS`_ tf + torch **Yes** **Yes** No +`BC`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_ `ES`_ tf + torch **Yes** **Yes** No `DDPG`_, `TD3`_ tf + torch No **Yes** **Yes** `APEX-DDPG`_ tf + torch No **Yes** **Yes** @@ -22,6 +23,7 @@ Algorithm Frameworks Discrete Actions Continuous Actions Multi- `IMPALA`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_ `MAML`_ tf + torch No **Yes** No 
`MARWIL`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_ +`MBMPO`_ torch No **Yes** No `PG`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_ `PPO`_, `APPO`_ tf + torch **Yes** `+parametric`_ **Yes** **Yes** `+RNN`_, `+LSTM auto-wrapping`_, `+Transformer`_, `+autoreg`_ `SAC`_ tf + torch **Yes** **Yes** **Yes** @@ -442,6 +444,35 @@ Tuned examples: HalfCheetahRandDirecEnv (`Env `__ `[implementation] `__ + +RLlib's MBMPO implementation is a Dyna-styled model-based RL method that learns based on the predictions of an ensemble of transition-dynamics models. Similar to MAML, MBMPO metalearns an optimal policy by treating each dynamics model as a different task. Code here is adapted from https://github.com/jonasrothfuss/model_ensemble_meta_learning. Similar to the original paper, MBMPO is evaluated on MuJoCo, with the horizon set to 200 instead of the default 1000. + +Additional statistics are logged in MBMPO. Each MBMPO iteration corresponds to multiple MAML iterations, and ``MAMLIter$i$_DynaTrajInner_$j$_episode_reward_mean`` measures the agent's returns across the dynamics models at iteration ``i`` of MAML and step ``j`` of inner adaptation. Examples can be seen `here `__. + +Tuned examples: `HalfCheetah `__, `Hopper `__ + +**MuJoCo results @100K steps:** `more details `__ + +============= ============ ==================== +MuJoCo env RLlib MBMPO Clavera et al MBMPO +============= ============ ==================== +HalfCheetah 520 ~550 +Hopper 620 ~650 +============= ============ ==================== + +**MBMPO-specific configs** (see also `common configs `__): + +.. literalinclude:: ../../rllib/agents/mbmpo/mbmpo.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ + .. 
_dreamer: Dreamer @@ -517,10 +548,15 @@ Tuned examples: `Humanoid-v1 `__ `[implementation] `__ MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning. MARWIL requires the `offline datasets API `__ to be used. +`[paper] `__ +`[implementation] `__ + +MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. +When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning (see `BC`_). +MARWIL requires the `offline datasets API `__ to be used. Tuned examples: `CartPole-v0 `__ @@ -532,6 +568,29 @@ Tuned examples: `CartPole-v0 `__ +`[implementation] `__ + +Our behavioral cloning implementation is directly derived from our `MARWIL`_ implementation, +with the only difference being the ``beta`` parameter force-set to 0.0. This makes +BC try to match the behavior policy, which generated the offline data, disregarding any resulting rewards. +BC requires the `offline datasets API `__ to be used. + +Tuned examples: `CartPole-v0 `__ + +**BC-specific configs** (see also `common configs `__): + +.. literalinclude:: ../../rllib/agents/marwil/bc.py + :language: python + :start-after: __sphinx_doc_begin__ + :end-before: __sphinx_doc_end__ + + Contextual Bandits (contrib/bandits) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -600,9 +659,11 @@ Tuned examples: `SimpleContextualBandit ` + - |pytorch| :ref:`Model-Based Meta-Policy-Optimization (MBMPO) ` + - |pytorch| |tensorflow| :ref:`Policy Gradients ` - |pytorch| |tensorflow| :ref:`Proximal Policy Optimization (PPO) ` @@ -208,7 +210,9 @@ TensorFlow 2.0 RLlib currently runs in ``tf.compat.v1`` mode. This means eager execution is disabled by default, and RLlib imports TF with ``import tensorflow.compat.v1 as tf; tf.disable_v2_behaviour()``. 
Eager execution can be enabled manually by calling ``tf.enable_eager_execution()`` or setting the ``"eager": True`` trainer config. .. |tensorflow| image:: tensorflow.png + :class: inline-figure :width: 16 .. |pytorch| image:: pytorch.png + :class: inline-figure :width: 16 diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index 937d4d6f3ef0..79db9095f6ec 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -117,7 +117,9 @@ RLlib provides ways to customize almost all aspects of training, including the ` To learn more, proceed to the `table of contents `__. .. |tensorflow| image:: tensorflow.png + :class: inline-figure :width: 24 .. |pytorch| image:: pytorch.png + :class: inline-figure :width: 24 diff --git a/doc/source/tune/_tutorials/tune-sklearn.py b/doc/source/tune/_tutorials/tune-sklearn.py index 7edd15938a21..c21f0ff5a527 100644 --- a/doc/source/tune/_tutorials/tune-sklearn.py +++ b/doc/source/tune/_tutorials/tune-sklearn.py @@ -127,7 +127,7 @@ clf, parameter_grid, search_optimization="bayesian", - n_iter=3, + n_trials=3, early_stopping=True, max_iters=10, ) diff --git a/doc/source/tune/api_docs/analysis.rst b/doc/source/tune/api_docs/analysis.rst index 2ec32f686c17..c9468fbab694 100644 --- a/doc/source/tune/api_docs/analysis.rst +++ b/doc/source/tune/api_docs/analysis.rst @@ -18,7 +18,7 @@ Here are some example operations for obtaining a summary of your experiment: .. code-block:: python # Get a dataframe for the last reported results of all of the trials - df = analysis.dataframe() + df = analysis.results_df # Get a dataframe for the max accuracy seen for each trial df = analysis.dataframe(metric="mean_accuracy", mode="max") diff --git a/doc/source/tune/api_docs/sklearn.rst b/doc/source/tune/api_docs/sklearn.rst index 0067a952ce77..02a015727bdd 100644 --- a/doc/source/tune/api_docs/sklearn.rst +++ b/doc/source/tune/api_docs/sklearn.rst @@ -5,10 +5,16 @@ Scikit-Learn API (tune.sklearn) .. 
_tunegridsearchcv-docs: +TuneGridSearchCV +---------------- + .. autoclass:: ray.tune.sklearn.TuneGridSearchCV :inherited-members: .. _tunesearchcv-docs: +TuneSearchCV +------------ + .. autoclass:: ray.tune.sklearn.TuneSearchCV :inherited-members: diff --git a/doc/source/tune/key-concepts.rst b/doc/source/tune/key-concepts.rst index 11247895bd50..213d680a01c0 100644 --- a/doc/source/tune/key-concepts.rst +++ b/doc/source/tune/key-concepts.rst @@ -219,16 +219,24 @@ Analysis analysis = tune.run(trainable, search_alg=algo, stop={"training_iteration": 20}) - # Get the best hyperparameters - best_hyperparameters = analysis.get_best_config() + best_trial = analysis.best_trial # Get best trial + best_config = analysis.best_config # Get best trial's hyperparameters + best_logdir = analysis.best_logdir # Get best trial's logdir + best_checkpoint = analysis.best_checkpoint # Get best trial's best checkpoint + best_result = analysis.best_result # Get best trial's last results + best_result_df = analysis.best_result_df # Get best result as pandas dataframe This object can also retrieve all training runs as dataframes, allowing you to do ad-hoc data analysis over your results. .. code-block:: python - # Get a dataframe for the max score seen for each trial + # Get a dataframe with the last results for each trial + df_results = analysis.results_df + + # Get a dataframe of results for a specific score or mode df = analysis.dataframe(metric="score", mode="max") + What's Next? 
------------- diff --git a/java/pom.xml b/java/pom.xml index 2f9f21c35741..1fb82f909d10 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -25,6 +25,12 @@ scm:git:ssh://github.com:ray-project/ray.git + + + https://ray.io + + + ossrh diff --git a/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java b/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java index 8c5be8f8fa3e..b6059ce16f59 100644 --- a/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java +++ b/java/runtime/src/main/java/io/ray/runtime/RayNativeRuntime.java @@ -69,8 +69,6 @@ public final class RayNativeRuntime extends AbstractRayRuntime { JniUtils.loadLibrary(BinaryFileUtil.CORE_WORKER_JAVA_LIBRARY, true); LOGGER.debug("Native libraries loaded."); - // Reset library path at runtime. - resetLibraryPath(rayConfig); try { FileUtils.forceMkdir(new File(rayConfig.logDir)); } catch (IOException e) { @@ -78,12 +76,6 @@ public final class RayNativeRuntime extends AbstractRayRuntime { } } - private static void resetLibraryPath(RayConfig rayConfig) { - String separator = System.getProperty("path.separator"); - String libraryPath = String.join(separator, rayConfig.libraryPath); - JniUtils.resetLibraryPath(libraryPath); - } - public RayNativeRuntime(RayConfig rayConfig) { super(rayConfig); loadConfigFromGcs(rayConfig); diff --git a/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java b/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java index 31f6b66d2b8c..df49c008bc49 100644 --- a/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java +++ b/java/runtime/src/main/java/io/ray/runtime/util/JniUtils.java @@ -1,11 +1,9 @@ package io.ray.runtime.util; -import com.google.common.base.Strings; import com.google.common.collect.Sets; import com.sun.jna.NativeLibrary; import io.ray.runtime.config.RayConfig; import java.io.File; -import java.lang.reflect.Field; import java.util.Set; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -48,38 +46,8 @@ public static 
synchronized void loadLibrary(String libraryName, boolean exportSy } System.load(file.getAbsolutePath()); LOGGER.debug("Native library loaded."); - resetLibraryPath(file.getAbsolutePath()); loadedLibs.add(libraryName); } } - /** - * This is a hack to reset library path at runtime. Please don't use it outside of ray - */ - public static synchronized void resetLibraryPath(String libPath) { - if (Strings.isNullOrEmpty(libPath)) { - return; - } - String path = System.getProperty("java.library.path"); - String separator = System.getProperty("path.separator"); - if (Strings.isNullOrEmpty(path)) { - path = ""; - } else { - path += separator; - } - path += String.join(separator, libPath); - - // This is a hack to reset library path at runtime, - // see https://stackoverflow.com/questions/15409223/. - System.setProperty("java.library.path", path); - // Set sys_paths to null so that java.library.path will be re-evaluated next time it is needed. - final Field sysPathsField; - try { - sysPathsField = ClassLoader.class.getDeclaredField("sys_paths"); - sysPathsField.setAccessible(true); - sysPathsField.set(null, null); - } catch (NoSuchFieldException | IllegalAccessException e) { - LOGGER.error("Failed to set library path.", e); - } - } } diff --git a/java/test.sh b/java/test.sh index 70ef6ebbbbfd..36a92f259462 100755 --- a/java/test.sh +++ b/java/test.sh @@ -52,7 +52,7 @@ case "${OSTYPE}" in darwin*) ip=$(ipconfig getifaddr en0);; *) echo "Can't get ip address for ${OSTYPE}"; exit 1;; esac -RAY_BACKEND_LOG_LEVEL=debug ray start --head --redis-port=6379 --redis-password=123456 --include-java --code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar" +RAY_BACKEND_LOG_LEVEL=debug ray start --head --redis-port=6379 --redis-password=123456 --code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar" RAY_BACKEND_LOG_LEVEL=debug java -cp bazel-bin/java/all_tests_deploy.jar -Dray.redis.address="$ip:6379"\ -Dray.redis.password='123456' 
-Dray.job.code-search-path="$PWD/bazel-bin/java/all_tests_deploy.jar" io.ray.test.MultiDriverTest ray stop diff --git a/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java b/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java index 2aeb909887a1..bfe8daa4d9f4 100644 --- a/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java +++ b/java/test/src/main/java/io/ray/test/BaseMultiLanguageTest.java @@ -82,7 +82,6 @@ public void setUp() { String.format("--raylet-socket-name=%s", RAYLET_SOCKET_NAME), String.format("--node-manager-port=%s", nodeManagerPort), "--load-code-from-local", - "--include-java", "--system-config=" + new Gson().toJson(RayConfig.create().rayletConfigParameters), "--code-search-path=" + String.join(":", classpath) ); diff --git a/python/ray/autoscaler/updater.py b/python/ray/autoscaler/updater.py index 8b69714ccbb8..2c905e77ee94 100644 --- a/python/ray/autoscaler/updater.py +++ b/python/ray/autoscaler/updater.py @@ -166,6 +166,8 @@ def sync_file_mounts(self, sync_cmd, step_numbers=(0, 2)): def do_sync(remote_path, local_path, allow_non_existing_paths=False): if allow_non_existing_paths and not os.path.exists(local_path): + cli_logger.print("sync: {} does not exist. Skipping.", + local_path) # Ignore missing source files. 
In the future we should support # the --delete-missing-args command to delete files that have # been removed @@ -204,7 +206,10 @@ def do_sync(remote_path, local_path, allow_non_existing_paths=False): with cli_logger.group( "Processing worker file mounts", _numbered=("[]", previous_steps + 2, total_steps)): + cli_logger.print("synced files: {}", + str(self.cluster_synced_files)) for path in self.cluster_synced_files: + path = os.path.expanduser(path) do_sync(path, path, allow_non_existing_paths=True) else: cli_logger.print( diff --git a/python/ray/dashboard/dashboard.py b/python/ray/dashboard/dashboard.py index 76a75d053168..ee82a5bad00f 100644 --- a/python/ray/dashboard/dashboard.py +++ b/python/ray/dashboard/dashboard.py @@ -806,7 +806,7 @@ def collect(self): # search through all the sub_directories in log directory analysis = Analysis(str(self._logdir)) - df = analysis.dataframe() + df = analysis.dataframe(metric="episode_reward_mean", mode="max") if len(df) == 0 or "trial_id" not in df.columns: return diff --git a/python/ray/job_config.py b/python/ray/job_config.py index ab2c20d4207e..92474a6935e5 100644 --- a/python/ray/job_config.py +++ b/python/ray/job_config.py @@ -33,6 +33,8 @@ def __init__( self.jvm_options = jvm_options if code_search_path is None: self.code_search_path = [] + else: + self.code_search_path = code_search_path def serialize(self): job_config = ray.gcs_utils.JobConfig() diff --git a/python/ray/node.py b/python/ray/node.py index a65cc1e876a8..248ffadfb47f 100644 --- a/python/ray/node.py +++ b/python/ray/node.py @@ -178,8 +178,6 @@ def __init__(self, else: self._webui_url = ( ray.services.get_webui_url_from_redis(redis_client)) - ray_params.include_java = ( - ray.services.include_java_from_redis(redis_client)) if head or not connect_only: # We need to start a local raylet. 
@@ -276,13 +274,14 @@ def merge_resources(env_dict, params_dict): key, params_dict[key], env_dict[key])) return num_cpus, num_gpus, memory, object_store_memory, result - env_resources = {} - env_string = os.getenv(ray_constants.RESOURCES_ENVIRONMENT_VARIABLE) - if env_string: - env_resources = json.loads(env_string) - logger.info(f"Autosaler overriding resources: {env_resources}.") - if not self._resource_spec: + env_resources = {} + env_string = os.getenv( + ray_constants.RESOURCES_ENVIRONMENT_VARIABLE) + if env_string: + env_resources = json.loads(env_string) + logger.info( + f"Autosaler overriding resources: {env_resources}.") num_cpus, num_gpus, memory, object_store_memory, resources = \ merge_resources(env_resources, self._ray_params.resources) self._resource_spec = ResourceSpec( @@ -576,7 +575,6 @@ def start_redis(self): redis_max_clients=self._ray_params.redis_max_clients, redirect_worker_output=True, password=self._ray_params.redis_password, - include_java=self._ray_params.include_java, fate_share=self.kernel_fate_share) assert ( ray_constants.PROCESS_TYPE_REDIS_SERVER not in self.all_processes) @@ -649,16 +647,17 @@ def start_dashboard(self, require_dashboard): redis_client = self.create_redis_client() redis_client.hmset("webui", {"url": self._webui_url}) - def start_plasma_store(self): + def start_plasma_store(self, plasma_directory, object_store_memory): """Start the plasma store.""" stdout_file, stderr_file = self.get_log_file_handles( "plasma_store", unique=True) process_info = ray.services.start_plasma_store( self.get_resource_spec(), + plasma_directory, + object_store_memory, self._plasma_store_socket_name, stdout_file=stdout_file, stderr_file=stderr_file, - plasma_directory=self._ray_params.plasma_directory, huge_pages=self._ray_params.huge_pages, keep_idle=bool(self._config.get("plasma_store_as_thread")), fate_share=self.kernel_fate_share) @@ -688,7 +687,11 @@ def start_gcs_server(self): process_info, ] - def start_raylet(self, use_valgrind=False, 
use_profiler=False): + def start_raylet(self, + plasma_directory, + object_store_memory, + use_valgrind=False, + use_profiler=False): """Start the raylet. Args: @@ -709,21 +712,21 @@ def start_raylet(self, use_valgrind=False, use_profiler=False): self._temp_dir, self._session_dir, self.get_resource_spec(), - self._ray_params.min_worker_port, - self._ray_params.max_worker_port, - self._ray_params.object_manager_port, - self._ray_params.redis_password, - self._ray_params.metrics_agent_port, - self._metrics_export_port, + plasma_directory, + object_store_memory, + min_worker_port=self._ray_params.min_worker_port, + max_worker_port=self._ray_params.max_worker_port, + object_manager_port=self._ray_params.object_manager_port, + redis_password=self._ray_params.redis_password, + metrics_agent_port=self._ray_params.metrics_agent_port, + metrics_export_port=self._metrics_export_port, use_valgrind=use_valgrind, use_profiler=use_profiler, stdout_file=stdout_file, stderr_file=stderr_file, config=self._config, - include_java=self._ray_params.include_java, java_worker_options=self._ray_params.java_worker_options, load_code_from_local=self._ray_params.load_code_from_local, - plasma_directory=self._ray_params.plasma_directory, huge_pages=self._ray_params.huge_pages, fate_share=self.kernel_fate_share, socket_to_use=self.socket, @@ -810,8 +813,17 @@ def start_ray_processes(self): logger.debug(f"Process STDOUT and STDERR is being " f"redirected to {self._logs_dir}.") - self.start_plasma_store() - self.start_raylet() + # Make sure we don't call `determine_plasma_store_config` multiple + # times to avoid printing multiple warnings. 
+ resource_spec = self.get_resource_spec() + plasma_directory, object_store_memory = \ + ray.services.determine_plasma_store_config( + resource_spec.object_store_memory, + plasma_directory=self._ray_params.plasma_directory, + huge_pages=self._ray_params.huge_pages + ) + self.start_plasma_store(plasma_directory, object_store_memory) + self.start_raylet(plasma_directory, object_store_memory) if "RAY_USE_NEW_DASHBOARD" not in os.environ: self.start_reporter() diff --git a/python/ray/parameter.py b/python/ray/parameter.py index 4a4ec4e85d63..811d9539ac35 100644 --- a/python/ray/parameter.py +++ b/python/ray/parameter.py @@ -84,8 +84,6 @@ class RayParams: monitor the log files for all processes on this node and push their contents to Redis. autoscaling_config: path to autoscaling config file. - include_java (bool): If True, the raylet backend can also support - Java worker. java_worker_options (list): The command options for Java worker. load_code_from_local: Whether load code from local file or from GCS. metrics_agent_port(int): The port to bind metrics agent. 
@@ -138,7 +136,6 @@ def __init__(self, temp_dir=None, include_log_monitor=None, autoscaling_config=None, - include_java=False, java_worker_options=None, load_code_from_local=False, start_initial_python_workers_for_first_job=False, @@ -183,7 +180,6 @@ def __init__(self, self.temp_dir = temp_dir self.include_log_monitor = include_log_monitor self.autoscaling_config = autoscaling_config - self.include_java = include_java self.java_worker_options = java_worker_options self.load_code_from_local = load_code_from_local self.metrics_agent_port = metrics_agent_port diff --git a/python/ray/scripts/scripts.py b/python/ray/scripts/scripts.py index 1d5a0bd053eb..dbd783b6a0bb 100644 --- a/python/ray/scripts/scripts.py +++ b/python/ray/scripts/scripts.py @@ -343,11 +343,6 @@ def dashboard(cluster_config_file, cluster_name, port, remote_port): "--temp-dir", default=None, help="manually specify the root temporary dir of the Ray process") -@click.option( - "--include-java", - is_flag=True, - default=None, - help="Enable Java worker support.") @click.option( "--java-worker-options", required=False, @@ -397,7 +392,7 @@ def start(node_ip_address, redis_address, address, redis_port, port, head, include_webui, webui_host, include_dashboard, dashboard_host, dashboard_port, block, plasma_directory, huge_pages, autoscaling_config, no_redirect_worker_output, no_redirect_output, - plasma_store_socket_name, raylet_socket_name, temp_dir, include_java, + plasma_store_socket_name, raylet_socket_name, temp_dir, java_worker_options, code_search_path, load_code_from_local, system_config, lru_evict, enable_object_reconstruction, metrics_export_port, log_style, log_color, verbose): @@ -505,7 +500,6 @@ def start(node_ip_address, redis_address, address, redis_port, port, plasma_store_socket_name=plasma_store_socket_name, raylet_socket_name=raylet_socket_name, temp_dir=temp_dir, - include_java=include_java, include_dashboard=include_dashboard, dashboard_host=dashboard_host, dashboard_port=dashboard_port, 
@@ -564,7 +558,6 @@ def start(node_ip_address, redis_address, address, redis_port, port, num_redis_shards=num_redis_shards, redis_max_clients=redis_max_clients, autoscaling_config=autoscaling_config, - include_java=False, ) node = ray.node.Node( @@ -622,7 +615,7 @@ def start(node_ip_address, redis_address, address, redis_port, port, " ray stop".format( redis_address, " --redis-password='" + redis_password + "'" if redis_password else "", - ", redis_password='" + redis_password + "'" + ", _redis_password='" + redis_password + "'" if redis_password else "")) else: # Start Ray on a non-head node. @@ -671,12 +664,6 @@ def start(node_ip_address, redis_address, address, redis_port, port, raise ValueError( "If --head is not passed in, the --include-dashboard" "flag is not relevant.") - if include_java is not None: - cli_logger.abort("`{}` should not be specified without `{}`.", - cf.bold("--include-java"), cf.bold("--head")) - - raise ValueError("--include-java should only be set for the head " - "node.") # Wait for the Redis server to be started. And throw an exception if we # can't connect to it. @@ -1472,7 +1459,7 @@ def memory(address, redis_password): if not address: address = services.find_redis_address_or_die() logger.info(f"Connecting to Ray instance at {address}.") - ray.init(address=address, redis_password=redis_password) + ray.init(address=address, _redis_password=redis_password) print(ray.internal.internal_api.memory_summary()) diff --git a/python/ray/services.py b/python/ray/services.py index b2b1380a83fa..1283925f811a 100644 --- a/python/ray/services.py +++ b/python/ray/services.py @@ -123,18 +123,6 @@ def new_port(): return random.randint(10000, 65535) -def include_java_from_redis(redis_client): - """This is used for query include_java bool from redis. - - Args: - redis_client (StrictRedis): The redis client to GCS. - - Returns: - True if this cluster backend enables Java worker. 
- """ - return redis_client.get("INCLUDE_JAVA") == b"1" - - def find_redis_address_or_die(): pids = psutil.pids() redis_addresses = set() @@ -683,7 +671,6 @@ def start_redis(node_ip_address, redirect_worker_output=False, password=None, use_credis=None, - include_java=False, fate_share=None): """Start the Redis global state store. @@ -709,8 +696,6 @@ def start_redis(node_ip_address, use_credis: If True, additionally load the chain-replicated libraries into the redis servers. Defaults to None, which means its value is set by the presence of "RAY_USE_NEW_GCS" in os.environ. - include_java (bool): If True, the raylet backend can also support - Java worker. Returns: A tuple of the address for the primary Redis shard, a list of @@ -784,10 +769,6 @@ def start_redis(node_ip_address, primary_redis_client.set("RedirectOutput", 1 if redirect_worker_output else 0) - # put the include_java bool to primary redis-server, so that other nodes - # can access it and know whether or not to enable cross-languages. - primary_redis_client.set("INCLUDE_JAVA", 1 if include_java else 0) - # Init job counter to GCS. primary_redis_client.set("JobCounter", 0) @@ -1256,6 +1237,8 @@ def start_raylet(redis_address, temp_dir, session_dir, resource_spec, + plasma_directory, + object_store_memory, min_worker_port=None, max_worker_port=None, object_manager_port=None, @@ -1267,10 +1250,8 @@ def start_raylet(redis_address, stdout_file=None, stderr_file=None, config=None, - include_java=False, java_worker_options=None, load_code_from_local=False, - plasma_directory=None, huge_pages=False, fate_share=None, socket_to_use=None, @@ -1312,8 +1293,6 @@ def start_raylet(redis_address, no redirection should happen, then this should be None. config (dict|None): Optional Raylet configuration that will override defaults in RayConfig. - include_java (bool): If True, the raylet backend can also support - Java worker. java_worker_options (list): The command options for Java worker. 
code_search_path (list): Code search path for worker. code_search_path is added to worker command in non-multi-tenancy mode and job_config @@ -1345,6 +1324,26 @@ def start_raylet(redis_address, gcs_ip_address, gcs_port = redis_address.split(":") + has_java_command = False + try: + java_proc = subprocess.run( + ["java", "-version"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if java_proc.returncode == 0: + has_java_command = True + except OSError: + pass + + ray_java_installed = False + try: + jars_dir = get_ray_jars_dir() + if os.path.exists(jars_dir): + ray_java_installed = True + except Exception: + pass + + include_java = has_java_command and ray_java_installed if include_java is True: java_worker_command = build_java_worker_command( json.loads(java_worker_options) if java_worker_options else [], @@ -1457,8 +1456,6 @@ def start_raylet(redis_address, subprocess.list2cmdline(agent_command))) if config.get("plasma_store_as_thread"): # command related to the plasma store - plasma_directory, object_store_memory = determine_plasma_store_config( - resource_spec.object_store_memory, plasma_directory, huge_pages) command += [ f"--object_store_memory={object_store_memory}", f"--plasma_directory={plasma_directory}", @@ -1653,8 +1650,8 @@ def determine_plasma_store_config(object_store_memory, "than the total available memory.") else: plasma_directory = os.path.abspath(plasma_directory) - logger.warning("WARNING: object_store_memory is not verified when " - "plasma_directory is set.") + logger.info("object_store_memory is not verified when " + "plasma_directory is set.") if not os.path.isdir(plasma_directory): raise ValueError(f"The file {plasma_directory} does not " @@ -1680,10 +1677,11 @@ def determine_plasma_store_config(object_store_memory, def start_plasma_store(resource_spec, + plasma_directory, + object_store_memory, plasma_store_socket_name, stdout_file=None, stderr_file=None, - plasma_directory=None, keep_idle=False, huge_pages=False, fate_share=None, @@ 
-1712,8 +1710,6 @@ def start_plasma_store(resource_spec, raise ValueError("Cannot use valgrind and profiler at the same time.") assert resource_spec.resolved() - plasma_directory, object_store_memory = determine_plasma_store_config( - resource_spec.object_store_memory, plasma_directory, huge_pages) command = [ PLASMA_STORE_EXECUTABLE, diff --git a/python/ray/tests/test_cross_language.py b/python/ray/tests/test_cross_language.py index 9ba24a980628..3904f63df135 100644 --- a/python/ray/tests/test_cross_language.py +++ b/python/ray/tests/test_cross_language.py @@ -6,7 +6,7 @@ def test_cross_language_raise_kwargs(shutdown_only): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) with pytest.raises(Exception, match="kwargs"): ray.java_function("a", "b").remote(x="arg1") @@ -16,7 +16,7 @@ def test_cross_language_raise_kwargs(shutdown_only): def test_cross_language_raise_exception(shutdown_only): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) class PythonObject(object): pass diff --git a/python/ray/tune/BUILD b/python/ray/tune/BUILD index d24cf0452663..f85583f7434d 100644 --- a/python/ray/tune/BUILD +++ b/python/ray/tune/BUILD @@ -149,7 +149,7 @@ py_test( py_test( name = "test_sample", - size = "medium", + size = "small", srcs = ["tests/test_sample.py"], deps = [":tune_lib"], tags = ["exclusive"], diff --git a/python/ray/tune/analysis/experiment_analysis.py b/python/ray/tune/analysis/experiment_analysis.py index 2da4c33e8883..afa5de622ceb 100644 --- a/python/ray/tune/analysis/experiment_analysis.py +++ b/python/ray/tune/analysis/experiment_analysis.py @@ -1,11 +1,17 @@ import json import logging import os +from typing import Dict + +from ray.tune.checkpoint_manager import Checkpoint +from ray.tune.utils import flatten_dict try: import pandas as pd + from pandas import DataFrame except ImportError: pd = None + DataFrame = None from ray.tune.error import TuneError from 
ray.tune.result import EXPR_PROGRESS_FILE, EXPR_PARAM_FILE,\ @@ -80,6 +86,9 @@ def dataframe(self, metric=None, mode=None): Returns: pd.DataFrame: Constructed from a result dict of each trial. """ + metric = self._validate_metric(metric) + mode = self._validate_mode(mode) + rows = self._retrieve_rows(metric=metric, mode=mode) all_configs = self.get_all_configs(prefix=True) for path, config in all_configs.items(): @@ -227,6 +236,9 @@ def get_best_checkpoint(self, trial, metric=None, mode=None): mode = self._validate_mode(mode) checkpoint_paths = self.get_trial_checkpoints_paths(trial, metric) + if not checkpoint_paths: + logger.error(f"No checkpoints have been found for trial {trial}.") + return None if mode == "max": return max(checkpoint_paths, key=lambda x: x[1])[0] else: @@ -316,7 +328,150 @@ def __init__(self, os.path.dirname(experiment_checkpoint_path), default_metric, default_mode) - def get_best_trial(self, metric=None, mode=None, scope="all"): + @property + def best_trial(self) -> Trial: + """Get the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_trial`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_trial(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_trial(self.default_metric, self.default_mode) + + @property + def best_config(self) -> Dict: + """Get the config of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_config(metric, mode, scope)` instead. 
+ """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_config`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_config(metric, mode)` method to set the metric " + "and mode explicitly.") + return self.get_best_config(self.default_metric, self.default_mode) + + @property + def best_checkpoint(self) -> Checkpoint: + """Get the checkpoint of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_checkpoint(trial, metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_checkpoint`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use the " + "`get_best_checkpoint(trial, metric, mode)` method to set the " + "metric and mode explicitly.") + best_trial = self.best_trial + return self.get_best_checkpoint(best_trial, self.default_metric, + self.default_mode) + + @property + def best_logdir(self) -> str: + """Get the logdir of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_logdir`, pass a `metric` and `mode` " + "parameter to `tune.run()`. 
Alternatively, use the " + "`get_best_logdir(metric, mode, scope)` method to set the " + "metric and mode explicitly.") + return self.get_best_logdir(self.default_metric, self.default_mode) + + @property + def best_dataframe(self) -> DataFrame: + """Get the full result dataframe of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_logdir(metric, mode)` and use it to look for the dataframe + in the `self.trial_dataframes` dict. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_result`, pass a `metric` and `mode` " + "parameter to `tune.run()`.") + best_logdir = self.best_logdir + return self.trial_dataframes[best_logdir] + + @property + def best_result(self) -> Dict: + """Get the last result of the best trial of the experiment + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not self.default_metric or not self.default_mode: + raise ValueError( + "To fetch the `best_result`, pass a `metric` and `mode` " + "parameter to `tune.run()`. Alternatively, use " + "`get_best_trial(metric, mode).last_result` to set " + "the metric and mode explicitly and fetch the last result.") + return self.best_trial.last_result + + @property + def best_result_df(self) -> DataFrame: + """Get the best result of the experiment as a pandas dataframe. + + The best trial is determined by comparing the last trial results + using the `metric` and `mode` parameters passed to `tune.run()`. + + If you didn't pass these parameters, use + `get_best_trial(metric, mode, scope).last_result` instead. + """ + if not pd: + raise ValueError("`best_result_df` requires pandas. 
Install with " + "`pip install pandas`.") + best_result = flatten_dict(self.best_result, delimiter=".") + return pd.DataFrame.from_records([best_result], index="trial_id") + + @property + def results(self) -> Dict[str, Dict]: + """Get the last result of the all trials of the experiment""" + return {trial.trial_id: trial.last_result for trial in self.trials} + + @property + def results_df(self) -> DataFrame: + if not pd: + raise ValueError("`best_result_df` requires pandas. Install with " + "`pip install pandas`.") + return pd.DataFrame.from_records( + [ + flatten_dict(trial.last_result, delimiter=".") + for trial in self.trials + ], + index="trial_id") + + def get_best_trial(self, metric=None, mode=None, scope="last"): """Retrieve the best trial object. Compares all trials' scores on ``metric``. @@ -380,7 +535,7 @@ def get_best_trial(self, metric=None, mode=None, scope="all"): "parameter?") return best_trial - def get_best_config(self, metric=None, mode=None, scope="all"): + def get_best_config(self, metric=None, mode=None, scope="last"): """Retrieve the best config corresponding to the trial. Compares all trials' scores on `metric`. @@ -407,7 +562,7 @@ def get_best_config(self, metric=None, mode=None, scope="all"): best_trial = self.get_best_trial(metric, mode, scope) return best_trial.config if best_trial else None - def get_best_logdir(self, metric=None, mode=None, scope="all"): + def get_best_logdir(self, metric=None, mode=None, scope="last"): """Retrieve the logdir corresponding to the best trial. Compares all trials' scores on `metric`. 
diff --git a/python/ray/tune/commands.py b/python/ray/tune/commands.py index 2ab17e609906..7fbbe9776bde 100644 --- a/python/ray/tune/commands.py +++ b/python/ray/tune/commands.py @@ -116,7 +116,8 @@ def list_trials(experiment_path, _check_tabulate() try: - checkpoints_df = Analysis(experiment_path).dataframe() + checkpoints_df = Analysis(experiment_path).dataframe( + metric="episode_reward_mean", mode="max") except TuneError: raise click.ClickException("No trial data found!") diff --git a/python/ray/tune/examples/bayesopt_example.py b/python/ray/tune/examples/bayesopt_example.py index d9f552658af2..1d0e112121f5 100644 --- a/python/ray/tune/examples/bayesopt_example.py +++ b/python/ray/tune/examples/bayesopt_example.py @@ -7,6 +7,7 @@ import ray from ray import tune from ray.tune.schedulers import AsyncHyperBandScheduler +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.suggest.bayesopt import BayesOptSearch @@ -43,18 +44,18 @@ def easy_objective(config): "height": tune.uniform(-100, 100) } } - algo = BayesOptSearch( - metric="mean_loss", - mode="min", - utility_kwargs={ - "kind": "ucb", - "kappa": 2.5, - "xi": 0.0 - }) - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + algo = BayesOptSearch(utility_kwargs={ + "kind": "ucb", + "kappa": 2.5, + "xi": 0.0 + }) + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = AsyncHyperBandScheduler() tune.run( easy_objective, name="my_exp", + metric="mean_loss", + mode="min", search_alg=algo, scheduler=scheduler, **tune_kwargs) diff --git a/python/ray/tune/examples/dragonfly_example.py b/python/ray/tune/examples/dragonfly_example.py index 53b8c3f84266..38c83083eac0 100644 --- a/python/ray/tune/examples/dragonfly_example.py +++ b/python/ray/tune/examples/dragonfly_example.py @@ -11,6 +11,7 @@ import ray from ray import tune +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.dragonfly import DragonflySearch @@ 
-70,12 +71,14 @@ def objective(config): optimizer="bandit", domain="euclidean", # space=space, # If you want to set the space manually - metric="objective", - mode="max") + ) + df_search = ConcurrencyLimiter(df_search, max_concurrent=4) - scheduler = AsyncHyperBandScheduler(metric="objective", mode="max") + scheduler = AsyncHyperBandScheduler() tune.run( objective, + metric="objective", + mode="max", name="dragonfly_search", search_alg=df_search, scheduler=scheduler, diff --git a/python/ray/tune/examples/hyperband_example.py b/python/ray/tune/examples/hyperband_example.py index 77ec56040a99..c2aff71aec96 100755 --- a/python/ray/tune/examples/hyperband_example.py +++ b/python/ray/tune/examples/hyperband_example.py @@ -3,16 +3,15 @@ import argparse import json import os -import random import numpy as np import ray -from ray.tune import Trainable, run, sample_from +from ray import tune from ray.tune.schedulers import HyperBandScheduler -class MyTrainableClass(Trainable): +class MyTrainableClass(tune.Trainable): """Example agent whose learning curve is a random sigmoid. 
The dummy hyperparameters "width" and "height" determine the slope and @@ -58,13 +57,14 @@ def load_checkpoint(self, checkpoint_path): mode="max", max_t=200) - run(MyTrainableClass, + tune.run( + MyTrainableClass, name="hyperband_test", num_samples=20, stop={"training_iteration": 1 if args.smoke_test else 99999}, config={ - "width": sample_from(lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())) + "width": tune.randint(10, 90), + "height": tune.randint(0, 100) }, scheduler=hyperband, fail_fast=True) diff --git a/python/ray/tune/examples/hyperopt_example.py b/python/ray/tune/examples/hyperopt_example.py index 3385376b6a55..d28f059ecb00 100644 --- a/python/ray/tune/examples/hyperopt_example.py +++ b/python/ray/tune/examples/hyperopt_example.py @@ -6,6 +6,7 @@ import ray from ray import tune +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.hyperopt import HyperOptSearch @@ -58,8 +59,14 @@ def easy_objective(config): "activation": tune.choice(["relu", "tanh"]) } } - algo = HyperOptSearch( - metric="mean_loss", mode="min", points_to_evaluate=current_best_params) - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + algo = HyperOptSearch(points_to_evaluate=current_best_params) + algo = ConcurrencyLimiter(algo, max_concurrent=4) + + scheduler = AsyncHyperBandScheduler() tune.run( - easy_objective, search_alg=algo, scheduler=scheduler, **tune_kwargs) + easy_objective, + search_alg=algo, + scheduler=scheduler, + metric="mean_loss", + mode="min", + **tune_kwargs) diff --git a/python/ray/tune/examples/lightgbm_example.py b/python/ray/tune/examples/lightgbm_example.py index a0385372842d..9ca41fa8e7b3 100644 --- a/python/ray/tune/examples/lightgbm_example.py +++ b/python/ray/tune/examples/lightgbm_example.py @@ -44,6 +44,8 @@ def train_breast_cancer(config): from ray.tune.schedulers import ASHAScheduler tune.run( 
train_breast_cancer, + metric="binary_error", + mode="min", config=config, num_samples=2, - scheduler=ASHAScheduler(metric="binary_error", mode="min")) + scheduler=ASHAScheduler()) diff --git a/python/ray/tune/examples/mlflow_example.py b/python/ray/tune/examples/mlflow_example.py index 368726c0b6ad..875c7837bbbf 100644 --- a/python/ray/tune/examples/mlflow_example.py +++ b/python/ray/tune/examples/mlflow_example.py @@ -9,7 +9,6 @@ import mlflow from mlflow.tracking import MlflowClient import time -import random from ray import tune from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS @@ -44,9 +43,8 @@ def easy_objective(config): "logger_config": { "mlflow_experiment_id": experiment_id, }, - "width": tune.sample_from( - lambda spec: 10 + int(90 * random.random())), - "height": tune.sample_from(lambda spec: int(100 * random.random())) + "width": tune.randint(10, 100), + "height": tune.randint(0, 100), }) df = mlflow.search_runs([experiment_id]) diff --git a/python/ray/tune/examples/mnist_pytorch.py b/python/ray/tune/examples/mnist_pytorch.py index 5a2c3677079c..d1e4fdf69fb8 100644 --- a/python/ray/tune/examples/mnist_pytorch.py +++ b/python/ray/tune/examples/mnist_pytorch.py @@ -1,7 +1,6 @@ # Original Code here: # https://github.com/pytorch/examples/blob/master/mnist/main.py import os -import numpy as np import argparse from filelock import FileLock import torch @@ -89,7 +88,7 @@ def get_data_loaders(): def train_mnist(config): - use_cuda = config.get("use_gpu") and torch.cuda.is_available() + use_cuda = torch.cuda.is_available() device = torch.device("cuda" if use_cuda else "cpu") train_loader, test_loader = get_data_loaders() model = ConvNet().to(device) @@ -100,6 +99,7 @@ def train_mnist(config): while True: train(model, optimizer, train_loader, device) acc = test(model, test_loader, device) + # Set this to run Tune. 
tune.report(mean_accuracy=acc) @@ -120,10 +120,14 @@ def train_mnist(config): ray.init(address=args.ray_address) else: ray.init(num_cpus=2 if args.smoke_test else None) - sched = AsyncHyperBandScheduler( - time_attr="training_iteration", metric="mean_accuracy") + + # for early stopping + sched = AsyncHyperBandScheduler() + analysis = tune.run( train_mnist, + metric="mean_accuracy", + mode="max", name="exp", scheduler=sched, stop={ @@ -132,14 +136,12 @@ def train_mnist(config): }, resources_per_trial={ "cpu": 2, - "gpu": int(args.cuda) + "gpu": int(args.cuda) # set this for GPUs }, num_samples=1 if args.smoke_test else 50, config={ - "lr": tune.sample_from(lambda spec: 10**(-10 * np.random.rand())), + "lr": tune.loguniform(1e-4, 1e-2), "momentum": tune.uniform(0.1, 0.9), - "use_gpu": int(args.cuda) }) - print("Best config is:", - analysis.get_best_config(metric="mean_accuracy", mode="max")) + print("Best config is:", analysis.best_config) diff --git a/python/ray/tune/examples/mnist_pytorch_trainable.py b/python/ray/tune/examples/mnist_pytorch_trainable.py index c623111daf83..c31b81968375 100644 --- a/python/ray/tune/examples/mnist_pytorch_trainable.py +++ b/python/ray/tune/examples/mnist_pytorch_trainable.py @@ -65,9 +65,11 @@ def load_checkpoint(self, checkpoint_path): if __name__ == "__main__": args = parser.parse_args() ray.init(address=args.ray_address, num_cpus=6 if args.smoke_test else None) - sched = ASHAScheduler(metric="mean_accuracy") + sched = ASHAScheduler() analysis = tune.run( TrainMNIST, + metric="mean_accuracy", + mode="max", scheduler=sched, stop={ "mean_accuracy": 0.95, diff --git a/python/ray/tune/examples/mxnet_example.py b/python/ray/tune/examples/mxnet_example.py index b128c121d298..dd959e481ff0 100644 --- a/python/ray/tune/examples/mxnet_example.py +++ b/python/ray/tune/examples/mxnet_example.py @@ -66,8 +66,7 @@ def tune_mnist_mxnet(num_samples=10, num_epochs=10): reduction_factor=2) reporter = CLIReporter( - 
parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"], - metric_columns=["loss", "mean_accuracy", "training_iteration"]) + parameter_columns=["layer_1_size", "layer_2_size", "lr", "batch_size"]) tune.run( partial(train_mnist_mxnet, mnist=mnist_data, num_epochs=num_epochs), diff --git a/python/ray/tune/examples/nevergrad_example.py b/python/ray/tune/examples/nevergrad_example.py index 0dbd01e6a082..7eae59bc12f6 100644 --- a/python/ray/tune/examples/nevergrad_example.py +++ b/python/ray/tune/examples/nevergrad_example.py @@ -6,6 +6,7 @@ import ray from ray import tune +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.nevergrad import NevergradSearch @@ -57,13 +58,15 @@ def easy_objective(config): algo = NevergradSearch( optimizer=ng.optimizers.OnePlusOne, # space=space, # If you want to set the space manually - metric="mean_loss", - mode="min") + ) + algo = ConcurrencyLimiter(algo, max_concurrent=4) - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + scheduler = AsyncHyperBandScheduler() tune.run( easy_objective, + metric="mean_loss", + mode="min", name="nevergrad", search_alg=algo, scheduler=scheduler, diff --git a/python/ray/tune/examples/optuna_example.py b/python/ray/tune/examples/optuna_example.py index ded76a425bff..ab7e68d383f0 100644 --- a/python/ray/tune/examples/optuna_example.py +++ b/python/ray/tune/examples/optuna_example.py @@ -6,6 +6,7 @@ import ray from ray import tune +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.optuna import OptunaSearch @@ -45,7 +46,13 @@ def easy_objective(config): "activation": tune.choice(["relu", "tanh"]) } } - algo = OptunaSearch(metric="mean_loss", mode="min") - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + algo = OptunaSearch() + algo = ConcurrencyLimiter(algo, max_concurrent=4) + scheduler = 
AsyncHyperBandScheduler() tune.run( - easy_objective, search_alg=algo, scheduler=scheduler, **tune_kwargs) + easy_objective, + metric="mean_loss", + mode="min", + search_alg=algo, + scheduler=scheduler, + **tune_kwargs) diff --git a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py index 1d6b3b7e3822..8dea4fbcdeeb 100644 --- a/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py +++ b/python/ray/tune/examples/pbt_dcgan_mnist/pbt_dcgan_mnist_trainable.py @@ -160,6 +160,6 @@ def _export_model(self, export_formats, export_dir): # demo of the trained Generators if not args.smoke_test: - logdirs = analysis.dataframe()["logdir"].tolist() + logdirs = analysis.results_df["logdir"].tolist() model_paths = [os.path.join(d, "exported_models") for d in logdirs] demo_gan(analysis, model_paths) diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py index bc6ca9fbb237..ec0d891a2ee6 100644 --- a/python/ray/tune/examples/skopt_example.py +++ b/python/ray/tune/examples/skopt_example.py @@ -6,6 +6,7 @@ import ray from ray import tune +from ray.tune.suggest import ConcurrencyLimiter from ray.tune.schedulers import AsyncHyperBandScheduler from ray.tune.suggest.skopt import SkOptSearch @@ -59,15 +60,16 @@ def easy_objective(config): algo = SkOptSearch( # parameter_names=space.keys(), # If you want to set the space # parameter_ranges=space.values(), # If you want to set the space - metric="mean_loss", - mode="min", points_to_evaluate=previously_run_params, evaluated_rewards=known_rewards) + algo = ConcurrencyLimiter(algo, max_concurrent=4) - scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min") + scheduler = AsyncHyperBandScheduler() tune.run( easy_objective, + metric="mean_loss", + mode="min", name="skopt_exp_with_warmstart", search_alg=algo, scheduler=scheduler, diff --git a/python/ray/tune/examples/tune_cifar10_gluon.py 
b/python/ray/tune/examples/tune_cifar10_gluon.py index 49d14574aeaf..ac0fa90d52fd 100644 --- a/python/ray/tune/examples/tune_cifar10_gluon.py +++ b/python/ray/tune/examples/tune_cifar10_gluon.py @@ -154,8 +154,8 @@ def train(epoch): with ag.record(): outputs = [finetune_net(X) for X in data] loss = [L(yhat, y) for yhat, y in zip(outputs, label)] - for l in loss: - l.backward() + for ls in loss: + ls.backward() trainer.step(batch_size) mx.nd.waitall() @@ -170,7 +170,7 @@ def test(): outputs = [finetune_net(X) for X in data] loss = [L(yhat, y) for yhat, y in zip(outputs, label)] - test_loss += sum(l.mean().asscalar() for l in loss) / len(loss) + test_loss += sum(ls.mean().asscalar() for ls in loss) / len(loss) metric.update(label, outputs) _, test_acc = metric.get() @@ -194,11 +194,7 @@ def test(): sched = FIFOScheduler() elif args.scheduler == "asynchyperband": sched = AsyncHyperBandScheduler( - time_attr="training_iteration", - metric="mean_loss", - mode="min", - max_t=400, - grace_period=60) + metric="mean_loss", mode="min", max_t=400, grace_period=60) else: raise NotImplementedError tune.run( diff --git a/python/ray/tune/progress_reporter.py b/python/ray/tune/progress_reporter.py index c1325f1021ec..ca60adf29936 100644 --- a/python/ray/tune/progress_reporter.py +++ b/python/ray/tune/progress_reporter.py @@ -1,10 +1,12 @@ from __future__ import print_function import collections +import numpy as np import time from ray.tune.result import (EPISODE_REWARD_MEAN, MEAN_ACCURACY, MEAN_LOSS, - TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL) + TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL, + AUTO_RESULT_KEYS) from ray.tune.utils import unflattened_lookup try: @@ -51,6 +53,10 @@ def report(self, trials, done, *sys_info): class TuneReporterBase(ProgressReporter): """Abstract base class for the default Tune reporters. + If metric_columns is not overriden, Tune will attempt to automatically + infer the metrics being outputted, up to 'infer_limit' number of + metrics. 
+ Args: metric_columns (dict[str, str]|list[str]): Names of metrics to include in progress table. If this is a dict, the keys should @@ -80,17 +86,25 @@ class TuneReporterBase(ProgressReporter): TIMESTEPS_TOTAL: "ts", EPISODE_REWARD_MEAN: "reward", }) + VALID_SUMMARY_TYPES = { + int, float, np.float32, np.float64, np.int32, np.int64, + type(None) + } def __init__(self, metric_columns=None, parameter_columns=None, max_progress_rows=20, max_error_rows=20, - max_report_frequency=5): + max_report_frequency=5, + infer_limit=3): + self._metrics_override = metric_columns is not None + self._inferred_metrics = {} self._metric_columns = metric_columns or self.DEFAULT_COLUMNS.copy() self._parameter_columns = parameter_columns or [] self._max_progress_rows = max_progress_rows self._max_error_rows = max_error_rows + self._infer_limit = infer_limit self._max_report_freqency = max_report_frequency self._last_report_time = 0 @@ -110,6 +124,7 @@ def add_metric_column(self, metric, representation=None): representation (str): Representation to use in table. Defaults to `metric`. """ + self._metrics_override = True if metric in self._metric_columns: raise ValueError("Column {} already exists.".format(metric)) @@ -161,6 +176,9 @@ def _progress_str(self, trials, done, *sys_info, fmt="psql", delim="\n"): fmt (str): Table format. See `tablefmt` in tabulate API. delim (str): Delimiter between messages. 
""" + if not self._metrics_override: + user_metrics = self._infer_user_metrics(trials, self._infer_limit) + self._metric_columns.update(user_metrics) messages = ["== Status ==", memory_debug_str(), *sys_info] if done: max_progress = None @@ -178,6 +196,24 @@ def _progress_str(self, trials, done, *sys_info, fmt="psql", delim="\n"): messages.append(trial_errors_str(trials, fmt=fmt, max_rows=max_error)) return delim.join(messages) + delim + def _infer_user_metrics(self, trials, limit=4): + """Try to infer the metrics to print out.""" + if len(self._inferred_metrics) >= limit: + return self._inferred_metrics + self._inferred_metrics = {} + for t in trials: + if not t.last_result: + continue + for metric, value in t.last_result.items(): + if metric not in self.DEFAULT_COLUMNS: + if metric not in AUTO_RESULT_KEYS: + if type(value) in self.VALID_SUMMARY_TYPES: + self._inferred_metrics[metric] = metric + + if len(self._inferred_metrics) >= limit: + return self._inferred_metrics + return self._inferred_metrics + class JupyterNotebookReporter(TuneReporterBase): """Jupyter notebook-friendly Reporter that can update display in-place. 
diff --git a/python/ray/tune/result.py b/python/ray/tune/result.py index 70b311bf7f91..8fab799e916b 100644 --- a/python/ray/tune/result.py +++ b/python/ray/tune/result.py @@ -29,6 +29,9 @@ # (Optional) Mean loss for training iteration MEAN_LOSS = "mean_loss" +# (Optional) Negative mean loss for training iteration +NEG_MEAN_LOSS = "neg_mean_loss" + # (Optional) Mean accuracy for training iteration MEAN_ACCURACY = "mean_accuracy" @@ -61,6 +64,26 @@ DEFAULT_RESULT_KEYS = (TRAINING_ITERATION, TIME_TOTAL_S, TIMESTEPS_TOTAL, MEAN_ACCURACY, MEAN_LOSS) +# Make sure this doesn't regress +AUTO_RESULT_KEYS = ( + TRAINING_ITERATION, + TIME_TOTAL_S, + EPISODES_TOTAL, + TIMESTEPS_TOTAL, + NODE_IP, + HOSTNAME, + PID, + TIME_TOTAL_S, + TIME_THIS_ITER_S, + "timestamp", + "experiment_id", + "date", + "time_since_restore", + "iterations_since_restore", + "timesteps_since_restore", + "config", +) + # __duplicate__ is a magic keyword used internally to # avoid double-logging results when using the Function API. RESULT_DUPLICATE = "__duplicate__" diff --git a/python/ray/tune/schedulers/__init__.py b/python/ray/tune/schedulers/__init__.py index 54b88ca9ecb0..5e51bdab24b3 100644 --- a/python/ray/tune/schedulers/__init__.py +++ b/python/ray/tune/schedulers/__init__.py @@ -10,8 +10,8 @@ def create_scheduler( scheduler, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, **kwargs, ): """Instantiate a scheduler based on the given string.
diff --git a/python/ray/tune/schedulers/async_hyperband.py b/python/ray/tune/schedulers/async_hyperband.py index 29cf481eb784..02e59453de6d 100644 --- a/python/ray/tune/schedulers/async_hyperband.py +++ b/python/ray/tune/schedulers/async_hyperband.py @@ -38,8 +38,8 @@ class AsyncHyperBandScheduler(FIFOScheduler): def __init__(self, time_attr="training_iteration", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_t=100, grace_period=1, reduction_factor=4, @@ -49,7 +49,8 @@ def __init__(self, assert grace_period > 0, "grace_period must be positive!" assert reduction_factor > 1, "Reduction Factor not valid!" assert brackets > 0, "brackets must be positive!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" @@ -73,13 +74,41 @@ def __init__(self, self._counter = 0 # for self._num_stopped = 0 self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + if self._mode == "max": self._metric_op = 1. - elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. 
Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + sizes = np.array([len(b._rungs) for b in self._brackets]) probs = np.e**(sizes - sizes.max()) normalized = probs / probs.sum() @@ -162,6 +191,7 @@ def on_result(self, trial, cur_iter, cur_rew): return action def debug_str(self): + # TODO: fix up the output for this iters = " | ".join([ "Iter {:.3f}: {}".format(milestone, self.cutoff(recorded)) for milestone, recorded in self._rungs diff --git a/python/ray/tune/schedulers/hb_bohb.py b/python/ray/tune/schedulers/hb_bohb.py index 7204e71e361a..c8c061034631 100644 --- a/python/ray/tune/schedulers/hb_bohb.py +++ b/python/ray/tune/schedulers/hb_bohb.py @@ -30,6 +30,13 @@ def on_trial_add(self, trial_runner, trial): to current bracket. Else, create new iteration, create new bracket, add to bracket. """ + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) cur_bracket = self._state["bracket"] cur_band = self._hyperbands[self._state["band_idx"]] diff --git a/python/ray/tune/schedulers/hyperband.py b/python/ray/tune/schedulers/hyperband.py index a2fe3ad91ed3..3066cf80b3dc 100644 --- a/python/ray/tune/schedulers/hyperband.py +++ b/python/ray/tune/schedulers/hyperband.py @@ -76,12 +76,13 @@ class HyperBandScheduler(FIFOScheduler): def __init__(self, time_attr="training_iteration", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_t=81, reduction_factor=3): assert max_t > 0, "Max (time_attr) not valid!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" 
+ if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" @@ -108,12 +109,33 @@ def __init__(self, self._state = {"bracket": None, "band_idx": 0} self._num_stopped = 0 self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + + if self._mode == "max": self._metric_op = 1. - elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): """Adds new trial. @@ -121,6 +143,13 @@ def on_trial_add(self, trial_runner, trial): add to current bracket. Else, if current band is not filled, create new bracket, add to current bracket. Else, create new iteration, create new bracket, add to bracket.""" + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. 
Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) cur_bracket = self._state["bracket"] cur_band = self._hyperbands[self._state["band_idx"]] diff --git a/python/ray/tune/schedulers/median_stopping_rule.py b/python/ray/tune/schedulers/median_stopping_rule.py index 2389f166e32e..497c62915ac6 100644 --- a/python/ray/tune/schedulers/median_stopping_rule.py +++ b/python/ray/tune/schedulers/median_stopping_rule.py @@ -40,13 +40,12 @@ class MedianStoppingRule(FIFOScheduler): def __init__(self, time_attr="time_total_s", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, grace_period=60.0, min_samples_required=3, min_time_slice=0, hard_stop=True): - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" if reward_attr is not None: mode = "max" metric = reward_attr @@ -60,15 +59,49 @@ def __init__(self, self._min_samples_required = min_samples_required self._min_time_slice = min_time_slice self._metric = metric - assert mode in {"min", "max"}, "`mode` must be 'min' or 'max'." - self._worst = float("-inf") if mode == "max" else float("inf") - self._compare_op = max if mode == "max" else min + self._worst = None + self._compare_op = None + + self._mode = mode + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
+ self._worst = float("-inf") if self._mode == "max" else float( + "inf") + self._compare_op = max if self._mode == "max" else min + self._time_attr = time_attr self._hard_stop = hard_stop self._trial_state = {} self._last_pause = collections.defaultdict(lambda: float("-inf")) self._results = collections.defaultdict(list) + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + self._worst = float("-inf") if self._mode == "max" else float("inf") + self._compare_op = max if self._mode == "max" else min + + return True + + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._worst or not self._compare_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + + super(MedianStoppingRule, self).on_trial_add(trial_runner, trial) + def on_trial_result(self, trial_runner, trial, result): """Callback for early stopping. diff --git a/python/ray/tune/schedulers/pbt.py b/python/ray/tune/schedulers/pbt.py index 70137e8de3f6..6e6396097028 100644 --- a/python/ray/tune/schedulers/pbt.py +++ b/python/ray/tune/schedulers/pbt.py @@ -216,8 +216,8 @@ class PopulationBasedTraining(FIFOScheduler): def __init__(self, time_attr="time_total_s", reward_attr=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, perturbation_interval=60.0, hyperparam_mutations={}, quantile_fraction=0.25, @@ -253,7 +253,8 @@ def __init__(self, "perturbation_interval must be a positive number greater " "than 0. Current value: '{}'".format(perturbation_interval)) - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" 
+ if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." if reward_attr is not None: mode = "max" @@ -265,9 +266,11 @@ def __init__(self, FIFOScheduler.__init__(self) self._metric = metric - if mode == "max": + self._mode = mode + self._metric_op = None + if self._mode == "max": self._metric_op = 1. - elif mode == "min": + elif self._mode == "min": self._metric_op = -1. self._time_attr = time_attr self._perturbation_interval = perturbation_interval @@ -285,7 +288,33 @@ def __init__(self, self._num_checkpoints = 0 self._num_perturbations = 0 + def set_search_properties(self, metric, mode): + if self._metric and metric: + return False + if self._mode and mode: + return False + + if metric: + self._metric = metric + if mode: + self._mode = mode + + if self._mode == "max": + self._metric_op = 1. + elif self._mode == "min": + self._metric_op = -1. + + return True + def on_trial_add(self, trial_runner, trial): + if not self._metric or not self._metric_op: + raise ValueError( + "{} has been instantiated without a valid `metric` ({}) or " + "`mode` ({}) parameter. Either pass these parameters when " + "instantiating the scheduler, or pass them as parameters " + "to `tune.run()`".format(self.__class__.__name__, self._metric, + self._mode)) + self._trial_state[trial] = PBTTrialState(trial) for attr in self._hyperparam_mutations.keys(): diff --git a/python/ray/tune/schedulers/trial_scheduler.py b/python/ray/tune/schedulers/trial_scheduler.py index 6fe7284cf655..66ba25904379 100644 --- a/python/ray/tune/schedulers/trial_scheduler.py +++ b/python/ray/tune/schedulers/trial_scheduler.py @@ -8,6 +8,18 @@ class TrialScheduler: PAUSE = "PAUSE" #: Status for pausing trial execution STOP = "STOP" #: Status for stopping trial execution + def set_search_properties(self, metric, mode): + """Pass search properties to scheduler. + + This method acts as an alternative to instantiating schedulers + that react to metrics with their own `metric` and `mode` parameters. 
+ + Args: + metric (str): Metric to optimize + mode (str): One of ["min", "max"]. Direction to optimize. + """ + return True + def on_trial_add(self, trial_runner, trial): """Called when a new trial is added to the trial runner.""" diff --git a/python/ray/tune/suggest/__init__.py b/python/ray/tune/suggest/__init__.py index a9b5582a9088..f3f332f1ff4e 100644 --- a/python/ray/tune/suggest/__init__.py +++ b/python/ray/tune/suggest/__init__.py @@ -8,8 +8,8 @@ def create_searcher( search_alg, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, **kwargs, ): """Instantiate a search algorithm based on the given string. diff --git a/python/ray/tune/suggest/ax.py b/python/ray/tune/suggest/ax.py index 9574f80ce398..28b52a9c6816 100644 --- a/python/ray/tune/suggest/ax.py +++ b/python/ray/tune/suggest/ax.py @@ -104,15 +104,16 @@ def easy_objective(config): def __init__(self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, parameter_constraints=None, outcome_constraints=None, ax_client=None, use_early_stopped_trials=None, max_concurrent=None): assert ax is not None, "Ax must be installed!" - assert mode in ["min", "max"], "`mode` must be one of ['min', 'max']" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(AxSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/bayesopt.py b/python/ray/tune/suggest/bayesopt.py index 340f200a5ecb..d5c7684c1ab4 100644 --- a/python/ray/tune/suggest/bayesopt.py +++ b/python/ray/tune/suggest/bayesopt.py @@ -101,8 +101,8 @@ class BayesOptSearch(Searcher): def __init__(self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, utility_kwargs=None, random_state=42, random_search_steps=10, @@ -144,7 +144,8 @@ def __init__(self, assert byo is not None, ( "BayesOpt must be installed!. 
You can install BayesOpt with" " the command: `pip install bayesian-optimization`.") - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." self.max_concurrent = max_concurrent self._config_counter = defaultdict(int) self._patience = patience @@ -284,8 +285,10 @@ def register_analysis(self, analysis): analysis (ExperimentAnalysis): Optionally, the previous analysis to integrate. """ - for (_, report), params in zip(analysis.dataframe().iterrows(), - analysis.get_all_configs().values()): + for (_, report), params in zip( + analysis.dataframe(metric=self._metric, + mode=self._mode).iterrows(), + analysis.get_all_configs().values()): # We add the obtained results to the # gaussian process optimizer self._register_result(params, report) diff --git a/python/ray/tune/suggest/bohb.py b/python/ray/tune/suggest/bohb.py index b545656106e7..318e582e0717 100644 --- a/python/ray/tune/suggest/bohb.py +++ b/python/ray/tune/suggest/bohb.py @@ -95,11 +95,12 @@ def __init__(self, space=None, bohb_config=None, max_concurrent=10, - metric="neg_mean_loss", - mode="max"): + metric=None, + mode=None): from hpbandster.optimizers.config_generators.bohb import BOHB assert BOHB is not None, "HpBandSter must be installed!" - assert mode in ["min", "max"], "`mode` must be in [min, max]!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
self._max_concurrent = max_concurrent self.trial_to_params = {} self.running = set() diff --git a/python/ray/tune/suggest/dragonfly.py b/python/ray/tune/suggest/dragonfly.py index 051301b62135..b2da186b04b5 100644 --- a/python/ray/tune/suggest/dragonfly.py +++ b/python/ray/tune/suggest/dragonfly.py @@ -130,15 +130,16 @@ def __init__(self, optimizer=None, domain=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, evaluated_rewards=None, **kwargs): assert dragonfly is not None, """dragonfly must be installed! You can install Dragonfly with the command: `pip install dragonfly-opt`.""" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(DragonflySearch, self).__init__( metric=metric, mode=mode, **kwargs) diff --git a/python/ray/tune/suggest/hyperopt.py b/python/ray/tune/suggest/hyperopt.py index b05cc3cc2314..b097cc29f275 100644 --- a/python/ray/tune/suggest/hyperopt.py +++ b/python/ray/tune/suggest/hyperopt.py @@ -118,8 +118,8 @@ class HyperOptSearch(Searcher): def __init__( self, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, n_initial_points=20, random_state_seed=None, @@ -129,6 +129,8 @@ def __init__( ): assert hpo is not None, ( "HyperOpt must be installed! Run `pip install hyperopt`.") + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." 
from hyperopt.fmin import generate_trials_to_calculate super(HyperOptSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/nevergrad.py b/python/ray/tune/suggest/nevergrad.py index e46935907387..bee20c814564 100644 --- a/python/ray/tune/suggest/nevergrad.py +++ b/python/ray/tune/suggest/nevergrad.py @@ -87,12 +87,13 @@ class NevergradSearch(Searcher): def __init__(self, optimizer=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_concurrent=None, **kwargs): assert ng is not None, "Nevergrad must be installed!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." super(NevergradSearch, self).__init__( metric=metric, mode=mode, max_concurrent=max_concurrent, **kwargs) diff --git a/python/ray/tune/suggest/optuna.py b/python/ray/tune/suggest/optuna.py index 792df0fc3b53..ae3f1aadbe48 100644 --- a/python/ray/tune/suggest/optuna.py +++ b/python/ray/tune/suggest/optuna.py @@ -100,11 +100,7 @@ class OptunaSearch(Searcher): """ - def __init__(self, - space=None, - metric="episode_reward_mean", - mode="max", - sampler=None): + def __init__(self, space=None, metric=None, mode=None, sampler=None): assert ot is not None, ( "Optuna must be installed! 
Run `pip install optuna`.") super(OptunaSearch, self).__init__( diff --git a/python/ray/tune/suggest/repeater.py b/python/ray/tune/suggest/repeater.py index 05c5a2b397dd..647b6cdaf17f 100644 --- a/python/ray/tune/suggest/repeater.py +++ b/python/ray/tune/suggest/repeater.py @@ -167,3 +167,6 @@ def get_state(self): def set_state(self, state): self.__dict__.update(state) + + def set_search_properties(self, metric, mode, config): + return self.searcher.set_search_properties(metric, mode, config) diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py index ff26ed24f598..67dec2bde9e7 100644 --- a/python/ray/tune/suggest/skopt.py +++ b/python/ray/tune/suggest/skopt.py @@ -127,8 +127,8 @@ class SkOptSearch(Searcher): def __init__(self, optimizer=None, space=None, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, points_to_evaluate=None, evaluated_rewards=None, max_concurrent=None, @@ -137,7 +137,8 @@ def __init__(self, You can install Skopt with the command: `pip install scikit-optimize`.""" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." self.max_concurrent = max_concurrent super(SkOptSearch, self).__init__( metric=metric, diff --git a/python/ray/tune/suggest/suggestion.py b/python/ray/tune/suggest/suggestion.py index 633fe33718e1..2a9793ceed28 100644 --- a/python/ray/tune/suggest/suggestion.py +++ b/python/ray/tune/suggest/suggestion.py @@ -56,8 +56,8 @@ def on_trial_complete(self, trial_id, result, **kwargs): CKPT_FILE_TMPL = "searcher-state-{}.pkl" def __init__(self, - metric="episode_reward_mean", - mode="max", + metric=None, + mode=None, max_concurrent=None, use_early_stopped_trials=None): if use_early_stopped_trials is False: @@ -70,6 +70,13 @@ def __init__(self, "search algorithm. Use tune.suggest.ConcurrencyLimiter() " "instead. 
This will raise an error in future versions of Ray.") + self._metric = metric + self._mode = mode + + if not mode or not metric: + # Early return to avoid assertions + return + assert isinstance( metric, type(mode)), "metric and mode must be of the same type" if isinstance(mode, str): @@ -83,9 +90,6 @@ def __init__(self, else: raise ValueError("Mode most either be a list or string") - self._metric = metric - self._mode = mode - def set_search_properties(self, metric, mode, config): """Pass search properties to searcher. @@ -362,3 +366,6 @@ def on_pause(self, trial_id): def on_unpause(self, trial_id): self.searcher.on_unpause(trial_id) + + def set_search_properties(self, metric, mode, config): + return self.searcher.set_search_properties(metric, mode, config) diff --git a/python/ray/tune/suggest/zoopt.py b/python/ray/tune/suggest/zoopt.py index 950a8a68745e..8f3b2453181d 100644 --- a/python/ray/tune/suggest/zoopt.py +++ b/python/ray/tune/suggest/zoopt.py @@ -109,12 +109,13 @@ def __init__(self, algo="asracos", budget=None, dim_dict=None, - metric="episode_reward_mean", - mode="min", + metric=None, + mode=None, **kwargs): assert zoopt is not None, "Zoopt not found - please install zoopt." assert budget is not None, "`budget` should not be None!" - assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!" + if mode: + assert mode in ["min", "max"], "`mode` must be 'min' or 'max'." _algo = algo.lower() assert _algo in ["asracos", "sracos" ], "`algo` must be in ['asracos', 'sracos'] currently" diff --git a/python/ray/tune/tests/example.py b/python/ray/tune/tests/example.py index 69d1f854b577..383dd5ecb72a 100644 --- a/python/ray/tune/tests/example.py +++ b/python/ray/tune/tests/example.py @@ -39,5 +39,5 @@ def training_function(config): metric="mean_loss", mode="min")) # Get a dataframe for analyzing trial results. 
-df = analysis.dataframe() +df = analysis.results_df # __quick_start_end__ diff --git a/python/ray/tune/tests/test_api.py b/python/ray/tune/tests/test_api.py index fa0213dd8571..3dc3d9fb25bd 100644 --- a/python/ray/tune/tests/test_api.py +++ b/python/ray/tune/tests/test_api.py @@ -520,7 +520,8 @@ def train(config, reporter): analysis = tune.run(train, num_samples=10, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= top) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= top) patience = 5 stopper = EarlyStopping("test", top=top, mode="min", patience=patience) @@ -528,14 +529,16 @@ def train(config, reporter): analysis = tune.run(train, num_samples=20, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= patience) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= patience) stopper = EarlyStopping("test", top=top, mode="min") analysis = tune.run(train, num_samples=10, stop=stopper) self.assertTrue( all(t.status == Trial.TERMINATED for t in analysis.trials)) - self.assertTrue(len(analysis.dataframe()) <= top) + self.assertTrue( + len(analysis.dataframe(metric="test", mode="max")) <= top) def testBadStoppingFunction(self): def train(config, reporter): diff --git a/python/ray/tune/tests/test_experiment_analysis.py b/python/ray/tune/tests/test_experiment_analysis.py index 5195c7825264..bac891cc965c 100644 --- a/python/ray/tune/tests/test_experiment_analysis.py +++ b/python/ray/tune/tests/test_experiment_analysis.py @@ -7,7 +7,7 @@ from numpy import nan import ray -from ray.tune import run, sample_from +from ray import tune from ray.tune.examples.async_hyperband_example import MyTrainableClass @@ -26,7 +26,7 @@ def tearDown(self): ray.shutdown() def run_test_exp(self): - self.ea = run( + self.ea = tune.run( MyTrainableClass, name=self.test_name, 
local_dir=self.test_dir, @@ -34,13 +34,14 @@ def run_test_exp(self): checkpoint_freq=1, num_samples=self.num_samples, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: int(100 * random.random())), }) def nan_test_exp(self): - nan_ea = run( + nan_ea = tune.run( lambda x: nan, name="testing_nan", local_dir=self.test_dir, @@ -48,14 +49,15 @@ def nan_test_exp(self): checkpoint_freq=1, num_samples=self.num_samples, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: int(100 * random.random())), }) return nan_ea def testDataframe(self): - df = self.ea.dataframe() + df = self.ea.dataframe(self.metric, mode="max") self.assertTrue(isinstance(df, pd.DataFrame)) self.assertEquals(df.shape[0], self.num_samples) @@ -143,21 +145,50 @@ def testAllDataframes(self): self.assertEqual(df.training_iteration.max(), 1) def testIgnoreOtherExperiment(self): - analysis = run( + analysis = tune.run( MyTrainableClass, name="test_example", local_dir=self.test_dir, stop={"training_iteration": 1}, num_samples=1, config={ - "width": sample_from( + "width": tune.sample_from( lambda spec: 10 + int(90 * random.random())), - "height": sample_from(lambda spec: int(100 * random.random())), + "height": tune.sample_from( + lambda spec: int(100 * random.random())), }) - df = analysis.dataframe() + df = analysis.dataframe(self.metric, mode="max") self.assertEquals(df.shape[0], 1) +class ExperimentAnalysisPropertySuite(unittest.TestCase): + def testBestProperties(self): + def train(config): + for i in range(10): + with tune.checkpoint_dir(i): + pass + tune.report(res=config["base"] + i) + + ea = tune.run( + train, + config={"base": tune.grid_search([100, 200, 300])}, + 
metric="res", + mode="max") + + trials = ea.trials + + self.assertEquals(ea.best_trial, trials[2]) + self.assertEquals(ea.best_config, trials[2].config) + self.assertEquals(ea.best_logdir, trials[2].logdir) + self.assertEquals(ea.best_checkpoint, trials[2].checkpoint.value) + self.assertTrue( + all(ea.best_dataframe["trial_id"] == trials[2].trial_id)) + self.assertEquals(ea.results_df.loc[trials[2].trial_id, "res"], 309) + self.assertEquals(ea.best_result["res"], 309) + self.assertEquals(ea.best_result_df.loc[trials[2].trial_id, "res"], + 309) + + if __name__ == "__main__": import pytest import sys diff --git a/python/ray/tune/tests/test_experiment_analysis_mem.py b/python/ray/tune/tests/test_experiment_analysis_mem.py index 4e299a758855..4ef9a51f8fd3 100644 --- a/python/ray/tune/tests/test_experiment_analysis_mem.py +++ b/python/ray/tune/tests/test_experiment_analysis_mem.py @@ -83,10 +83,10 @@ def testCompareTrials(self): num_samples=1, config={"id": grid_search(list(range(5)))}) - max_all = ea.get_best_trial("score", - "max").metric_analysis["score"]["max"] - min_all = ea.get_best_trial("score", - "min").metric_analysis["score"]["min"] + max_all = ea.get_best_trial("score", "max", + "all").metric_analysis["score"]["max"] + min_all = ea.get_best_trial("score", "min", + "all").metric_analysis["score"]["min"] max_last = ea.get_best_trial("score", "max", "last").metric_analysis["score"]["last"] max_avg = ea.get_best_trial("score", "max", @@ -149,7 +149,7 @@ def tearDown(self): def testDataframe(self): analysis = Analysis(self.test_dir) - df = analysis.dataframe() + df = analysis.dataframe(self.metric, mode="max") self.assertTrue(isinstance(df, pd.DataFrame)) self.assertEqual(df.shape[0], self.num_samples * 2) diff --git a/python/ray/tune/tests/test_progress_reporter.py b/python/ray/tune/tests/test_progress_reporter.py index 144f59ede1aa..7b5db73656fc 100644 --- a/python/ray/tune/tests/test_progress_reporter.py +++ b/python/ray/tune/tests/test_progress_reporter.py @@ 
-3,9 +3,10 @@ import os import unittest from unittest.mock import MagicMock, Mock - +from ray import tune from ray.test_utils import run_string_as_driver from ray.tune.trial import Trial +from ray.tune.result import AUTO_RESULT_KEYS from ray.tune.progress_reporter import (CLIReporter, _fair_filter_trials, trial_progress_str) @@ -233,6 +234,43 @@ def testAddMetricColumn(self): reporter.add_metric_column("foo", "bar") self.assertIn("foo", reporter._metric_columns) + def testInfer(self): + reporter = CLIReporter() + test_result = dict(foo_result=1, baz_result=4123, bar_result="testme") + + def test(config): + for i in range(3): + tune.report(**test_result) + + analysis = tune.run(test, num_samples=3) + all_trials = analysis.trials + inferred_results = reporter._infer_user_metrics(all_trials) + for metric in inferred_results: + self.assertNotIn(metric, AUTO_RESULT_KEYS) + self.assertTrue(metric in test_result) + + class TestReporter(CLIReporter): + _output = [] + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._max_report_freqency = 0 + + def report(self, *args, **kwargs): + progress_str = self._progress_str(*args, **kwargs) + self._output.append(progress_str) + + reporter = TestReporter() + analysis = tune.run(test, num_samples=3, progress_reporter=reporter) + found = {k: False for k in test_result} + for output in reporter._output: + for key in test_result: + if key in output: + found[key] = True + assert found["foo_result"] + assert found["baz_result"] + assert not found["bar_result"] + def testProgressStr(self): trials = [] for i in range(5): @@ -285,7 +323,6 @@ def testProgressStr(self): }, {"a": "A"}, fmt="psql", max_rows=3) - print(prog3) assert prog3 == EXPECTED_RESULT_3 def testEndToEndReporting(self): diff --git a/python/ray/tune/tests/test_trial_scheduler.py b/python/ray/tune/tests/test_trial_scheduler.py index 320e76af39d6..507ae81f0aee 100644 --- a/python/ray/tune/tests/test_trial_scheduler.py +++ 
b/python/ray/tune/tests/test_trial_scheduler.py @@ -60,7 +60,11 @@ def basicSetup(self, rule): return t1, t2 def testMedianStoppingConstantPerf(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -75,7 +79,11 @@ def testMedianStoppingConstantPerf(self): TrialScheduler.STOP) def testMedianStoppingOnCompleteOnly(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() self.assertEqual( @@ -87,7 +95,11 @@ def testMedianStoppingOnCompleteOnly(self): TrialScheduler.STOP) def testMedianStoppingGracePeriod(self): - rule = MedianStoppingRule(grace_period=2.5, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=2.5, + min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -104,7 +116,11 @@ def testMedianStoppingGracePeriod(self): TrialScheduler.STOP) def testMedianStoppingMinSamples(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=2) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=2) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -120,7 +136,11 @@ def testMedianStoppingMinSamples(self): TrialScheduler.STOP) def testMedianStoppingUsesMedian(self): - rule = MedianStoppingRule(grace_period=0, min_samples_required=1) + rule = MedianStoppingRule( + metric="episode_reward_mean", + mode="max", + grace_period=0, + 
min_samples_required=1) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -135,7 +155,11 @@ def testMedianStoppingUsesMedian(self): def testMedianStoppingSoftStop(self): rule = MedianStoppingRule( - grace_period=0, min_samples_required=1, hard_stop=False) + metric="episode_reward_mean", + mode="max", + grace_period=0, + min_samples_required=1, + hard_stop=False) t1, t2 = self.basicSetup(rule) runner = mock_trial_runner() rule.on_trial_complete(runner, t1, result(10, 1000)) @@ -265,7 +289,8 @@ def schedulerSetup(self, num_trials, max_t=81): (15, 9) -> (5, 27) -> (2, 45); (34, 3) -> (12, 9) -> (4, 27) -> (2, 42); (81, 1) -> (27, 3) -> (9, 9) -> (3, 27) -> (1, 41);""" - sched = HyperBandScheduler(max_t=max_t) + sched = HyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=max_t) for i in range(num_trials): t = Trial("__fake") sched.on_trial_add(None, t) @@ -321,7 +346,7 @@ def advancedSetup(self): return sched def testConfigSameEta(self): - sched = HyperBandScheduler() + sched = HyperBandScheduler(metric="episode_reward_mean", mode="max") i = 0 while not sched._cur_band_filled(): t = Trial("__fake") @@ -335,7 +360,10 @@ def testConfigSameEta(self): reduction_factor = 10 sched = HyperBandScheduler( - max_t=1000, reduction_factor=reduction_factor) + metric="episode_reward_mean", + mode="max", + max_t=1000, + reduction_factor=reduction_factor) i = 0 while not sched._cur_band_filled(): t = Trial("__fake") @@ -348,7 +376,8 @@ def testConfigSameEta(self): self.assertEqual(sched._hyperbands[0][-1]._r, 1) def testConfigSameEtaSmall(self): - sched = HyperBandScheduler(max_t=1) + sched = HyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=1) i = 0 while len(sched._hyperbands) < 2: t = Trial("__fake") @@ -627,7 +656,11 @@ def tearDown(self): _register_all() # re-register the evicted objects def testLargestBracketFirst(self): - sched = HyperBandForBOHB(max_t=3, reduction_factor=3) 
+ sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="max", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) for i in range(3): t = Trial("__fake") @@ -642,7 +675,11 @@ def testCheckTrialInfoUpdate(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=3, reduction_factor=3) + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="max", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -668,7 +705,11 @@ def testCheckTrialInfoUpdateMin(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=3, reduction_factor=3, mode="min") + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="min", + max_t=3, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -693,7 +734,11 @@ def testPauseResumeChooseTrial(self): def result(score, ts): return {"episode_reward_mean": score, TRAINING_ITERATION: ts} - sched = HyperBandForBOHB(max_t=10, reduction_factor=3, mode="min") + sched = HyperBandForBOHB( + metric="episode_reward_mean", + mode="min", + max_t=10, + reduction_factor=3) runner = _MockTrialRunner(sched) runner._search_alg = MagicMock() runner._search_alg.searcher = MagicMock() @@ -761,6 +806,8 @@ def basicSetup(self, } pbt = PopulationBasedTraining( time_attr="training_iteration", + metric="episode_reward_mean", + mode="max", perturbation_interval=perturbation_interval, resample_probability=resample_prob, quantile_fraction=0.25, @@ -1675,6 +1722,7 @@ def basicSetup(self, } pbt = PopulationBasedTraining( metric="mean_accuracy", + mode="max", time_attr="training_iteration", perturbation_interval=perturbation_interval, resample_probability=resample_prob, @@ -1791,7 +1839,8 @@ def nanSetup(self, scheduler): return t1, t2 
def testAsyncHBOnComplete(self): - scheduler = AsyncHyperBandScheduler(max_t=10, brackets=1) + scheduler = AsyncHyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=10, brackets=1) t1, t2 = self.basicSetup(scheduler) t3 = Trial("PPO") scheduler.on_trial_add(None, t3) @@ -1802,7 +1851,11 @@ def testAsyncHBOnComplete(self): def testAsyncHBGracePeriod(self): scheduler = AsyncHyperBandScheduler( - grace_period=2.5, reduction_factor=3, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=2.5, + reduction_factor=3, + brackets=1) t1, t2 = self.basicSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 1000)) scheduler.on_trial_complete(None, t2, result(10, 1000)) @@ -1819,7 +1872,8 @@ def testAsyncHBGracePeriod(self): TrialScheduler.STOP) def testAsyncHBAllCompletes(self): - scheduler = AsyncHyperBandScheduler(max_t=10, brackets=10) + scheduler = AsyncHyperBandScheduler( + metric="episode_reward_mean", mode="max", max_t=10, brackets=10) trials = [Trial("PPO") for i in range(10)] for t in trials: scheduler.on_trial_add(None, t) @@ -1831,7 +1885,12 @@ def testAsyncHBAllCompletes(self): def testAsyncHBUsesPercentile(self): scheduler = AsyncHyperBandScheduler( - grace_period=1, max_t=10, reduction_factor=2, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=1, + max_t=10, + reduction_factor=2, + brackets=1) t1, t2 = self.basicSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 1000)) scheduler.on_trial_complete(None, t2, result(10, 1000)) @@ -1846,7 +1905,12 @@ def testAsyncHBUsesPercentile(self): def testAsyncHBNanPercentile(self): scheduler = AsyncHyperBandScheduler( - grace_period=1, max_t=10, reduction_factor=2, brackets=1) + metric="episode_reward_mean", + mode="max", + grace_period=1, + max_t=10, + reduction_factor=2, + brackets=1) t1, t2 = self.nanSetup(scheduler) scheduler.on_trial_complete(None, t1, result(10, 450)) scheduler.on_trial_complete(None, t2, result(10, np.nan)) diff --git 
a/python/ray/tune/tests/test_trial_scheduler_pbt.py b/python/ray/tune/tests/test_trial_scheduler_pbt.py index 740616e8ce4d..5af7cb46724a 100644 --- a/python/ray/tune/tests/test_trial_scheduler_pbt.py +++ b/python/ray/tune/tests/test_trial_scheduler_pbt.py @@ -82,15 +82,24 @@ def synchSetup(self, synch, param=[10, 20, 30]): def testAsynchFail(self): analysis = self.synchSetup(False) - self.assertTrue(any(analysis.dataframe()["mean_accuracy"] != 33)) + self.assertTrue( + any( + analysis.dataframe(metric="mean_accuracy", mode="max") + ["mean_accuracy"] != 33)) def testSynchPass(self): analysis = self.synchSetup(True) - self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33)) + self.assertTrue( + all( + analysis.dataframe(metric="mean_accuracy", mode="max")[ + "mean_accuracy"] == 33)) def testSynchPassLast(self): analysis = self.synchSetup(True, param=[30, 20, 10]) - self.assertTrue(all(analysis.dataframe()["mean_accuracy"] == 33)) + self.assertTrue( + all( + analysis.dataframe(metric="mean_accuracy", mode="max")[ + "mean_accuracy"] == 33)) class PopulationBasedTrainingConfigTest(unittest.TestCase): diff --git a/python/ray/tune/tests/tutorial.py b/python/ray/tune/tests/tutorial.py index f0e5fa5af1d2..2aa4422798c6 100644 --- a/python/ray/tune/tests/tutorial.py +++ b/python/ray/tune/tests/tutorial.py @@ -166,7 +166,7 @@ def train_mnist(config): # __run_analysis_begin__ import os -df = analysis.dataframe() +df = analysis.results_df logdir = analysis.get_best_logdir("mean_accuracy", mode="max") state_dict = torch.load(os.path.join(logdir, "model.pth")) diff --git a/python/ray/tune/tune.py b/python/ray/tune/tune.py index f331bebec2b3..075ba5c6910b 100644 --- a/python/ray/tune/tune.py +++ b/python/ray/tune/tune.py @@ -68,6 +68,8 @@ def _report_progress(runner, reporter, done=False): def run( run_or_experiment, name=None, + metric=None, + mode=None, stop=None, time_budget_s=None, config=None, @@ -147,6 +149,12 @@ def run( will need to first register the function: 
``tune.register_trainable("lambda_id", lambda x: ...)``. You can then use ``tune.run("lambda_id")``. + metric (str): Metric to optimize. This metric should be reported + with `tune.report()`. If set, will be passed to the search + algorithm and scheduler. + mode (str): Must be one of [min, max]. Determines whether objective is + minimizing or maximizing the metric attribute. If set, will be + passed to the search algorithm and scheduler. name (str): Name of experiment. stop (dict | callable | :class:`Stopper`): Stopping criteria. If dict, the keys may be any field in the return result of 'train()', @@ -276,6 +284,11 @@ def run( "sync_config=SyncConfig(...)`. See `ray.tune.SyncConfig` for " "more details.") + if mode and mode not in ["min", "max"]: + raise ValueError( + "The `mode` parameter passed to `tune.run()` has to be one of " + "['min', 'max']") + config = config or {} sync_config = sync_config or SyncConfig() set_sync_periods(sync_config) @@ -329,8 +342,7 @@ def run( if not search_alg: search_alg = BasicVariantGenerator() - # TODO (krfricke): Introduce metric/mode as top level API - if config and not search_alg.set_search_properties(None, None, config): + if config and not search_alg.set_search_properties(metric, mode, config): if has_unresolved_values(config): raise ValueError( "You passed a `config` parameter to `tune.run()` with " @@ -339,9 +351,17 @@ def run( "does not contain any more parameter definitions - include " "them in the search algorithm's search space if necessary.") + scheduler = scheduler or FIFOScheduler() + if not scheduler.set_search_properties(metric, mode): + raise ValueError( + "You passed a `metric` or `mode` argument to `tune.run()`, but " + "the scheduler you are using was already instantiated with their " + "own `metric` and `mode` parameters. 
Either remove the arguments " + "from your scheduler or from your call to `tune.run()`") + runner = TrialRunner( search_alg=search_alg, - scheduler=scheduler or FIFOScheduler(), + scheduler=scheduler, local_checkpoint_dir=experiments[0].checkpoint_dir, remote_checkpoint_dir=experiments[0].remote_checkpoint_dir, sync_to_cloud=sync_config.sync_to_cloud, @@ -413,8 +433,8 @@ def run( return ExperimentAnalysis( runner.checkpoint_file, trials=trials, - default_metric=None, - default_mode=None) + default_metric=metric, + default_mode=mode) def run_experiments(experiments, diff --git a/python/ray/worker.py b/python/ray/worker.py index 851f4933e655..536f9d7a13ad 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -491,7 +491,6 @@ def init( _driver_object_store_memory=None, _memory=None, _redis_password=ray_constants.REDIS_DEFAULT_PASSWORD, - _include_java=False, _java_worker_options=None, _code_search_path=None, _temp_dir=None, @@ -580,8 +579,6 @@ def init( _memory: Amount of reservable memory resource to create. _redis_password (str): Prevents external clients without the password from connecting to Redis if provided. - _include_java: Boolean flag indicating whether or not to enable java - workers. _temp_dir (str): If provided, specifies the root temporary directory for the Ray process. Defaults to an OS-specific conventional location, e.g., "/tmp/ray". 
@@ -673,7 +670,6 @@ def init( redis_password=_redis_password, plasma_directory=None, huge_pages=None, - include_java=_include_java, include_dashboard=include_dashboard, dashboard_host=dashboard_host, dashboard_port=dashboard_port, diff --git a/python/requirements_tune.txt b/python/requirements_tune.txt index d06b3e2a6534..880bc8d2ba89 100644 --- a/python/requirements_tune.txt +++ b/python/requirements_tune.txt @@ -26,7 +26,7 @@ timm torch>=1.5.0 torchvision>=0.6.0 transformers -tune-sklearn==0.0.5 +git+git://github.com/ray-project/tune-sklearn@master#tune-sklearn wandb xgboost zoopt>=0.4.0 diff --git a/rllib/BUILD b/rllib/BUILD index 7087beb20d81..21be09591995 100644 --- a/rllib/BUILD +++ b/rllib/BUILD @@ -468,6 +468,16 @@ py_test( srcs = ["agents/marwil/tests/test_marwil.py"] ) +# BCTrainer (sub-type of MARWIL) +py_test( + name = "test_bc", + tags = ["agents_dir"], + size = "medium", + # Include the json data file. + data = ["tests/data/cartpole/large.json"], + srcs = ["agents/marwil/tests/test_bc.py"] +) + # MAMLTrainer py_test( name = "test_maml", diff --git a/rllib/agents/dqn/dqn.py b/rllib/agents/dqn/dqn.py index d07e7e9a77a3..6a4ba288b506 100644 --- a/rllib/agents/dqn/dqn.py +++ b/rllib/agents/dqn/dqn.py @@ -1,16 +1,20 @@ import logging +from typing import Type -from ray.rllib.agents.trainer import with_common_config -from ray.rllib.agents.trainer_template import build_trainer from ray.rllib.agents.dqn.dqn_tf_policy import DQNTFPolicy from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy -from ray.rllib.policy.policy import LEARNER_STATS_KEY +from ray.rllib.agents.trainer import with_common_config +from ray.rllib.agents.trainer_template import build_trainer +from ray.rllib.evaluation.worker_set import WorkerSet +from ray.rllib.execution.concurrency_ops import Concurrently +from ray.rllib.execution.metric_ops import StandardMetricsReporting from ray.rllib.execution.replay_buffer import LocalReplayBuffer +from ray.rllib.execution.replay_ops import 
Replay, StoreToReplayBuffer from ray.rllib.execution.rollout_ops import ParallelRollouts -from ray.rllib.execution.concurrency_ops import Concurrently -from ray.rllib.execution.replay_ops import StoreToReplayBuffer, Replay from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork -from ray.rllib.execution.metric_ops import StandardMetricsReporting +from ray.rllib.policy.policy import LEARNER_STATS_KEY, Policy +from ray.rllib.utils.typing import TrainerConfigDict +from ray.util.iter import LocalIterator logger = logging.getLogger(__name__) @@ -122,7 +126,7 @@ # yapf: enable -def validate_config(config): +def validate_config(config: TrainerConfigDict) -> None: """Checks and updates the config based on settings. Rewrites rollout_fragment_length to take into account n_step truncation. @@ -152,7 +156,8 @@ def validate_config(config): "replay_sequence_length > 1.") -def execution_plan(workers, config): +def execution_plan(workers: WorkerSet, + config: TrainerConfigDict) -> LocalIterator[dict]: if config.get("prioritized_replay"): prio_args = { "prioritized_replay_alpha": config["prioritized_replay_alpha"], @@ -217,7 +222,7 @@ def update_prio(item): return StandardMetricsReporting(train_op, workers, config) -def calculate_rr_weights(config): +def calculate_rr_weights(config: TrainerConfigDict): if not config["training_intensity"]: return [1, 1] # e.g., 32 / 4 -> native ratio of 8.0 @@ -229,7 +234,7 @@ def calculate_rr_weights(config): return weights -def get_policy_class(config): +def get_policy_class(config: TrainerConfigDict) -> Type[Policy]: if config["framework"] == "torch": from ray.rllib.agents.dqn.dqn_torch_policy import DQNTorchPolicy return DQNTorchPolicy @@ -237,7 +242,7 @@ def get_policy_class(config): return DQNTFPolicy -def get_simple_policy_class(config): +def get_simple_policy_class(config: TrainerConfigDict) -> Type[Policy]: if config["framework"] == "torch": from ray.rllib.agents.dqn.simple_q_torch_policy import \ SimpleQTorchPolicy diff 
--git a/rllib/agents/dqn/dqn_tf_policy.py b/rllib/agents/dqn/dqn_tf_policy.py index ddce5b332353..177129f209fc 100644 --- a/rllib/agents/dqn/dqn_tf_policy.py +++ b/rllib/agents/dqn/dqn_tf_policy.py @@ -1,22 +1,26 @@ -from gym.spaces import Discrete -import numpy as np +from typing import Dict +import gym +import numpy as np import ray from ray.rllib.agents.dqn.distributional_q_tf_model import \ DistributionalQTFModel from ray.rllib.agents.dqn.simple_q_tf_policy import TargetNetworkMixin from ray.rllib.models import ModelCatalog +from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.tf.tf_action_dist import Categorical +from ray.rllib.policy.policy import Policy from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import LearningRateSchedule from ray.rllib.policy.tf_policy_template import build_tf_policy from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.utils.exploration import ParameterNoise -from ray.rllib.utils.numpy import convert_to_numpy from ray.rllib.utils.framework import try_import_tf -from ray.rllib.utils.tf_ops import huber_loss, reduce_mean_ignore_inf, \ - minimize_and_clip -from ray.rllib.utils.tf_ops import make_tf_callable +from ray.rllib.utils.numpy import convert_to_numpy +from ray.rllib.utils.tf_ops import (huber_loss, make_tf_callable, + minimize_and_clip, reduce_mean_ignore_inf) +from ray.rllib.utils.typing import (ModelGradients, TensorType, + TrainerConfigDict) tf1, tf, tfv = try_import_tf() @@ -126,9 +130,11 @@ def compute_td_error(obs_t, act_t, rew_t, obs_tp1, done_mask, self.compute_td_error = compute_td_error -def build_q_model(policy, obs_space, action_space, config): +def build_q_model(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> ModelV2: - if not isinstance(action_space, Discrete): + if not isinstance(action_space, gym.spaces.Discrete): raise UnsupportedSpaceException( "Action space {} is not supported for 
DQN.".format(action_space)) @@ -184,9 +190,9 @@ def build_q_model(policy, obs_space, action_space, config): return policy.q_model -def get_distribution_inputs_and_class(policy, - model, - obs_batch, +def get_distribution_inputs_and_class(policy: Policy, + model: ModelV2, + obs_batch: TensorType, *, explore=True, **kwargs): @@ -198,7 +204,8 @@ def get_distribution_inputs_and_class(policy, return policy.q_values, Categorical, [] # state-out -def build_q_losses(policy, model, _, train_batch): +def build_q_losses(policy: Policy, model, _, + train_batch: SampleBatch) -> TensorType: config = policy.config # q network evaluation q_t, q_logits_t, q_dist_t = compute_q_values( @@ -253,7 +260,8 @@ def build_q_losses(policy, model, _, train_batch): return policy.q_loss.loss -def adam_optimizer(policy, config): +def adam_optimizer(policy: Policy, config: TrainerConfigDict + ) -> "tf.keras.optimizers.Optimizer": if policy.config["framework"] in ["tf2", "tfe"]: return tf.keras.optimizers.Adam( learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"]) @@ -262,7 +270,8 @@ def adam_optimizer(policy, config): learning_rate=policy.cur_lr, epsilon=config["adam_epsilon"]) -def clip_gradients(policy, optimizer, loss): +def clip_gradients(policy: Policy, optimizer: "tf.keras.optimizers.Optimizer", + loss: TensorType) -> ModelGradients: if policy.config["grad_clip"] is not None: grads_and_vars = minimize_and_clip( optimizer, @@ -276,25 +285,28 @@ def clip_gradients(policy, optimizer, loss): return grads_and_vars -def build_q_stats(policy, batch): +def build_q_stats(policy: Policy, batch) -> Dict[str, TensorType]: return dict({ "cur_lr": tf.cast(policy.cur_lr, tf.float64), }, **policy.q_loss.stats) -def setup_early_mixins(policy, obs_space, action_space, config): +def setup_early_mixins(policy: Policy, obs_space, action_space, + config: TrainerConfigDict) -> None: LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) -def setup_mid_mixins(policy, obs_space, 
action_space, config): +def setup_mid_mixins(policy: Policy, obs_space, action_space, config) -> None: ComputeTDErrorMixin.__init__(policy) -def setup_late_mixins(policy, obs_space, action_space, config): +def setup_late_mixins(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> None: TargetNetworkMixin.__init__(policy, obs_space, action_space, config) -def compute_q_values(policy, model, obs, explore): +def compute_q_values(policy: Policy, model: ModelV2, obs: TensorType, explore): config = policy.config model_out, state = model({ @@ -361,7 +373,10 @@ def _adjust_nstep(n_step, gamma, obs, actions, rewards, new_obs, dones): rewards[i] += gamma**j * rewards[i + j] -def postprocess_nstep_and_prio(policy, batch, other_agent=None, episode=None): +def postprocess_nstep_and_prio(policy: Policy, + batch: SampleBatch, + other_agent=None, + episode=None) -> SampleBatch: # N-step Q adjustments. if policy.config["n_step"] > 1: _adjust_nstep(policy.config["n_step"], policy.config["gamma"], diff --git a/rllib/agents/dqn/dqn_torch_policy.py b/rllib/agents/dqn/dqn_torch_policy.py index cb6cf77ad409..e400f6b243c2 100644 --- a/rllib/agents/dqn/dqn_torch_policy.py +++ b/rllib/agents/dqn/dqn_torch_policy.py @@ -1,21 +1,27 @@ -from gym.spaces import Discrete +from typing import Dict, List, Tuple +import gym import ray -from ray.rllib.agents.dqn.dqn_tf_policy import postprocess_nstep_and_prio, \ - PRIO_WEIGHTS, Q_SCOPE, Q_TARGET_SCOPE from ray.rllib.agents.a3c.a3c_torch_policy import apply_grad_clipping +from ray.rllib.agents.dqn.dqn_tf_policy import ( + PRIO_WEIGHTS, Q_SCOPE, Q_TARGET_SCOPE, postprocess_nstep_and_prio) from ray.rllib.agents.dqn.dqn_torch_model import DQNTorchModel from ray.rllib.agents.dqn.simple_q_torch_policy import TargetNetworkMixin -from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.models.catalog import ModelCatalog -from ray.rllib.models.torch.torch_action_dist import TorchCategorical +from 
ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.models.torch.torch_action_dist import (TorchCategorical, + TorchDistributionWrapper) +from ray.rllib.policy.policy import Policy +from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.torch_policy import LearningRateSchedule from ray.rllib.policy.torch_policy_template import build_torch_policy from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.utils.exploration.parameter_noise import ParameterNoise from ray.rllib.utils.framework import try_import_torch -from ray.rllib.utils.torch_ops import huber_loss, reduce_mean_ignore_inf, \ - softmax_cross_entropy_with_logits, FLOAT_MIN +from ray.rllib.utils.torch_ops import (FLOAT_MIN, huber_loss, + reduce_mean_ignore_inf, + softmax_cross_entropy_with_logits) +from ray.rllib.utils.typing import TensorType, TrainerConfigDict torch, nn = try_import_torch() F = None @@ -115,9 +121,11 @@ def compute_td_error(obs_t, act_t, rew_t, obs_tp1, done_mask, self.compute_td_error = compute_td_error -def build_q_model_and_distribution(policy, obs_space, action_space, config): +def build_q_model_and_distribution( + policy: Policy, obs_space: gym.Space, action_space: gym.Space, + config: TrainerConfigDict) -> Tuple[ModelV2, TorchDistributionWrapper]: - if not isinstance(action_space, Discrete): + if not isinstance(action_space, gym.spaces.Discrete): raise UnsupportedSpaceException( "Action space {} is not supported for DQN.".format(action_space)) @@ -179,13 +187,14 @@ def build_q_model_and_distribution(policy, obs_space, action_space, config): return policy.q_model, TorchCategorical -def get_distribution_inputs_and_class(policy, - model, - obs_batch, - *, - explore=True, - is_training=False, - **kwargs): +def get_distribution_inputs_and_class( + policy: Policy, + model: ModelV2, + obs_batch: TensorType, + *, + explore: bool = True, + is_training: bool = False, + **kwargs) -> Tuple[TensorType, type, List[TensorType]]: q_vals = 
compute_q_values(policy, model, obs_batch, explore, is_training) q_vals = q_vals[0] if isinstance(q_vals, tuple) else q_vals @@ -193,7 +202,8 @@ def get_distribution_inputs_and_class(policy, return policy.q_values, TorchCategorical, [] # state-out -def build_q_losses(policy, model, _, train_batch): +def build_q_losses(policy: Policy, model, _, + train_batch: SampleBatch) -> TensorType: config = policy.config # Q-network evaluation. q_t, q_logits_t, q_probs_t = compute_q_values( @@ -259,22 +269,25 @@ def build_q_losses(policy, model, _, train_batch): return policy.q_loss.loss -def adam_optimizer(policy, config): +def adam_optimizer(policy: Policy, + config: TrainerConfigDict) -> "torch.optim.Optimizer": return torch.optim.Adam( policy.q_func_vars, lr=policy.cur_lr, eps=config["adam_epsilon"]) -def build_q_stats(policy, batch): +def build_q_stats(policy: Policy, batch) -> Dict[str, TensorType]: return dict({ "cur_lr": policy.cur_lr, }, **policy.q_loss.stats) -def setup_early_mixins(policy, obs_space, action_space, config): +def setup_early_mixins(policy: Policy, obs_space, action_space, + config: TrainerConfigDict) -> None: LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) -def after_init(policy, obs_space, action_space, config): +def after_init(policy: Policy, obs_space: gym.Space, action_space: gym.Space, + config: TrainerConfigDict) -> None: ComputeTDErrorMixin.__init__(policy) TargetNetworkMixin.__init__(policy, obs_space, action_space, config) # Move target net to device (this is done autoatically for the @@ -282,7 +295,11 @@ def after_init(policy, obs_space, action_space, config): policy.target_q_model = policy.target_q_model.to(policy.device) -def compute_q_values(policy, model, obs, explore, is_training=False): +def compute_q_values(policy: Policy, + model: ModelV2, + obs: TensorType, + explore, + is_training: bool = False): config = policy.config model_out, state = model({ @@ -323,12 +340,15 @@ def compute_q_values(policy, model, 
obs, explore, is_training=False): return value, logits, probs_or_logits -def grad_process_and_td_error_fn(policy, optimizer, loss): +def grad_process_and_td_error_fn(policy: Policy, + optimizer: "torch.optim.Optimizer", + loss: TensorType) -> Dict[str, TensorType]: # Clip grads if configured. return apply_grad_clipping(policy, optimizer, loss) -def extra_action_out_fn(policy, input_dict, state_batches, model, action_dist): +def extra_action_out_fn(policy: Policy, input_dict, state_batches, model, + action_dist) -> Dict[str, TensorType]: return {"q_values": policy.q_values} diff --git a/rllib/agents/dqn/simple_q.py b/rllib/agents/dqn/simple_q.py index d24bb786aa4d..443daf7f810a 100644 --- a/rllib/agents/dqn/simple_q.py +++ b/rllib/agents/dqn/simple_q.py @@ -1,14 +1,28 @@ +""" +Simple Q (simple_q) +=================== + +This file defines the distributed Trainer class for the simple Q learning. +See `simple_q_[tf|torch]_policy.py` for the definition of the policy loss. +""" + import logging +from typing import Optional, Type -from ray.rllib.agents.trainer import with_common_config -from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy from ray.rllib.agents.dqn.dqn import DQNTrainer +from ray.rllib.agents.dqn.simple_q_tf_policy import SimpleQTFPolicy +from ray.rllib.agents.dqn.simple_q_torch_policy import SimpleQTorchPolicy +from ray.rllib.agents.trainer import with_common_config +from ray.rllib.evaluation.worker_set import WorkerSet from ray.rllib.execution.concurrency_ops import Concurrently -from ray.rllib.execution.replay_ops import StoreToReplayBuffer, Replay -from ray.rllib.execution.rollout_ops import ParallelRollouts -from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork from ray.rllib.execution.metric_ops import StandardMetricsReporting from ray.rllib.execution.replay_buffer import LocalReplayBuffer +from ray.rllib.execution.replay_ops import Replay, StoreToReplayBuffer +from ray.rllib.execution.rollout_ops import 
ParallelRollouts +from ray.rllib.execution.train_ops import TrainOneStep, UpdateTargetNetwork +from ray.rllib.policy.policy import Policy +from ray.rllib.utils.typing import TrainerConfigDict +from ray.util.iter import LocalIterator logger = logging.getLogger(__name__) @@ -78,16 +92,22 @@ # yapf: enable -def get_policy_class(config): +def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]: + """Policy class picker function. Class is chosen based on DL-framework. + + Args: + config (TrainerConfigDict): The trainer's configuration dict. + + Returns: + Optional[Type[Policy]]: The Policy class to use with PGTrainer. + If None, use `default_policy` provided in build_trainer(). + """ if config["framework"] == "torch": - from ray.rllib.agents.dqn.simple_q_torch_policy import \ - SimpleQTorchPolicy return SimpleQTorchPolicy - else: - return SimpleQTFPolicy -def execution_plan(workers, config): +def execution_plan(workers: WorkerSet, + config: TrainerConfigDict) -> LocalIterator[dict]: local_replay_buffer = LocalReplayBuffer( num_shards=1, learning_starts=config["learning_starts"], diff --git a/rllib/agents/dqn/simple_q_tf_policy.py b/rllib/agents/dqn/simple_q_tf_policy.py index c6a70615b300..526980c1a7f7 100644 --- a/rllib/agents/dqn/simple_q_tf_policy.py +++ b/rllib/agents/dqn/simple_q_tf_policy.py @@ -1,19 +1,24 @@ """Basic example of a DQN policy without any optimizations.""" -from gym.spaces import Discrete import logging +from typing import List, Tuple, Type +import gym import ray -from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.models import ModelCatalog +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.models.tf.tf_action_dist import (Categorical, + TFActionDistribution) from ray.rllib.models.torch.torch_action_dist import TorchCategorical -from ray.rllib.models.tf.tf_action_dist import Categorical -from ray.rllib.utils.annotations import override -from ray.rllib.utils.error import UnsupportedSpaceException +from 
ray.rllib.policy import Policy +from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.tf_policy import TFPolicy from ray.rllib.policy.tf_policy_template import build_tf_policy +from ray.rllib.utils.annotations import override +from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.utils.framework import try_import_tf from ray.rllib.utils.tf_ops import huber_loss, make_tf_callable +from ray.rllib.utils.typing import TensorType, TrainerConfigDict tf1, tf, tfv = try_import_tf() logger = logging.getLogger(__name__) @@ -23,7 +28,8 @@ class TargetNetworkMixin: - def __init__(self, obs_space, action_space, config): + def __init__(self, obs_space: gym.Space, action_space: gym.Space, + config: TrainerConfigDict): @make_tf_callable(self.get_session()) def do_update(): # update_target_fn will be called periodically to copy Q network to @@ -44,9 +50,11 @@ def variables(self): return self.q_func_vars + self.target_q_func_vars -def build_q_models(policy, obs_space, action_space, config): +def build_q_models(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> ModelV2: - if not isinstance(action_space, Discrete): + if not isinstance(action_space, gym.spaces.Discrete): raise UnsupportedSpaceException( "Action space {} is not supported for DQN.".format(action_space)) @@ -72,13 +80,14 @@ def build_q_models(policy, obs_space, action_space, config): return policy.q_model -def get_distribution_inputs_and_class(policy, - q_model, - obs_batch, - *, - explore=True, - is_training=True, - **kwargs): +def get_distribution_inputs_and_class( + policy: Policy, + q_model: ModelV2, + obs_batch: TensorType, + *, + explore=True, + is_training=True, + **kwargs) -> Tuple[TensorType, type, List[TensorType]]: q_vals = compute_q_values(policy, q_model, obs_batch, explore, is_training) q_vals = q_vals[0] if isinstance(q_vals, tuple) else q_vals @@ -88,7 +97,9 @@ def get_distribution_inputs_and_class(policy, 
Categorical), [] # state-outs -def build_q_losses(policy, model, dist_class, train_batch): +def build_q_losses(policy: Policy, model: ModelV2, + dist_class: Type[TFActionDistribution], + train_batch: SampleBatch) -> TensorType: # q network evaluation q_t = compute_q_values( policy, @@ -131,7 +142,11 @@ def build_q_losses(policy, model, dist_class, train_batch): return loss -def compute_q_values(policy, model, obs, explore, is_training=None): +def compute_q_values(policy: Policy, + model: ModelV2, + obs: TensorType, + explore, + is_training=None) -> TensorType: model_out, _ = model({ SampleBatch.CUR_OBS: obs, "is_training": is_training @@ -141,7 +156,9 @@ def compute_q_values(policy, model, obs, explore, is_training=None): return model_out -def setup_late_mixins(policy, obs_space, action_space, config): +def setup_late_mixins(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> None: TargetNetworkMixin.__init__(policy, obs_space, action_space, config) diff --git a/rllib/agents/dqn/simple_q_torch_policy.py b/rllib/agents/dqn/simple_q_torch_policy.py index 941bacb0e1ef..fbdcc05ae676 100644 --- a/rllib/agents/dqn/simple_q_torch_policy.py +++ b/rllib/agents/dqn/simple_q_torch_policy.py @@ -1,15 +1,20 @@ """Basic example of a DQN policy without any optimizations.""" import logging +from typing import Dict +import gym import ray -from ray.rllib.agents.dqn.simple_q_tf_policy import build_q_models, \ - get_distribution_inputs_and_class, compute_q_values -from ray.rllib.policy.sample_batch import SampleBatch +from ray.rllib.agents.dqn.simple_q_tf_policy import ( + build_q_models, compute_q_values, get_distribution_inputs_and_class) +from ray.rllib.models.modelv2 import ModelV2 from ray.rllib.models.torch.torch_action_dist import TorchCategorical +from ray.rllib.policy import Policy +from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.torch_policy_template import build_torch_policy from 
ray.rllib.utils.framework import try_import_torch from ray.rllib.utils.torch_ops import huber_loss +from ray.rllib.utils.typing import TensorType, TrainerConfigDict torch, nn = try_import_torch() F = None @@ -19,7 +24,8 @@ class TargetNetworkMixin: - def __init__(self, obs_space, action_space, config): + def __init__(self, obs_space: gym.Space, action_space: gym.Space, + config: TrainerConfigDict): def do_update(): # Update_target_fn will be called periodically to copy Q network to # target Q network. @@ -30,12 +36,15 @@ def do_update(): self.update_target = do_update -def build_q_model_and_distribution(policy, obs_space, action_space, config): +def build_q_model_and_distribution(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> ModelV2: return build_q_models(policy, obs_space, action_space, config), \ TorchCategorical -def build_q_losses(policy, model, dist_class, train_batch): +def build_q_losses(policy: Policy, model, dist_class, + train_batch: SampleBatch) -> TensorType: # q network evaluation q_t = compute_q_values( policy, @@ -78,12 +87,15 @@ def build_q_losses(policy, model, dist_class, train_batch): return loss -def extra_action_out_fn(policy, input_dict, state_batches, model, action_dist): +def extra_action_out_fn(policy: Policy, input_dict, state_batches, model, + action_dist) -> Dict[str, TensorType]: """Adds q-values to action out dict.""" return {"q_values": policy.q_values} -def setup_late_mixins(policy, obs_space, action_space, config): +def setup_late_mixins(policy: Policy, obs_space: gym.Space, + action_space: gym.Space, + config: TrainerConfigDict) -> None: TargetNetworkMixin.__init__(policy, obs_space, action_space, config) diff --git a/rllib/agents/maml/maml_torch_policy.py b/rllib/agents/maml/maml_torch_policy.py index cf378a4ba681..8a143b455e65 100644 --- a/rllib/agents/maml/maml_torch_policy.py +++ b/rllib/agents/maml/maml_torch_policy.py @@ -199,10 +199,9 @@ def __init__(self, 
current_policy_vars[i] = adapted_policy_vars kls.append(kl_loss) inner_ppo_loss.append(ppo_loss) - inner_kls.append(kls) + inner_kls.extend(kls) - mean_inner_kl = [torch.mean(torch.stack(kls)) for kls in inner_kls] - self.mean_inner_kl = mean_inner_kl + self.mean_inner_kl = inner_kls ppo_obj = [] for i in range(self.num_tasks): @@ -230,10 +229,10 @@ def __init__(self, self.mean_entropy = entropy_loss self.inner_kl_loss = torch.mean( - torch.stack( - [a * b for a, b in zip(self.cur_kl_coeff, mean_inner_kl)])) + torch.stack([ + a * b for a, b in zip(self.cur_kl_coeff, self.mean_inner_kl) + ])) self.loss = torch.mean(torch.stack(ppo_obj)) + self.inner_kl_loss - print("Meta-Loss: ", self.loss, ", Inner KL:", self.inner_kl_loss) def feed_forward(self, obs, policy_vars, policy_config): # Hacky for now, reconstruct FC network with adapted weights @@ -298,7 +297,6 @@ def fc_network(inp, network_vars, hidden_nonlinearity, return pi_new_logits, torch.squeeze(value_fn) def compute_updated_variables(self, loss, network_vars, model): - grad = torch.autograd.grad( loss, inputs=model.parameters(), @@ -389,8 +387,9 @@ def maml_stats(policy, train_batch): class KLCoeffMixin: def __init__(self, config): - self.kl_coeff_val = [config["kl_coeff"] - ] * config["inner_adaptation_steps"] + self.kl_coeff_val = [ + config["kl_coeff"] + ] * config["inner_adaptation_steps"] * config["num_workers"] self.kl_target = self.config["kl_target"] def update_kls(self, sampled_kls): diff --git a/rllib/agents/marwil/__init__.py b/rllib/agents/marwil/__init__.py index f901cf07269e..5b66c96f172c 100644 --- a/rllib/agents/marwil/__init__.py +++ b/rllib/agents/marwil/__init__.py @@ -1,8 +1,11 @@ +from ray.rllib.agents.marwil.bc import BCTrainer, BC_DEFAULT_CONFIG from ray.rllib.agents.marwil.marwil import MARWILTrainer, DEFAULT_CONFIG from ray.rllib.agents.marwil.marwil_tf_policy import MARWILTFPolicy from ray.rllib.agents.marwil.marwil_torch_policy import MARWILTorchPolicy __all__ = [ + "BCTrainer", + 
"BC_DEFAULT_CONFIG", "DEFAULT_CONFIG", "MARWILTFPolicy", "MARWILTorchPolicy", diff --git a/rllib/agents/marwil/bc.py b/rllib/agents/marwil/bc.py new file mode 100644 index 000000000000..81f8afce5970 --- /dev/null +++ b/rllib/agents/marwil/bc.py @@ -0,0 +1,29 @@ +"""Behavioral Cloning (derived from MARWIL). + +Simply uses the MARWIL agent with beta force-set to 0.0. +""" +from ray.rllib.agents.marwil.marwil import MARWILTrainer, \ + DEFAULT_CONFIG as MARWIL_CONFIG +from ray.rllib.utils.typing import TrainerConfigDict + +# yapf: disable +# __sphinx_doc_begin__ +BC_DEFAULT_CONFIG = MARWILTrainer.merge_trainer_configs( + MARWIL_CONFIG, { + "beta": 0.0, + }) +# __sphinx_doc_end__ +# yapf: enable + + +def validate_config(config: TrainerConfigDict): + if config["beta"] != 0.0: + raise ValueError( + "For behavioral cloning, `beta` parameter must be 0.0!") + + +BCTrainer = MARWILTrainer.with_updates( + name="BC", + default_config=BC_DEFAULT_CONFIG, + validate_config=validate_config, +) diff --git a/rllib/agents/marwil/marwil.py b/rllib/agents/marwil/marwil.py index a196015e02e8..e68c61dc931e 100644 --- a/rllib/agents/marwil/marwil.py +++ b/rllib/agents/marwil/marwil.py @@ -16,22 +16,22 @@ # Use importance sampling estimators for reward "input_evaluation": ["is", "wis"], - # Scaling of advantages in exponential terms - # When beta is 0, MARWIL is reduced to imitation learning + # Scaling of advantages in exponential terms. + # When beta is 0.0, MARWIL is reduced to imitation learning. "beta": 1.0, - # Balancing value estimation loss and policy optimization loss + # Balancing value estimation loss and policy optimization loss. "vf_coeff": 1.0, - # Whether to calculate cumulative rewards + # Whether to calculate cumulative rewards. "postprocess_inputs": True, - # Whether to rollout "complete_episodes" or "truncate_episodes" + # Whether to rollout "complete_episodes" or "truncate_episodes". 
"batch_mode": "complete_episodes", - # Learning rate for adam optimizer + # Learning rate for adam optimizer. "lr": 1e-4, - # Number of timesteps collected for each SGD round + # Number of timesteps collected for each SGD round. "train_batch_size": 2000, - # Number of steps max to keep in the batch replay buffer + # Number of steps max to keep in the batch replay buffer. "replay_buffer_size": 100000, - # Number of steps to read before learning starts + # Number of steps to read before learning starts. "learning_starts": 0, # === Parallelism === "num_workers": 0, @@ -45,8 +45,6 @@ def get_policy_class(config): from ray.rllib.agents.marwil.marwil_torch_policy import \ MARWILTorchPolicy return MARWILTorchPolicy - else: - return MARWILTFPolicy def execution_plan(workers, config): diff --git a/rllib/agents/marwil/tests/test_bc.py b/rllib/agents/marwil/tests/test_bc.py new file mode 100644 index 000000000000..31a9b3818618 --- /dev/null +++ b/rllib/agents/marwil/tests/test_bc.py @@ -0,0 +1,65 @@ +import os +from pathlib import Path +import unittest + +import ray +import ray.rllib.agents.marwil as marwil +from ray.rllib.utils.framework import try_import_tf +from ray.rllib.utils.test_utils import check_compute_single_action, \ + framework_iterator + +tf1, tf, tfv = try_import_tf() + + +class TestBC(unittest.TestCase): + @classmethod + def setUpClass(cls): + ray.init() + + @classmethod + def tearDownClass(cls): + ray.shutdown() + + def test_bc_compilation_and_learning_from_offline_file(self): + """Test whether a BCTrainer can be built with all frameworks. + + And learns from a historic-data file. + """ + rllib_dir = Path(__file__).parent.parent.parent.parent + print("rllib dir={}".format(rllib_dir)) + data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json") + print("data_file={} exists={}".format(data_file, + os.path.isfile(data_file))) + + config = marwil.BC_DEFAULT_CONFIG.copy() + config["num_workers"] = 0 # Run locally. 
+ config["evaluation_num_workers"] = 1 + config["evaluation_interval"] = 1 + # Evaluate on actual environment. + config["evaluation_config"] = {"input": "sampler"} + # Learn from offline data. + config["input"] = [data_file] + num_iterations = 300 + + # Test for all frameworks. + for _ in framework_iterator(config, frameworks=("tf", "torch")): + trainer = marwil.BCTrainer(config=config, env="CartPole-v0") + for i in range(num_iterations): + eval_results = trainer.train()["evaluation"] + print("iter={} R={}".format( + i, eval_results["episode_reward_mean"])) + # Learn until some reward is reached on an actual live env. + if eval_results["episode_reward_mean"] > 60.0: + print("learnt!") + break + + check_compute_single_action( + trainer, include_prev_action_reward=True) + + trainer.stop() + + +if __name__ == "__main__": + import pytest + import sys + sys.exit(pytest.main(["-v", __file__])) diff --git a/rllib/agents/marwil/tests/test_marwil.py b/rllib/agents/marwil/tests/test_marwil.py index 49a223da1d06..b390ada22d09 100644 --- a/rllib/agents/marwil/tests/test_marwil.py +++ b/rllib/agents/marwil/tests/test_marwil.py @@ -35,7 +35,9 @@ def test_marwil_compilation_and_learning_from_offline_file(self): config["num_workers"] = 0 # Run locally. config["evaluation_num_workers"] = 1 config["evaluation_interval"] = 1 + # Evaluate on actual environment. config["evaluation_config"] = {"input": "sampler"} + # Learn from offline data. 
config["input"] = [data_file] num_iterations = 300 diff --git a/rllib/agents/mbmpo/mbmpo.py b/rllib/agents/mbmpo/mbmpo.py index cf24f8a7821d..eebbb1dcd3db 100644 --- a/rllib/agents/mbmpo/mbmpo.py +++ b/rllib/agents/mbmpo/mbmpo.py @@ -18,7 +18,8 @@ from ray.rllib.evaluation.metrics import collect_episodes from ray.rllib.agents.mbmpo.model_vector_env import custom_model_vector_env from ray.rllib.evaluation.metrics import collect_metrics -from ray.rllib.agents.mbmpo.utils import calculate_gae_advantages +from ray.rllib.agents.mbmpo.utils import calculate_gae_advantages, \ + MBMPOExploration logger = logging.getLogger(__name__) @@ -69,7 +70,7 @@ # Number of Transition-Dynamics Models for Ensemble "ensemble_size": 5, # Hidden Layers for Model Ensemble - "fcnet_hiddens": [512, 512], + "fcnet_hiddens": [512, 512, 512], # Model Learning Rate "lr": 1e-3, # Max number of training epochs per MBMPO iter @@ -81,10 +82,11 @@ # Normalize Data (obs, action, and deltas) "normalize_data": True, }, + "exploration_config": { + "type": MBMPOExploration, + }, # Workers sample from dynamics models "custom_vector_env": custom_model_vector_env, - # How many enviornments there are per worker (vectorized) - "num_worker_envs": 20, # How many iterations through MAML per MBMPO iteration "num_maml_steps": 10, }) @@ -152,7 +154,7 @@ def update(pi, pi_id): metrics.info[LEARNER_INFO] = fetches metrics.counters[STEPS_TRAINED_COUNTER] += samples.count - if self.step_counter == self.num_steps: + if self.step_counter == self.num_steps - 1: td_metric = self.workers.local_worker().foreach_policy( fit_dynamics)[0] diff --git a/rllib/agents/mbmpo/model_ensemble.py b/rllib/agents/mbmpo/model_ensemble.py index c252e046449f..1c8d03562070 100644 --- a/rllib/agents/mbmpo/model_ensemble.py +++ b/rllib/agents/mbmpo/model_ensemble.py @@ -158,7 +158,7 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, for i in range(self.num_models): self.add_module("TD-model-" + str(i), 
self.dynamics_ensemble[i]) - self.replay_buffer_max = 100000 + self.replay_buffer_max = 10000 self.replay_buffer = None self.optimizers = [ torch.optim.Adam( @@ -170,7 +170,8 @@ def __init__(self, obs_space, action_space, num_outputs, model_config, self.metrics[STEPS_SAMPLED_COUNTER] = 0 # For each worker, choose a random model to choose trajectories from - self.sample_index = np.random.randint(self.num_models) + worker_index = get_global_worker().worker_index + self.sample_index = int((worker_index - 1) / self.num_models) self.global_itr = 0 self.device = (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")) @@ -195,9 +196,10 @@ def fit(self): # Add env samples to Replay Buffer local_worker = get_global_worker() new_samples = local_worker.sample() + # Initial Exploration of 8000 timesteps if not self.global_itr: - tmp = local_worker.sample() - new_samples.concat(tmp) + extra = local_worker.sample() + new_samples.concat(extra) # Process Samples new_samples = process_samples(new_samples) @@ -257,9 +259,6 @@ def convert_to_str(lst): train_losses[ind] = train_losses[ ind].detach().cpu().numpy() - del x - del y - # Validation val_lists = [] for data in zip(*val_loaders): @@ -273,8 +272,6 @@ def convert_to_str(lst): for ind in range(self.num_models): val_losses[ind] = val_losses[ind].detach().cpu().numpy() - del x - del y val_lists = np.array(val_lists) avg_val_losses = np.mean(val_lists, axis=0) diff --git a/rllib/agents/mbmpo/model_vector_env.py b/rllib/agents/mbmpo/model_vector_env.py index 655169e0613e..4a0b56836a3e 100644 --- a/rllib/agents/mbmpo/model_vector_env.py +++ b/rllib/agents/mbmpo/model_vector_env.py @@ -81,7 +81,7 @@ def vector_step(self, actions): next_obs_batch = self.model.predict_model_batches( obs_batch, action_batch, device=self.device) - next_obs_batch = np.clip(next_obs_batch, -50, 50) + next_obs_batch = np.clip(next_obs_batch, -1000, 1000) rew_batch = self.envs[0].reward(obs_batch, action_batch, next_obs_batch) @@ -95,7 
+95,8 @@ def vector_step(self, actions): self.cur_obs = next_obs_batch - return list(obs_batch), list(rew_batch), list(dones_batch), info_batch + return list(next_obs_batch), list(rew_batch), list( + dones_batch), info_batch @override(VectorEnv) def get_unwrapped(self): diff --git a/rllib/agents/mbmpo/utils.py b/rllib/agents/mbmpo/utils.py index 16bb922da74f..b6efcdb477ba 100644 --- a/rllib/agents/mbmpo/utils.py +++ b/rllib/agents/mbmpo/utils.py @@ -1,5 +1,16 @@ import numpy as np import scipy +from typing import Union + +from ray.rllib.models.action_dist import ActionDistribution +from ray.rllib.models.modelv2 import ModelV2 +from ray.rllib.utils.annotations import override +from ray.rllib.utils.exploration.exploration import Exploration +from ray.rllib.utils.framework import try_import_tf, try_import_torch, \ + TensorType + +tf1, tf, tfv = try_import_tf() +torch, _ = try_import_torch() class LinearFeatureBaseline(): @@ -66,3 +77,50 @@ def discount_cumsum(x, discount): """ return scipy.signal.lfilter( [1], [1, float(-discount)], x[::-1], axis=0)[::-1] + + +class MBMPOExploration(Exploration): + """An exploration that simply samples from a distribution. + + The sampling can be made deterministic by passing explore=False into + the call to `get_exploration_action`. + Also allows for scheduled parameters for the distributions, such as + lowering stddev, temperature, etc.. over time. + """ + + def __init__(self, action_space, *, framework: str, model: ModelV2, + **kwargs): + """Initializes a StochasticSampling Exploration object. + + Args: + action_space (Space): The gym action space used by the environment. + framework (str): One of None, "tf", "torch". 
+ """ + assert framework is not None + self.timestep = 0 + self.worker_index = kwargs["worker_index"] + super().__init__( + action_space, model=model, framework=framework, **kwargs) + + @override(Exploration) + def get_exploration_action(self, + *, + action_distribution: ActionDistribution, + timestep: Union[int, TensorType], + explore: bool = True): + assert self.framework == "torch" + return self._get_torch_exploration_action(action_distribution, explore) + + def _get_torch_exploration_action(self, action_dist, explore): + action = action_dist.sample() + logp = action_dist.sampled_action_logp() + + batch_size = action.size()[0] + + # Initial Random Exploration for Real Env Interaction + if self.worker_index == 0 and self.timestep < 8000: + print("Using Random") + action = [self.action_space.sample() for _ in range(batch_size)] + logp = [0.0 for _ in range(batch_size)] + self.timestep += batch_size + return action, logp diff --git a/rllib/agents/registry.py b/rllib/agents/registry.py index 2f46106c7850..001f921d4d0a 100644 --- a/rllib/agents/registry.py +++ b/rllib/agents/registry.py @@ -5,9 +5,24 @@ from ray.rllib.contrib.registry import CONTRIBUTED_ALGORITHMS -def _import_sac(): - from ray.rllib.agents import sac - return sac.SACTrainer +def _import_a2c(): + from ray.rllib.agents import a3c + return a3c.A2CTrainer + + +def _import_a3c(): + from ray.rllib.agents import a3c + return a3c.A3CTrainer + + +def _import_apex(): + from ray.rllib.agents import dqn + return dqn.ApexTrainer + + +def _import_apex_ddpg(): + from ray.rllib.agents import ddpg + return ddpg.ApexDDPGTrainer def _import_appo(): @@ -15,14 +30,14 @@ def _import_appo(): return ppo.APPOTrainer -def _import_ddppo(): - from ray.rllib.agents import ppo - return ppo.DDPPOTrainer +def _import_ars(): + from ray.rllib.agents import ars + return ars.ARSTrainer -def _import_qmix(): - from ray.rllib.agents import qmix - return qmix.QMixTrainer +def _import_bc(): + from ray.rllib.agents import marwil + return 
marwil.BCTrainer def _import_ddpg(): @@ -30,19 +45,19 @@ def _import_ddpg(): return ddpg.DDPGTrainer -def _import_apex_ddpg(): - from ray.rllib.agents import ddpg - return ddpg.ApexDDPGTrainer +def _import_ddppo(): + from ray.rllib.agents import ppo + return ppo.DDPPOTrainer -def _import_td3(): - from ray.rllib.agents import ddpg - return ddpg.TD3Trainer +def _import_dqn(): + from ray.rllib.agents import dqn + return dqn.DQNTrainer -def _import_ppo(): - from ray.rllib.agents import ppo - return ppo.PPOTrainer +def _import_dreamer(): + from ray.rllib.agents import dreamer + return dreamer.DREAMERTrainer def _import_es(): @@ -50,34 +65,24 @@ def _import_es(): return es.ESTrainer -def _import_ars(): - from ray.rllib.agents import ars - return ars.ARSTrainer - - -def _import_dqn(): - from ray.rllib.agents import dqn - return dqn.DQNTrainer - - -def _import_simple_q(): - from ray.rllib.agents import dqn - return dqn.SimpleQTrainer +def _import_impala(): + from ray.rllib.agents import impala + return impala.ImpalaTrainer -def _import_apex(): - from ray.rllib.agents import dqn - return dqn.ApexTrainer +def _import_maml(): + from ray.rllib.agents import maml + return maml.MAMLTrainer -def _import_a3c(): - from ray.rllib.agents import a3c - return a3c.A3CTrainer +def _import_marwil(): + from ray.rllib.agents import marwil + return marwil.MARWILTrainer -def _import_a2c(): - from ray.rllib.agents import a3c - return a3c.A2CTrainer +def _import_mbmpo(): + from ray.rllib.agents import mbmpo + return mbmpo.MBMPOTrainer def _import_pg(): @@ -85,53 +90,54 @@ def _import_pg(): return pg.PGTrainer -def _import_impala(): - from ray.rllib.agents import impala - return impala.ImpalaTrainer +def _import_ppo(): + from ray.rllib.agents import ppo + return ppo.PPOTrainer -def _import_marwil(): - from ray.rllib.agents import marwil - return marwil.MARWILTrainer +def _import_qmix(): + from ray.rllib.agents import qmix + return qmix.QMixTrainer -def _import_maml(): - from ray.rllib.agents 
import maml - return maml.MAMLTrainer +def _import_sac(): + from ray.rllib.agents import sac + return sac.SACTrainer -def _import_mbmpo(): - from ray.rllib.agents import mbmpo - return mbmpo.MBMPOTrainer +def _import_simple_q(): + from ray.rllib.agents import dqn + return dqn.SimpleQTrainer -def _import_dreamer(): - from ray.rllib.agents import dreamer - return dreamer.DREAMERTrainer +def _import_td3(): + from ray.rllib.agents import ddpg + return ddpg.TD3Trainer ALGORITHMS = { - "SAC": _import_sac, - "DDPG": _import_ddpg, + "A2C": _import_a2c, + "A3C": _import_a3c, + "APEX": _import_apex, "APEX_DDPG": _import_apex_ddpg, - "TD3": _import_td3, - "PPO": _import_ppo, - "ES": _import_es, + "APPO": _import_appo, "ARS": _import_ars, + "BC": _import_bc, + "ES": _import_es, + "DDPG": _import_ddpg, + "DDPPO": _import_ddppo, "DQN": _import_dqn, - "SimpleQ": _import_simple_q, - "APEX": _import_apex, - "A3C": _import_a3c, - "A2C": _import_a2c, - "PG": _import_pg, + "DREAMER": _import_dreamer, "IMPALA": _import_impala, - "QMIX": _import_qmix, - "APPO": _import_appo, - "DDPPO": _import_ddppo, - "MARWIL": _import_marwil, "MAML": _import_maml, + "MARWIL": _import_marwil, "MBMPO": _import_mbmpo, - "DREAMER": _import_dreamer, + "PG": _import_pg, + "PPO": _import_ppo, + "QMIX": _import_qmix, + "SAC": _import_sac, + "SimpleQ": _import_simple_q, + "TD3": _import_td3, } diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py index 40107668cc3a..92e252e52181 100644 --- a/rllib/agents/trainer.py +++ b/rllib/agents/trainer.py @@ -1160,6 +1160,13 @@ def __setstate__(self, state: dict): if "optimizer" in state: self.optimizer.restore(state["optimizer"]) + @staticmethod + def with_updates(**overrides) -> Type["Trainer"]: + raise NotImplementedError( + "`with_updates` may only be called on Trainer sub-classes " + "that were generated via the `ray.rllib.agents.trainer_template." 
+ "build_trainer()` function!") + def _register_if_needed(self, env_object: Union[str, EnvType]): if isinstance(env_object, str): return env_object diff --git a/rllib/agents/trainer_template.py b/rllib/agents/trainer_template.py index 1461d46b9583..2241621d9a0b 100644 --- a/rllib/agents/trainer_template.py +++ b/rllib/agents/trainer_template.py @@ -139,27 +139,30 @@ def _before_evaluate(self): if before_evaluate_fn: before_evaluate_fn(self) + @override(Trainer) def __getstate__(self): state = Trainer.__getstate__(self) state["train_exec_impl"] = ( self.train_exec_impl.shared_metrics.get().save()) return state + @override(Trainer) def __setstate__(self, state): Trainer.__setstate__(self, state) self.train_exec_impl.shared_metrics.get().restore( state["train_exec_impl"]) - def with_updates(**overrides): - """Build a copy of this trainer with the specified overrides. + @staticmethod + @override(Trainer) + def with_updates(**overrides) -> Type[Trainer]: + """Build a copy of this trainer with the specified overrides. - Arguments: - overrides (dict): use this to override any of the arguments - originally passed to build_trainer() for this policy. - """ - return build_trainer(**dict(original_kwargs, **overrides)) + Keyword Args: + overrides (dict): use this to override any of the arguments + originally passed to build_trainer() for this policy. 
+ """ + return build_trainer(**dict(original_kwargs, **overrides)) - trainer_cls.with_updates = staticmethod(with_updates) trainer_cls.__name__ = name trainer_cls.__qualname__ = name return trainer_cls diff --git a/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py index be957e1fecca..b3755d8e371d 100644 --- a/rllib/evaluation/sampler.py +++ b/rllib/evaluation/sampler.py @@ -1093,7 +1093,10 @@ def _process_observations_w_trajectory_view_api( # Invoke the step callback after the step is logged to the episode callbacks.on_episode_step( - worker=worker, base_env=base_env, episode=episode) + worker=worker, + base_env=base_env, + episode=episode, + env_index=env_id) # Cut the batch if ... # - all-agents-done and not packing multiple episodes into one diff --git a/rllib/examples/env/halfcheetah.py b/rllib/examples/env/mbmpo_env.py similarity index 57% rename from rllib/examples/env/halfcheetah.py rename to rllib/examples/env/mbmpo_env.py index 70f946468a29..22315e547036 100644 --- a/rllib/examples/env/halfcheetah.py +++ b/rllib/examples/env/mbmpo_env.py @@ -1,21 +1,5 @@ import numpy as np -from gym.envs.mujoco import HalfCheetahEnv -import inspect - - -def get_all_function_arguments(function, locals): - kwargs_dict = {} - for arg in inspect.getfullargspec(function).kwonlyargs: - if arg not in ["args", "kwargs"]: - kwargs_dict[arg] = locals[arg] - args = [locals[arg] for arg in inspect.getfullargspec(function).args] - - if "args" in locals: - args += locals["args"] - - if "kwargs" in locals: - kwargs_dict.update(locals["kwargs"]) - return args, kwargs_dict +from gym.envs.mujoco import HalfCheetahEnv, HopperEnv class HalfCheetahWrapper(HalfCheetahEnv): @@ -42,8 +26,28 @@ def reward(self, obs, action, obs_next): return np.minimum(np.maximum(-1000.0, reward), 1000.0) +class HopperWrapper(HopperEnv): + """Hopper Wrapper that wraps Mujoco Hopper-v2 env + with an additional defined reward function for model-based RL. + + This is currently used for MBMPO. 
+ """ + + def __init__(self, *args, **kwargs): + HopperEnv.__init__(self, *args, **kwargs) + + def reward(self, obs, action, obs_next): + alive_bonus = 1.0 + assert obs.ndim == 2 and action.ndim == 2 + assert obs.shape == obs_next.shape and action.shape[0] == obs.shape[0] + vel = obs_next[:, 5] + ctrl_cost = 1e-3 * np.sum(np.square(action), axis=1) + reward = vel + alive_bonus - ctrl_cost + return np.minimum(np.maximum(-1000.0, reward), 1000.0) + + if __name__ == "__main__": - env = HalfCheetahWrapper() + env = HopperWrapper() env.reset() for _ in range(1000): env.step(env.action_space.sample()) diff --git a/rllib/examples/unity3d_env_local.py b/rllib/examples/unity3d_env_local.py index 60eace4a8a90..1c4c7c45b410 100644 --- a/rllib/examples/unity3d_env_local.py +++ b/rllib/examples/unity3d_env_local.py @@ -117,18 +117,18 @@ config["exploration_config"] = { "type": "Curiosity", "eta": 0.1, - "lr": tune.grid_search([0.0003, 0.001]), + "lr": 0.001, # No actual feature net: map directly from observations to feature # vector (linearly). "feature_net_config": { - "fcnet_hiddens": tune.grid_search([[], [256]]), + "fcnet_hiddens": [], "fcnet_activation": "relu", }, "sub_exploration": { "type": "StochasticSampling", }, - "forward_net_activation": tune.grid_search(["relu", "swish"]), - "inverse_net_activation": tune.grid_search(["relu", "swish"]), + "forward_net_activation": "relu", + "inverse_net_activation": "relu", } stop = { diff --git a/rllib/offline/json_reader.py b/rllib/offline/json_reader.py index 1229bdd07f24..e6315f561720 100644 --- a/rllib/offline/json_reader.py +++ b/rllib/offline/json_reader.py @@ -93,7 +93,7 @@ def _postprocess_if_needed(self, return SampleBatch.concat_samples(out) else: # TODO(ekl) this is trickier since the alignments between agent - # trajectories in the episode are not available any more. + # trajectories in the episode are not available any more. 
raise NotImplementedError( "Postprocessing of multi-agent data not implemented yet.") diff --git a/rllib/policy/torch_policy_template.py b/rllib/policy/torch_policy_template.py index 1e1fd480665e..ec91a58d13ae 100644 --- a/rllib/policy/torch_policy_template.py +++ b/rllib/policy/torch_policy_template.py @@ -85,7 +85,7 @@ def build_torch_policy( values given the policy and training batch. If None, will use `TorchPolicy.extra_grad_info()` instead. The stats dict is used for logging (e.g. in TensorBoard). - extra_action_out_fn (Optional[Callable[[Policy, Dict[str, TensorType, + extra_action_out_fn (Optional[Callable[[Policy, Dict[str, TensorType], List[TensorType], ModelV2, TorchDistributionWrapper]], Dict[str, TensorType]]]): Optional callable that returns a dict of extra values to include in experiences. If None, no extra computations diff --git a/rllib/tuned_examples/marwil/cartpole-bc.yaml b/rllib/tuned_examples/marwil/cartpole-bc.yaml new file mode 100644 index 000000000000..c0c0af0da741 --- /dev/null +++ b/rllib/tuned_examples/marwil/cartpole-bc.yaml @@ -0,0 +1,20 @@ +# To generate training data, first run: +# $ ./train.py --run=PPO --env=CartPole-v0 \ +# --stop='{"timesteps_total": 50000}' \ +# --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}' +cartpole-bc: + env: CartPole-v0 + run: BC + stop: + timesteps_total: 500000 + config: + # Works for both torch and tf. + framework: tf + # In order to evaluate on an actual environment, use these following + # settings: + evaluation_num_workers: 1 + evaluation_interval: 1 + evaluation_config: + input: sampler + # The historic (offline) data file from the PPO run (at the top). 
+ input: /tmp/out diff --git a/rllib/tuned_examples/marwil/cartpole-marwil.yaml b/rllib/tuned_examples/marwil/cartpole-marwil.yaml index 6e9643778a58..06759ef9510b 100644 --- a/rllib/tuned_examples/marwil/cartpole-marwil.yaml +++ b/rllib/tuned_examples/marwil/cartpole-marwil.yaml @@ -16,8 +16,6 @@ cartpole-marwil: evaluation_interval: 1 evaluation_config: input: sampler - # Compare IL (beta=0) vs MARWIL. - beta: - grid_search: [0, 1] + beta: 1.0 # Compare to behavior cloning (beta=0.0). # The historic (offline) data file from the PPO run (at the top). input: /tmp/out diff --git a/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml b/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml index 9e69fde03ffb..7980894aff08 100644 --- a/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml +++ b/rllib/tuned_examples/mbmpo/halfcheetah-mbmpo.yaml @@ -1,11 +1,12 @@ -halfcheetah-mb-mpo: - env: ray.rllib.examples.env.halfcheetah.HalfCheetahWrapper +halfcheetah-mbmpo: + env: ray.rllib.examples.env.mbmpo_env.HalfCheetahWrapper run: MBMPO stop: training_iteration: 500 config: # Only supported in torch right now framework: torch + # 200 in paper, 1000 will take forever horizon: 200 num_envs_per_worker: 20 inner_adaptation_steps: 1 @@ -14,12 +15,13 @@ halfcheetah-mb-mpo: lambda: 1.0 lr: 0.001 clip_param: 0.5 - kl_target: 0.01 + kl_target: 0.003 kl_coeff: 0.0000000001 num_workers: 20 num_gpus: 1 inner_lr: 0.001 clip_actions: False + num_maml_steps: 15 model: fcnet_hiddens: [32, 32] free_log_std: True diff --git a/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml b/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml new file mode 100644 index 000000000000..28d6a0b54d2e --- /dev/null +++ b/rllib/tuned_examples/mbmpo/hopper-mbmpo.yaml @@ -0,0 +1,27 @@ +hopper-mbmpo: + env: ray.rllib.examples.env.mbmpo_env.HopperWrapper + run: MBMPO + stop: + training_iteration: 500 + config: + # Only supported in torch right now + framework: torch + # 200 in paper, 1000 will take forever + horizon: 200 + 
num_envs_per_worker: 20 + inner_adaptation_steps: 1 + maml_optimizer_steps: 8 + gamma: 0.99 + lambda: 1.0 + lr: 0.001 + clip_param: 0.5 + kl_target: 0.003 + kl_coeff: 0.0000000001 + num_workers: 20 + num_gpus: 1 + inner_lr: 0.001 + clip_actions: False + num_maml_steps: 15 + model: + fcnet_hiddens: [32, 32] + free_log_std: True diff --git a/src/ray/core_worker/task_manager.cc b/src/ray/core_worker/task_manager.cc index 9ed0ce0e0cd6..8d1fc2eeb435 100644 --- a/src/ray/core_worker/task_manager.cc +++ b/src/ray/core_worker/task_manager.cc @@ -301,8 +301,8 @@ bool TaskManager::PendingTaskFailed(const TaskID &task_id, rpc::ErrorType error_ if (num_retries_left != 0) { auto retries_str = num_retries_left == -1 ? "infinite" : std::to_string(num_retries_left); - RAY_LOG(ERROR) << retries_str << " retries left for task " << spec.TaskId() - << ", attempting to resubmit."; + RAY_LOG(INFO) << retries_str << " retries left for task " << spec.TaskId() + << ", attempting to resubmit."; retry_task_callback_(spec, /*delay=*/true); will_retry = true; } else { @@ -315,8 +315,8 @@ bool TaskManager::PendingTaskFailed(const TaskID &task_id, rpc::ErrorType error_ (current_time_ms() - last_log_time_ms_) > kTaskFailureLoggingFrequencyMillis)) { if (num_failure_logs_++ == kTaskFailureThrottlingThreshold) { - RAY_LOG(ERROR) << "Too many failure logs, throttling to once every " - << kTaskFailureLoggingFrequencyMillis << " millis."; + RAY_LOG(WARNING) << "Too many failure logs, throttling to once every " + << kTaskFailureLoggingFrequencyMillis << " millis."; } last_log_time_ms_ = current_time_ms(); if (status != nullptr) { diff --git a/src/ray/raylet/node_manager.cc b/src/ray/raylet/node_manager.cc index 7d4fa0961dfd..3e5514bf47a6 100644 --- a/src/ray/raylet/node_manager.cc +++ b/src/ray/raylet/node_manager.cc @@ -2242,11 +2242,11 @@ void NodeManager::MarkObjectsAsFailed( // If we failed to save the error code, log a warning and push an error message // to the driver. 
std::ostringstream stream; - stream << "An plasma error (" << status.ToString() << ") occurred while saving" + stream << "A plasma error (" << status.ToString() << ") occurred while saving" << " error code to object " << object_id << ". Anyone who's getting this" << " object may hang forever."; std::string error_message = stream.str(); - RAY_LOG(WARNING) << error_message; + RAY_LOG(ERROR) << error_message; auto error_data_ptr = gcs::CreateErrorTableData("task", error_message, current_time_ms(), job_id); RAY_CHECK_OK(gcs_client_->Errors().AsyncReportJobError(error_data_ptr, nullptr)); diff --git a/src/ray/raylet/worker_pool.cc b/src/ray/raylet/worker_pool.cc index caf4f67e8d9c..6ade2367d735 100644 --- a/src/ray/raylet/worker_pool.cc +++ b/src/ray/raylet/worker_pool.cc @@ -133,6 +133,10 @@ WorkerPool::WorkerPool(boost::asio::io_service &io_service, int num_workers, void WorkerPool::Start(int num_workers) { RAY_CHECK(!RayConfig::instance().enable_multi_tenancy()); for (auto &entry : states_by_lang_) { + if (entry.first == Language::JAVA) { + // Disable initial workers for Java. + continue; + } auto &state = entry.second; int num_worker_processes = static_cast( std::ceil(static_cast(num_workers) / state.num_workers_per_process)); @@ -387,9 +391,15 @@ Process WorkerPool::StartProcess(const std::vector &worker_command_ argv.push_back(NULL); Process child(argv.data(), io_service_, ec, /*decouple=*/false, env); if (!child.IsValid() || ec) { - // The worker failed to start. This is a fatal error. - RAY_LOG(FATAL) << "Failed to start worker with return value " << ec << ": " - << ec.message(); + // errorcode 24: Too many files. This is caused by ulimit. + if (ec.value() == 24) { + RAY_LOG(FATAL) << "Too many workers, failed to create a file. Try setting " + << "`ulimit -n ` then restart Ray."; + } else { + // The worker failed to start. This is a fatal error. 
+ RAY_LOG(FATAL) << "Failed to start worker with return value " << ec << ": " + << ec.message(); + } } return child; } diff --git a/src/ray/raylet/worker_pool_test.cc b/src/ray/raylet/worker_pool_test.cc index 56b251e66069..2d1a831d389f 100644 --- a/src/ray/raylet/worker_pool_test.cc +++ b/src/ray/raylet/worker_pool_test.cc @@ -273,13 +273,10 @@ TEST_P(WorkerPoolTest, StartupJavaWorkerProcessCount) { TEST_P(WorkerPoolTest, InitialWorkerProcessCount) { if (!RayConfig::instance().enable_multi_tenancy()) { worker_pool_->Start(1); - // Here we try to start only 1 worker for each worker language. But since each Java - // worker process contains exactly NUM_WORKERS_PER_PROCESS_JAVA (3) workers here, - // it's expected to see 3 workers for Java and 1 worker for Python, instead of 1 for - // each worker language. - ASSERT_NE(worker_pool_->NumWorkersStarting(), 1 * LANGUAGES.size()); - ASSERT_EQ(worker_pool_->NumWorkersStarting(), 1 + NUM_WORKERS_PER_PROCESS_JAVA); - ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), LANGUAGES.size()); + // Here we try to start only 1 worker for each worker language. But since we disabled + // initial workers for Java, we expect to see only 1 worker which is a Python worker. + ASSERT_EQ(worker_pool_->NumWorkersStarting(), 1); + ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), 1); } else { ASSERT_EQ(worker_pool_->NumWorkersStarting(), 0); ASSERT_EQ(worker_pool_->NumWorkerProcessesStarting(), 0); diff --git a/streaming/BUILD.bazel b/streaming/BUILD.bazel index 83d3c3a5e168..175643069a28 100644 --- a/streaming/BUILD.bazel +++ b/streaming/BUILD.bazel @@ -150,6 +150,7 @@ cc_library( "@bazel_tools//src/conditions:windows": [ # TODO(mehrdadn): This is to resolve symbols on Windows for now. Should remove this later. (See d7f8d18.) 
"//:core_worker_lib", + "//:exported_streaming_internal", ], "//conditions:default": [ "core_worker_lib.so", diff --git a/streaming/java/pom.xml b/streaming/java/pom.xml index 003c7670ada6..91c4186816c0 100644 --- a/streaming/java/pom.xml +++ b/streaming/java/pom.xml @@ -26,6 +26,12 @@ scm:git:ssh://github.com:ray-project/ray.git + + + https://ray.io + + + ossrh diff --git a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java index 1c820e6f23c6..e331208247ed 100644 --- a/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java +++ b/streaming/java/streaming-api/src/main/java/io/ray/streaming/api/context/ClusterStarter.java @@ -68,7 +68,6 @@ static synchronized void startCluster(boolean isCrossLanguage, boolean isLocal) String.format("--raylet-socket-name=%s", RAYLET_SOCKET_NAME), String.format("--node-manager-port=%s", nodeManagerPort), "--load-code-from-local", - "--include-java", "--java-worker-options=" + workerOptions, "--system-config=" + new Gson().toJson(config) ); diff --git a/streaming/python/examples/wordcount.py b/streaming/python/examples/wordcount.py index 66b1a811272d..2f62b19dad54 100644 --- a/streaming/python/examples/wordcount.py +++ b/streaming/python/examples/wordcount.py @@ -65,7 +65,7 @@ def splitter(line): args = parser.parse_args() titles_file = str(args.titles_file) - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder() \ .option(Config.CHANNEL_TYPE, Config.NATIVE_CHANNEL) \ diff --git a/streaming/python/tests/test_failover.py b/streaming/python/tests/test_failover.py index def93f43edc2..adab217e09b3 100644 --- a/streaming/python/tests/test_failover.py +++ b/streaming/python/tests/test_failover.py @@ -8,7 +8,7 @@ def test_word_count(): try: - ray.init(_load_code_from_local=True, _include_java=True) + 
ray.init(_load_code_from_local=True) # time.sleep(10) # for gdb to attach ctx = StreamingContext.Builder() \ .option("streaming.context-backend.type", "local_file") \ diff --git a/streaming/python/tests/test_hybrid_stream.py b/streaming/python/tests/test_hybrid_stream.py index 7d79b9a0ef4d..e257f0d9fd5a 100644 --- a/streaming/python/tests/test_hybrid_stream.py +++ b/streaming/python/tests/test_hybrid_stream.py @@ -35,7 +35,6 @@ def test_hybrid_stream(): assert not ray.is_initialized() ray.init( _load_code_from_local=True, - _include_java=True, _java_worker_options=java_worker_options, _system_config={"num_workers_per_process_java": 1}) diff --git a/streaming/python/tests/test_stream.py b/streaming/python/tests/test_stream.py index 06dbeba850a5..f99033d19959 100644 --- a/streaming/python/tests/test_stream.py +++ b/streaming/python/tests/test_stream.py @@ -3,7 +3,7 @@ def test_data_stream(): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder().build() stream = ctx.from_values(1, 2, 3) java_stream = stream.as_java_stream() @@ -17,7 +17,7 @@ def test_data_stream(): def test_key_data_stream(): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder().build() key_stream = ctx.from_values( "a", "b", "c").map(lambda x: (x, 1)).key_by(lambda x: x[0]) @@ -32,7 +32,7 @@ def test_key_data_stream(): def test_stream_config(): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder().build() stream = ctx.from_values(1, 2, 3) stream.with_config("k1", "v1") diff --git a/streaming/python/tests/test_union_stream.py b/streaming/python/tests/test_union_stream.py index 4f24226c4b9f..0c655b1d03d7 100644 --- a/streaming/python/tests/test_union_stream.py +++ b/streaming/python/tests/test_union_stream.py @@ -5,7 +5,7 @@ def test_union_stream(): - 
ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder() \ .option("streaming.metrics.reporters", "") \ .build() diff --git a/streaming/python/tests/test_word_count.py b/streaming/python/tests/test_word_count.py index 07127b96ed10..372ae3e1e44e 100644 --- a/streaming/python/tests/test_word_count.py +++ b/streaming/python/tests/test_word_count.py @@ -4,7 +4,7 @@ def test_word_count(): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder() \ .build() ctx.read_text_file(__file__) \ @@ -23,7 +23,7 @@ def test_word_count(): def test_simple_word_count(): - ray.init(_load_code_from_local=True, _include_java=True) + ray.init(_load_code_from_local=True) ctx = StreamingContext.Builder() \ .build() sink_file = "/tmp/ray_streaming_test_simple_word_count.txt"