From 9a6b7092e834b70c89babfc8f6f0cc9cd5b88c1d Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 14:16:54 -0700 Subject: [PATCH 01/13] update --- doc/source/cluster/autoscaling.rst | 109 ++++++++++++++++++ doc/source/cluster/launcher.rst | 9 -- doc/source/index.rst | 1 + python/ray/autoscaler/aws/example-full.yaml | 2 +- .../aws/example-multi-node-type.yaml | 9 +- .../autoscaler/resource_demand_scheduler.py | 84 ++++++++------ 6 files changed, 163 insertions(+), 51 deletions(-) create mode 100644 doc/source/cluster/autoscaling.rst diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst new file mode 100644 index 000000000000..4712a2b17ac1 --- /dev/null +++ b/doc/source/cluster/autoscaling.rst @@ -0,0 +1,109 @@ +.. _ref-autoscaling: + +Cluster Autoscaling +=================== + +Basics +------ + +The Ray Cluster Launcher will automatically enable a load-based autoscaler. When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up the specified ``max_workers`` limit (in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. + +The default idle timeout is 5 minutes, which can be set in the cluster config. This is to prevent excessive node churn which could impact performance and increase costs (in AWS / GCP there is a minimum billing charge of 1 minute per instance, after which usage is billed by the second). + +The basic autoscaling config settings are as follows: + +.. code:: + + # An unique identifier for the head node and workers of this cluster. + cluster_name: default + + # The minimum number of workers nodes to launch in addition to the head + # node. This number should be >= 0. + min_workers: 0 + + # The autoscaler will scale up the cluster to this target fraction of resource + # usage. For example, if a cluster of 10 nodes is 100% busy and + # target_utilization is 0.8, it would resize the cluster to 13. This fraction + # can be decreased to increase the aggressiveness of upscaling. + # This max value allowed is 1.0, which is the most conservative setting. + target_utilization_fraction: 0.8 + + # If a node is idle for this many minutes, it will be removed. + idle_timeout_minutes: 5 + +Multiple Node Type Autoscaling +------------------------------ + +In 1.0, Ray supports multiple cluster node types. In this mode of operation, the scheduler will look at the queue of resource shape demands from the cluster (e.g., there might be 10 tasks queued each requesting ``{"GPU": 4, "CPU": 16}``), and specifically tries to add nodes that can fulfill these resource demands. This enables precise, rapid scale up as the autoscaler has more visibility into the backlog of work and resource shapes. + +The concept of a cluster node type encompasses both the physical instance type (e.g., AWS p3.8xl GPU nodes vs m4.16xl CPU nodes), as well as other attributes (e.g., IAM role, the machine image, etc). Custom resources can be specified for each node type so that Ray is aware of the demand for specific node types at the application level (e.g., a task may request to be placed on a machine with a specific role or machine image via custom resource). + +An example of configuring multiple node types is as follows `(full example) `__: + +.. code:: + + # Tell the autoscaler the allowed node types and the resources they provide. + # The key is the name of the node type, which is just for debugging purposes. 
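+    # (These names are also referenced below by head_node_type and
+    # worker_default_node_type.)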
+ # The node config specifies the launch config and physical instance type. + available_node_types: + cpu_4_ondemand: + node_config: + InstanceType: m4.xlarge + resources: {"CPU": 4} + max_workers: 5 + cpu_16_spot: + node_config: + InstanceType: m4.4xlarge + InstanceMarketOptions: + MarketType: spot + resources: {"CPU": 16, "Custom1": 1} + max_workers: 10 + gpu_1_ondemand: + node_config: + InstanceType: p2.xlarge + resources: {"CPU": 4, "GPU": 1, "Custom2": 2} + max_workers: 4 + worker_setup_commands: + - pip install tensorflow-gpu + gpu_8_ondemand: + node_config: + InstanceType: p2.8xlarge + resources: {"CPU": 32, "GPU": 8} + max_workers: 2 + worker_setup_commands: + - pip install tensorflow-gpu + + # Specify the node type of the head node (as configured above). + head_node_type: cpu_4_ondemand + + # Specify the default type of the worker node (as configured above). + worker_default_node_type: cpu_4_spot + + +The above config defines two CPU node types (``cpu_4_ondemand`` and ``cpu_16_spot``), and two GPU types (``gpu_1_ondemand`` and ``gpu_8_ondemand``). Each node type has a name (e.g., ``cpu_4_ondemand``), which has no semantic meaning and is only for debugging. Let's look at the inner fields of the ``gpu_1_ondemand`` node type: + +The node config tells the underlying Cloud provider how to launch a node of this type. This node config is merged with the top level node config of the YAML and can override fields (i.e., to specify the p2.xlarge instance type here): + +.. code:: + + node_config: + InstanceType: p2.xlarge + +The resources field tells the autoscaler what kinds of resources this node provides. This can include custom resources as well (e.g., "Custom2"). This field enables the autoscaler to automatically select the right kind of nodes to launch given the resource demands of the application. For more information, see also the `resource demand scheduler `__: + +.. code:: + + resources: {"CPU": 4, "GPU": 1, "Custom2": 2} + +The ``max_workers`` field constrains the number of nodes of this type that can be launched: + +.. code:: + + max_workers: 4 + +The ``worker_setup_commands`` field can be used to override the setup and initialization commands for a node type. Note that you can only override the setup for worker nodes. The head node's setup commands are always configured via the top level field in the cluster YAML: + +.. code:: + + worker_setup_commands: + - pip install tensorflow-gpu diff --git a/doc/source/cluster/launcher.rst b/doc/source/cluster/launcher.rst index a1f58102a4e4..cde1eb30530e 100644 --- a/doc/source/cluster/launcher.rst +++ b/doc/source/cluster/launcher.rst @@ -213,15 +213,6 @@ This tells ``ray up`` to sync the current git branch SHA from your personal comp 2. Commit the changes with ``git commit`` and ``git push`` 3. Update files on your Ray cluster with ``ray up`` - -Autoscaling ------------ - -The Ray Cluster Launcher will automatically enable a load-based autoscaler. When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up the specified ``max_workers`` limit (in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. - -The default idle timeout is 5 minutes, which can be set in the cluster config. This is to prevent excessive node churn which could impact performance and increase costs (in AWS / GCP there is a minimum billing charge of 1 minute per instance, after which usage is billed by the second). 
- - Questions or Issues? -------------------- diff --git a/doc/source/index.rst b/doc/source/index.rst index d2acec412e27..51159cd1c48c 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -141,6 +141,7 @@ Academic Papers cluster/index.rst cluster/launcher.rst + cluster/autoscaling.rst cluster/cloud.rst cluster/deploy.rst diff --git a/python/ray/autoscaler/aws/example-full.yaml b/python/ray/autoscaler/aws/example-full.yaml index 345a9ccb6b53..89f815acb79a 100644 --- a/python/ray/autoscaler/aws/example-full.yaml +++ b/python/ray/autoscaler/aws/example-full.yaml @@ -42,7 +42,7 @@ docker: # usage. For example, if a cluster of 10 nodes is 100% busy and # target_utilization is 0.8, it would resize the cluster to 13. This fraction # can be decreased to increase the aggressiveness of upscaling. -# This value must be less than 1.0 for scaling to happen. +# This max value allowed is 1.0, which is the most conservative setting. target_utilization_fraction: 0.8 # If a node is idle for this many minutes, it will be removed. diff --git a/python/ray/autoscaler/aws/example-multi-node-type.yaml b/python/ray/autoscaler/aws/example-multi-node-type.yaml index a6f9a311866f..9951afe2c06b 100644 --- a/python/ray/autoscaler/aws/example-multi-node-type.yaml +++ b/python/ray/autoscaler/aws/example-multi-node-type.yaml @@ -18,16 +18,11 @@ available_node_types: InstanceType: m4.xlarge resources: {"CPU": 4} max_workers: 5 - cpu_4_spot: + cpu_16_spot: node_config: - InstanceType: m4.xlarge + InstanceType: m4.4xlarge InstanceMarketOptions: MarketType: spot - resources: {"CPU": 4} - max_workers: 20 - cpu_16_ondemand: - node_config: - InstanceType: m4.4xlarge resources: {"CPU": 16, "Custom1": 1} max_workers: 10 gpu_1_ondemand: diff --git a/python/ray/autoscaler/resource_demand_scheduler.py b/python/ray/autoscaler/resource_demand_scheduler.py index 908bb14eac22..f1b6baeba706 100644 --- a/python/ray/autoscaler/resource_demand_scheduler.py +++ b/python/ray/autoscaler/resource_demand_scheduler.py @@ -1,3 +1,12 @@ +"""Implements multi-node-type autoscaling. + +This file implements an autoscaling algorithm that is aware of multiple node +types (e.g., example-multi-node-type.yaml). The Ray autoscaler will pass in +a vector of resource shape demands, and the resource demand scheduler will +return a list of node types that can satisfy the demands given constraints +(i.e., reverse bin packing). +""" + import copy import numpy as np import logging @@ -30,18 +39,43 @@ def __init__(self, provider: NodeProvider, self.node_types = node_types self.max_workers = max_workers - def debug_string(self, nodes: List[NodeID], - pending_nodes: Dict[NodeID, int]) -> str: + # TODO(ekl) take into account existing utilization of node resources. We + # should subtract these from node resources prior to running bin packing. + def get_nodes_to_launch(self, nodes: List[NodeID], + pending_nodes: Dict[NodeType, int], + resource_demands: List[ResourceDict] + ) -> List[Tuple[NodeType, int]]: + """Given resource demands, return node types to add to the cluster. + + This method: + (1) calculates the resources present in the cluster. + (2) calculates the unfulfilled resource bundles. + (3) calculates which nodes need to be launched to fulfill all + the bundle requests, subject to max_worker constraints. + + Args: + nodes: List of existing nodes in the cluster. + pending_nodes: Summary of node types currently being launched. + resource_demands: Vector of resource demands from the scheduler. 
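+
+        Returns:
+            List of (node_type, count) tuples giving how many additional
+            nodes of each type should be launched.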
+ """ + + if resource_demands is None: + logger.info("No resource demands") + return [] + node_resources, node_type_counts = self.calculate_node_resources( nodes, pending_nodes) + logger.info("Cluster resources: {}".format(node_resources)) + logger.info("Node counts: {}".format(node_type_counts)) - out = "Worker node types:" - for node_type, count in node_type_counts.items(): - out += "\n - {}: {}".format(node_type, count) - if pending_nodes.get(node_type): - out += " ({} pending)".format(pending_nodes[node_type]) + unfulfilled = get_bin_pack_residual(node_resources, resource_demands) + logger.info("Resource demands: {}".format(resource_demands)) + logger.info("Unfulfilled demands: {}".format(unfulfilled)) - return out + nodes = get_nodes_for(self.node_types, node_type_counts, + self.max_workers - len(nodes), unfulfilled) + logger.info("Node requests: {}".format(nodes)) + return nodes def calculate_node_resources( self, nodes: List[NodeID], pending_nodes: Dict[NodeID, int] @@ -73,36 +107,18 @@ def add_node(node_type): return node_resources, node_type_counts - def get_nodes_to_launch(self, nodes: List[NodeID], - pending_nodes: Dict[NodeType, int], - resource_demands: List[ResourceDict] - ) -> List[Tuple[NodeType, int]]: - """Get a list of node types that should be added to the cluster. - - This method: - (1) calculates the resources present in the cluster. - (2) calculates the unfulfilled resource bundles. - (3) calculates which nodes need to be launched to fulfill all - the bundle requests, subject to max_worker constraints. - """ - - if resource_demands is None: - logger.info("No resource demands") - return [] - + def debug_string(self, nodes: List[NodeID], + pending_nodes: Dict[NodeID, int]) -> str: node_resources, node_type_counts = self.calculate_node_resources( nodes, pending_nodes) - logger.info("Cluster resources: {}".format(node_resources)) - logger.info("Node counts: {}".format(node_type_counts)) - unfulfilled = get_bin_pack_residual(node_resources, resource_demands) - logger.info("Resource demands: {}".format(resource_demands)) - logger.info("Unfulfilled demands: {}".format(unfulfilled)) + out = "Worker node types:" + for node_type, count in node_type_counts.items(): + out += "\n - {}: {}".format(node_type, count) + if pending_nodes.get(node_type): + out += " ({} pending)".format(pending_nodes[node_type]) - nodes = get_nodes_for(self.node_types, node_type_counts, - self.max_workers - len(nodes), unfulfilled) - logger.info("Node requests: {}".format(nodes)) - return nodes + return out def get_nodes_for(node_types: Dict[NodeType, NodeTypeConfigDict], From cd1899980d6c0cb779b769dc3c5eabd0fc9200d0 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 14:18:44 -0700 Subject: [PATCH 02/13] update --- doc/source/cluster/autoscaling.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 4712a2b17ac1..f66705283cd6 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -38,6 +38,8 @@ In 1.0, Ray supports multiple cluster node types. In this mode of operation, the The concept of a cluster node type encompasses both the physical instance type (e.g., AWS p3.8xl GPU nodes vs m4.16xl CPU nodes), as well as other attributes (e.g., IAM role, the machine image, etc). 
Custom resources can be specified for each node type so that Ray is aware of the demand for specific node types at the application level (e.g., a task may request to be placed on a machine with a specific role or machine image via custom resource). +Multi node type autoscaling operates in conjunction with the basic autoscaler. You may want to configure the basic autoscaler accordingly to act convervatively (i.e., set ``target_utilization_fraction: 1.0``). + An example of configuring multiple node types is as follows `(full example) `__: .. code:: From d7b38c8c6fc99c3132e3994633a3314ceff827d4 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 14:19:02 -0700 Subject: [PATCH 03/13] fix --- python/ray/autoscaler/aws/example-multi-node-type.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ray/autoscaler/aws/example-multi-node-type.yaml b/python/ray/autoscaler/aws/example-multi-node-type.yaml index 9951afe2c06b..8872148b33ad 100644 --- a/python/ray/autoscaler/aws/example-multi-node-type.yaml +++ b/python/ray/autoscaler/aws/example-multi-node-type.yaml @@ -40,7 +40,7 @@ available_node_types: head_node_type: cpu_4_ondemand # Specify the default type of the worker node (as configured above). -worker_default_node_type: cpu_4_spot +worker_default_node_type: cpu_16_spot # The default settings for the head node. This will be merged with the per-node # type configs given above. From 03b235ab2d11de97aaf560123316f38fdfd17ffe Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 14:19:09 -0700 Subject: [PATCH 04/13] fix --- doc/source/cluster/autoscaling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index f66705283cd6..c39ae2e3ef06 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -79,7 +79,7 @@ An example of configuring multiple node types is as follows `(full example) Date: Fri, 28 Aug 2020 14:19:56 -0700 Subject: [PATCH 05/13] update --- doc/source/cluster/autoscaling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index c39ae2e3ef06..31d8ef499351 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -58,7 +58,7 @@ An example of configuring multiple node types is as follows `(full example) Date: Fri, 28 Aug 2020 15:46:52 -0700 Subject: [PATCH 06/13] Apply suggestions from code review Co-authored-by: Stephanie Wang --- doc/source/cluster/autoscaling.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 31d8ef499351..40f6dd21d0c7 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -6,7 +6,7 @@ Cluster Autoscaling Basics ------ -The Ray Cluster Launcher will automatically enable a load-based autoscaler. When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up the specified ``max_workers`` limit (in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. +The Ray Cluster Launcher will automatically enable a load-based autoscaler. 
When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up to the specified ``max_workers`` limit (specified in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. The default idle timeout is 5 minutes, which can be set in the cluster config. This is to prevent excessive node churn which could impact performance and increase costs (in AWS / GCP there is a minimum billing charge of 1 minute per instance, after which usage is billed by the second). @@ -25,7 +25,7 @@ The basic autoscaling config settings are as follows: # usage. For example, if a cluster of 10 nodes is 100% busy and # target_utilization is 0.8, it would resize the cluster to 13. This fraction # can be decreased to increase the aggressiveness of upscaling. - # This max value allowed is 1.0, which is the most conservative setting. + # The max value allowed is 1.0, which is the most conservative setting. target_utilization_fraction: 0.8 # If a node is idle for this many minutes, it will be removed. @@ -34,17 +34,17 @@ The basic autoscaling config settings are as follows: Multiple Node Type Autoscaling ------------------------------ -In 1.0, Ray supports multiple cluster node types. In this mode of operation, the scheduler will look at the queue of resource shape demands from the cluster (e.g., there might be 10 tasks queued each requesting ``{"GPU": 4, "CPU": 16}``), and specifically tries to add nodes that can fulfill these resource demands. This enables precise, rapid scale up as the autoscaler has more visibility into the backlog of work and resource shapes. +Ray supports multiple node types in a single cluster. In this mode of operation, the scheduler will look at the queue of resource shape demands from the cluster (e.g., there might be 10 tasks queued each requesting ``{"GPU": 4, "CPU": 16}``), and tries to add the minimum set of nodes that can fulfill these resource demands. This enables precise, rapid scale up compared to looking only at resource utilization, as the autoscaler also has visiblity into the aggregate resource load. The concept of a cluster node type encompasses both the physical instance type (e.g., AWS p3.8xl GPU nodes vs m4.16xl CPU nodes), as well as other attributes (e.g., IAM role, the machine image, etc). Custom resources can be specified for each node type so that Ray is aware of the demand for specific node types at the application level (e.g., a task may request to be placed on a machine with a specific role or machine image via custom resource). -Multi node type autoscaling operates in conjunction with the basic autoscaler. You may want to configure the basic autoscaler accordingly to act convervatively (i.e., set ``target_utilization_fraction: 1.0``). +Multi-node type autoscaling operates in conjunction with the basic autoscaler. You may want to configure the basic autoscaler accordingly to act conservatively (i.e., set ``target_utilization_fraction: 1.0``). An example of configuring multiple node types is as follows `(full example) `__: .. code:: - # Tell the autoscaler the allowed node types and the resources they provide. + # Specify the allowed node types and the resources they provide. # The key is the name of the node type, which is just for debugging purposes. # The node config specifies the launch config and physical instance type. 
available_node_types: From 56c9e8fb4f44854ee74377b6e3ad63986b01376d Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 15:49:34 -0700 Subject: [PATCH 07/13] comments --- doc/source/cluster/autoscaling.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 31d8ef499351..44e4963601d5 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -28,7 +28,8 @@ The basic autoscaling config settings are as follows: # This max value allowed is 1.0, which is the most conservative setting. target_utilization_fraction: 0.8 - # If a node is idle for this many minutes, it will be removed. + # If a node is idle for this many minutes, it will be removed. A node is + # considered idle if there are any tasks or actors running on it. idle_timeout_minutes: 5 Multiple Node Type Autoscaling @@ -36,7 +37,7 @@ Multiple Node Type Autoscaling In 1.0, Ray supports multiple cluster node types. In this mode of operation, the scheduler will look at the queue of resource shape demands from the cluster (e.g., there might be 10 tasks queued each requesting ``{"GPU": 4, "CPU": 16}``), and specifically tries to add nodes that can fulfill these resource demands. This enables precise, rapid scale up as the autoscaler has more visibility into the backlog of work and resource shapes. -The concept of a cluster node type encompasses both the physical instance type (e.g., AWS p3.8xl GPU nodes vs m4.16xl CPU nodes), as well as other attributes (e.g., IAM role, the machine image, etc). Custom resources can be specified for each node type so that Ray is aware of the demand for specific node types at the application level (e.g., a task may request to be placed on a machine with a specific role or machine image via custom resource). +The concept of a cluster node type encompasses both the physical instance type (e.g., AWS p3.8xl GPU nodes vs m4.16xl CPU nodes), as well as other attributes (e.g., IAM role, the machine image, etc). `Custom resources `__ can be specified for each node type so that Ray is aware of the demand for specific node types at the application level (e.g., a task may request to be placed on a machine with a specific role or machine image via custom resource). Multi node type autoscaling operates in conjunction with the basic autoscaler. You may want to configure the basic autoscaler accordingly to act convervatively (i.e., set ``target_utilization_fraction: 1.0``). From c3d93d407ae958ac4ee182051f08dbaff99ed1a5 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 16:42:10 -0700 Subject: [PATCH 08/13] Update doc/source/cluster/autoscaling.rst Co-authored-by: Stephanie Wang --- doc/source/cluster/autoscaling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 8394e54b59ba..e875f020053c 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -29,7 +29,7 @@ The basic autoscaling config settings are as follows: target_utilization_fraction: 0.8 # If a node is idle for this many minutes, it will be removed. A node is - # considered idle if there are any tasks or actors running on it. + # considered idle if there are no tasks or actors running on it. 
idle_timeout_minutes: 5 Multiple Node Type Autoscaling From f75a0b9ee5197e1ad0956619952e28db6a4c5eee Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 16:44:08 -0700 Subject: [PATCH 09/13] detail --- doc/source/cluster/autoscaling.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index e875f020053c..722bd21521a3 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -8,7 +8,10 @@ Basics The Ray Cluster Launcher will automatically enable a load-based autoscaler. When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up to the specified ``max_workers`` limit (specified in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. -The default idle timeout is 5 minutes, which can be set in the cluster config. This is to prevent excessive node churn which could impact performance and increase costs (in AWS / GCP there is a minimum billing charge of 1 minute per instance, after which usage is billed by the second). +In more detail, the autoscaler implements the following control loop: + 1. It calculates the estimated utilization of the cluster based on the most-currently-assigned resource. For example, suppose a cluster has 100/200 CPUs assigned, but 20/25 GPUs assigned, then the utilization will be considered to be max(100/200, 15/25) = 60%. + 2. If the estimated utilization is greater than the target (80% by default), then the autoscaler will attempt to add nodes to the cluster. + 3. If a node is idle for a timeout (5 minutes by default), it is removed from the cluster. The basic autoscaling config settings are as follows: From c80a939f800d9944259d61821d17457d03a98d59 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 16:45:17 -0700 Subject: [PATCH 10/13] update --- doc/source/cluster/autoscaling.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 722bd21521a3..3833c4a28ab7 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -9,6 +9,7 @@ Basics The Ray Cluster Launcher will automatically enable a load-based autoscaler. When cluster resource usage exceeds a configurable threshold (80% by default), new nodes will be launched up to the specified ``max_workers`` limit (specified in the cluster config). When nodes are idle for more than a timeout, they will be removed, down to the ``min_workers`` limit. The head node is never removed. In more detail, the autoscaler implements the following control loop: + 1. It calculates the estimated utilization of the cluster based on the most-currently-assigned resource. For example, suppose a cluster has 100/200 CPUs assigned, but 20/25 GPUs assigned, then the utilization will be considered to be max(100/200, 15/25) = 60%. 2. If the estimated utilization is greater than the target (80% by default), then the autoscaler will attempt to add nodes to the cluster. 3. If a node is idle for a timeout (5 minutes by default), it is removed from the cluster. @@ -95,7 +96,7 @@ The node config tells the underlying Cloud provider how to launch a node of this node_config: InstanceType: p2.xlarge -The resources field tells the autoscaler what kinds of resources this node provides. 
This can include custom resources as well (e.g., "Custom2"). This field enables the autoscaler to automatically select the right kind of nodes to launch given the resource demands of the application. For more information, see also the `resource demand scheduler `__: +The resources field tells the autoscaler what kinds of resources this node provides. This can include custom resources as well (e.g., "Custom2"). This field enables the autoscaler to automatically select the right kind of nodes to launch given the resource demands of the application. The resources specified here will be automatically passed to the ``ray start`` command via an environment variable. For more information, see also the `resource demand scheduler `__: .. code:: From 269a4d80b8d644af3ca0eb89abead6445650c276 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 16:45:25 -0700 Subject: [PATCH 11/13] update --- doc/source/cluster/autoscaling.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 3833c4a28ab7..677839f53fbc 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -96,7 +96,7 @@ The node config tells the underlying Cloud provider how to launch a node of this node_config: InstanceType: p2.xlarge -The resources field tells the autoscaler what kinds of resources this node provides. This can include custom resources as well (e.g., "Custom2"). This field enables the autoscaler to automatically select the right kind of nodes to launch given the resource demands of the application. The resources specified here will be automatically passed to the ``ray start`` command via an environment variable. For more information, see also the `resource demand scheduler `__: +The resources field tells the autoscaler what kinds of resources this node provides. This can include custom resources as well (e.g., "Custom2"). This field enables the autoscaler to automatically select the right kind of nodes to launch given the resource demands of the application. The resources specified here will be automatically passed to the ``ray start`` command for the node via an environment variable. For more information, see also the `resource demand scheduler `__: .. code:: From e3d249405d5c813291bbbe136ef2a82a0231c337 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Fri, 28 Aug 2020 16:46:51 -0700 Subject: [PATCH 12/13] update --- doc/source/cluster/autoscaling.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/cluster/autoscaling.rst b/doc/source/cluster/autoscaling.rst index 677839f53fbc..4113aa795526 100644 --- a/doc/source/cluster/autoscaling.rst +++ b/doc/source/cluster/autoscaling.rst @@ -71,14 +71,14 @@ An example of configuring multiple node types is as follows `(full example) Date: Fri, 28 Aug 2020 16:47:03 -0700 Subject: [PATCH 13/13] update --- python/ray/autoscaler/aws/example-multi-node-type.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/ray/autoscaler/aws/example-multi-node-type.yaml b/python/ray/autoscaler/aws/example-multi-node-type.yaml index 8872148b33ad..ec3d5c37bda7 100644 --- a/python/ray/autoscaler/aws/example-multi-node-type.yaml +++ b/python/ray/autoscaler/aws/example-multi-node-type.yaml @@ -30,11 +30,15 @@ available_node_types: InstanceType: p2.xlarge resources: {"CPU": 4, "GPU": 1, "Custom2": 2} max_workers: 4 + worker_setup_commands: + - pip install tensorflow-gpu # Example command. 
gpu_8_ondemand: node_config: InstanceType: p2.8xlarge resources: {"CPU": 32, "GPU": 8} max_workers: 2 + worker_setup_commands: + - pip install tensorflow-gpu # Example command. # Specify the node type of the head node (as configured above). head_node_type: cpu_4_ondemand
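A minimal usage sketch, assuming the example config above: a hypothetical task (``train`` below) that requests the custom ``Custom2`` resource creates a resource demand that only the ``gpu_1_ondemand`` node type can satisfy, so the autoscaler will launch a node of that type if none is available.

.. code:: python

    import ray

    # Connect to the running Ray cluster (e.g., when run from the head node).
    ray.init(address="auto")

    # Demand 1 GPU plus 1 unit of the custom "Custom2" resource. In the example
    # config, only the gpu_1_ondemand node type provides "Custom2", so this task
    # shape tells the autoscaler to add a gpu_1_ondemand worker when needed.
    @ray.remote(num_gpus=1, resources={"Custom2": 1})
    def train():
        return "running on a gpu_1_ondemand worker"

    print(ray.get(train.remote()))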