"""Register the Ray dashboard as a page on the Skein application web UI.

Usage:
    python dashboard.py <dashboard-address>

``<dashboard-address>`` must be a full URL with a scheme and a host, for
example ``http://10.0.0.1:8265``.
"""
import sys
from urllib.parse import urlparse

USAGE = "Usage: python dashboard.py <dashboard-address>"


def is_valid_url(address: str) -> bool:
    """Return True if ``address`` parses as a URL with a scheme and a netloc.

    Args:
        address: The candidate dashboard URL.

    Returns:
        True when both the scheme (e.g. ``http``) and the network location
        (e.g. ``10.0.0.1:8265``) are present, False otherwise.
    """
    try:
        result = urlparse(address)
    except ValueError:
        # urlparse raises on malformed input such as an unbalanced IPv6
        # bracket; treat that as "not a valid URL" rather than crashing.
        return False
    return bool(result.scheme and result.netloc)


def main(argv=None) -> int:
    """Validate the command line and register the dashboard page on Skein.

    Args:
        argv: Argument vector including the program name at index 0.
            Defaults to ``sys.argv``.

    Returns:
        The process exit code: 0 on success, 1 when the address argument is
        missing or is not a valid URL.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print(USAGE, file=sys.stderr)
        return 1

    address = argv[1]
    if not is_valid_url(address):
        print(
            "Error: Invalid dashboard address. Please provide a valid URL.",
            file=sys.stderr,
        )
        return 1

    # Imported lazily so that usage and validation errors are reported even
    # when skein is not importable in the current environment.
    import skein

    print(f"Registering dashboard {address} on skein.")
    app = skein.ApplicationClient.from_current()
    app.ui.add_page("ray-dashboard", address, "Ray Dashboard")
    return 0


if __name__ == "__main__":
    sys.exit(main())
# By default, we set object store memory and heap memory to roughly 200 MB. This is conservative # and should be set according to application needs. # - ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1 + ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1 --dashboard-host=$RAY_HEAD_ADDRESS + + # This registers the Ray dashboard with Skein so that it can be accessed from Skein's web UI. + python dashboard.py "http://$RAY_HEAD_ADDRESS:8265" # This executes the user script. python example.py diff --git a/doc/source/cluster/images/yarn-job-dashboard.png b/doc/source/cluster/images/yarn-job-dashboard.png new file mode 100644 index 000000000000..367028ac220b Binary files /dev/null and b/doc/source/cluster/images/yarn-job-dashboard.png differ diff --git a/doc/source/cluster/vms/user-guides/community/yarn.rst b/doc/source/cluster/vms/user-guides/community/yarn.rst index 462527dce44a..00fc2ce7355d 100644 --- a/doc/source/cluster/vms/user-guides/community/yarn.rst +++ b/doc/source/cluster/vms/user-guides/community/yarn.rst @@ -83,6 +83,8 @@ Use the ``files`` option to specify files that will be copied into the YARN cont files: # ray/doc/yarn/example.py example.py: example.py + # ray/doc/yarn/dashboard.py + dashboard.py: dashboard.py # # A packaged python environment using `conda-pack`. Note that Skein # # doesn't require any specific way of distributing files, but this # # is a good one for python projects. This is optional. @@ -116,6 +118,12 @@ and heap memory to roughly 200 MB. This is conservative and should be set accord ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1 +Register the Ray dashboard with Skein. This exposes the dashboard link on the Skein application page. + +.. code-block:: bash + + python dashboard.py "http://$(hostname -i):8265" + Execute the user script containing the Ray program. .. 
"""Register the Ray dashboard as a page on the Skein application web UI.

Usage:
    python dashboard.py <dashboard-address>

``<dashboard-address>`` must be a full URL with a scheme and a host, for
example ``http://10.0.0.1:8265``.
"""
import sys
from urllib.parse import urlparse

USAGE = "Usage: python dashboard.py <dashboard-address>"


def is_valid_url(address: str) -> bool:
    """Return True if ``address`` parses as a URL with a scheme and a netloc.

    Args:
        address: The candidate dashboard URL.

    Returns:
        True when both the scheme (e.g. ``http``) and the network location
        (e.g. ``10.0.0.1:8265``) are present, False otherwise.
    """
    try:
        result = urlparse(address)
    except ValueError:
        # urlparse raises on malformed input such as an unbalanced IPv6
        # bracket; treat that as "not a valid URL" rather than crashing.
        return False
    return bool(result.scheme and result.netloc)


def main(argv=None) -> int:
    """Validate the command line and register the dashboard page on Skein.

    Args:
        argv: Argument vector including the program name at index 0.
            Defaults to ``sys.argv``.

    Returns:
        The process exit code: 0 on success, 1 when the address argument is
        missing or is not a valid URL.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print(USAGE, file=sys.stderr)
        return 1

    address = argv[1]
    if not is_valid_url(address):
        print(
            "Error: Invalid dashboard address. Please provide a valid URL.",
            file=sys.stderr,
        )
        return 1

    # Imported lazily so that usage and validation errors are reported even
    # when skein is not importable in the current environment.
    import skein

    print(f"Registering dashboard {address} on skein.")
    app = skein.ApplicationClient.from_current()
    app.ui.add_page("ray-dashboard", address, "Ray Dashboard")
    return 0


if __name__ == "__main__":
    sys.exit(main())
- skein kv put current --key=RAY_HEAD_ADDRESS --value=$(hostname -i) + skein kv put current --key=RAY_HEAD_ADDRESS --value=$RAY_HEAD_ADDRESS # This command starts all the processes needed on the ray head node. # By default, we set object store memory and heap memory to roughly 200 MB. This is conservative # and should be set according to application needs. # - ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1 + ray start --head --port=6379 --object-store-memory=200000000 --memory 200000000 --num-cpus=1 --dashboard-host=$RAY_HEAD_ADDRESS + + # This registers the Ray dashboard with Skein so that it can be accessed from Skein's web UI. + python dashboard.py "http://$RAY_HEAD_ADDRESS:8265" # This executes the user script. python example.py