config.yaml

cluster_name: mahjong
# Cloud-provider specific configuration.
provider:
  type: gcp
  region: us-central1
  availability_zone: us-central1-a
  project_id: vaulted-algebra-312001  # Globally unique project id

# The autoscaler will scale up the cluster faster with higher upscaling speed.
# E.g., if the task requires adding more nodes then autoscaler will gradually
# scale up the cluster in chunks of upscaling_speed*currently_running_nodes.
# This number should be > 0.
upscaling_speed: 1.0

# This executes all commands on all nodes in the docker container,
# and opens all the necessary ports to support the Ray cluster.
# Empty string means disabled.
docker:
  image: "wate123/mahjong_drl"
#  image: "rayproject/ray-ml:latest-cpu" # You can change this to latest-cpu if you don't need GPU support and want a faster startup
  #  image: "rayproject/ray-ml:latest-gpu" # You can change this to latest-cpu if you don't need GPU support and want a faster startup
  # image: rayproject/ray:latest-gpu   # use this one if you don't need ML dependencies, it's faster to pull
  container_name: "ray_container"
  # If true, pulls latest version of image. Otherwise, `docker run` will only pull the image
  # if no cached version is present.
  pull_before_run: False
  run_options: []  # Extra options to pass into "docker run"

  # Example of running a GPU head with CPU workers
  # head_image: "rayproject/ray-ml:latest-gpu"
  # Allow Ray to automatically detect GPUs

  # worker_image: "rayproject/ray-ml:latest-cpu"
  # worker_run_options: []

# If a node is idle for this many minutes, it will be removed.
idle_timeout_minutes: 5


# How Ray will authenticate with newly launched nodes.
auth:
  ssh_user: ubuntu
# By default Ray creates a new private keypair, but you can also use your own.
# If you do so, make sure to also set "KeyName" in the head and worker node
# configurations below. This requires that you have added the key into the
# project wide meta-data.
#    ssh_private_key: /path/to/your/key.pem

# Tell the autoscaler the allowed node types and the resources they provide.
# The key is the name of the node type, which is just for debugging purposes.
# The node config specifies the launch config and physical instance type.
available_node_types:
  ray_head_default:
    # The minimum number of worker nodes of this type to launch.
    # This number should be >= 0.
    min_workers: 1
    # The maximum number of worker nodes of this type to launch.
    # This takes precedence over min_workers.
    max_workers: 1
    # The resources provided by this node type.
    resources: { "CPU": 8 }
      # Provider-specific config for the head node, e.g. instance type. By default
      # Ray will auto-configure unspecified fields such as subnets and ssh-keys.
      # For more documentation on available fields, see:
    # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
    node_config:
      machineType: c2-standard-8
      disks:
        - boot: true
          autoDelete: true
          type: PERSISTENT
          initializeParams:
            diskSizeGb: 500
            # See https://cloud.google.com/compute/docs/images for more images
            sourceImage: projects/deeplearning-platform-release/global/images/family/common-cpu

      # Additional options can be found in in the compute docs at
      # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert

      # If the network interface is specified as below in both head and worker
      # nodes, the manual network config is used.  Otherwise an existing subnet is
      # used.  To use a shared subnet, ask the subnet owner to grant permission
      # for 'compute.subnetworks.use' to the ray autoscaler account...
      # networkInterfaces:
      #   - kind: compute#networkInterface
      #     subnetwork: path/to/subnet
      #     aliasIpRanges: []
  ray_worker_small:
    # The minimum number of worker nodes of this type to launch.
    # This number should be >= 0.
    min_workers: 1
    # The maximum number of worker nodes of this type to launch.
    # This takes precedence over min_workers.
    max_workers: 6
    # The resources provided by this node type.
    resources: { "CPU": 4 }
    # Provider-specific config for the head node, e.g. instance type. By default
    # Ray will auto-configure unspecified fields such as subnets and ssh-keys.
    # For more documentation on available fields, see:
    # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
    node_config:
      machineType: n1-highmem-4
      disks:
        - boot: true
          autoDelete: true
          type: PERSISTENT
          initializeParams:
            diskSizeGb: 50
            # See https://cloud.google.com/compute/docs/images for more images
            sourceImage: projects/deeplearning-platform-release/global/images/family/common-cpu
      # Run workers on preemtible instance by default.
      # Comment this out to use on-demand.
      scheduling:
        - preemptible: true

  # Additional options can be found in in the compute docs at
  # https://cloud.google.com/compute/docs/reference/rest/v1/instances/insert
head_node_type: ray_head_default

# Files or directories to copy to the head and worker nodes. The format is a
# dictionary from REMOTE_PATH: LOCAL_PATH, e.g.
file_mounts: {
#  "/home/ray/bingling.json": "/Users/junlin/bingling.json",
  #    "/path2/on/remote/machine": "/path2/on/local/machine",
}

# Files or directories to copy from the head node to the worker nodes. The format is a
# list of paths. The same path on the head node will be copied to the worker node.
# This behavior is a subset of the file_mounts behavior. In the vast majority of cases
# you should just use file_mounts. Only use this if you know what you're doing!
cluster_synced_files:
  - "/home/ray/Phoenix/models"

# Whether changes to directories in file_mounts or cluster_synced_files in the head node
# should sync to the worker node continuously
file_mounts_sync_continuously: True

# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
  - "**/.git"
  - "**/.git/**"

# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
  - ".gitignore"

# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.
initialization_commands: [ ]

# List of shell commands to run to set up nodes.
setup_commands:
  - export PYTHONPATH=PYTHONPATH:/home/ray/Phoenix
#  - sudo apt install -y curl
#  - echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | sudo tee -a /etc/apt/sources.list.d/google-cloud-sdk.list
#  - curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -
#  - sudo apt-get update && sudo apt-get install -y google-cloud-sdk
#  - git clone https://github.com/USC-CSCI527-Spring2021/Phoenix.git
 # Note: if you're developing Ray, you probably want to create a Docker image that
  # has your Ray repo pre-cloned. Then, you can replace the pip installs
  # below with a git checkout <your_sha> (and possibly a recompile).
  # To run the nightly version of ray (as opposed to the latest), either use a rayproject docker image
  # that has the "nightly" (e.g. "rayproject/ray-ml:nightly-gpu") or uncomment the following line:
# - pip install -U "ray[default] @ https://s3-us-west-2.amazonaws.com/ray-wheels/latest/ray-2.0.0.dev0-cp37-cp37m-manylinux2014_x86_64.whl"


# Custom commands that will be run on the head node after common setup.
head_setup_commands:
#  - pip install google-api-python-client==1.7.8
#  - export GOOGLE_APPLICATION_CREDENTIALS="/home/ray/bingling.json"
  - export PYTHONPATH=PYTHONPATH:/home/ray/Phoenix
  - python setup.py install
#  - git fetch && git checkout experiment_RL && git pull
  - sudo chmod 0777 /home/ray/*
  - sudo chmod 644 /home/ray/ray_bootstrap_key.pem


# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands:
#  - pip install google-api-python-client==1.7.8
#  - export GOOGLE_APPLICATION_CREDENTIALS="/home/ray/bingling.json"
#  - git fetch && git checkout experiment_RL && git pull
  - export PYTHONPATH=PYTHONPATH:/home/ray/Phoenix
  - python setup.py install
  - sudo chmod 700 /home/ray/*
  - sudo chmod 644 /home/ray/ray_bootstrap_key.pem
#  - cd Phoenix && git fetch && git checkout experiment_RL

# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands:
  - ray stop
  - >-
    ulimit -n 65536;
    ray start
    --head
    --port=6379
    --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml

# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands:
  - ray stop
  - >-
    ulimit -n 65536;
    ray start
    --address=$RAY_HEAD_IP:6379
    --object-manager-port=8076

head_node: { }
worker_nodes: { }