From 6f85e7f467fdea9da311fb09edd3f1203a4cf6fd Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 10:46:40 -0700 Subject: [PATCH 01/28] --wip-- --- 0001-dyn-sdk-v2.md | 224 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 0001-dyn-sdk-v2.md diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md new file mode 100644 index 0000000..5ec9d38 --- /dev/null +++ b/0001-dyn-sdk-v2.md @@ -0,0 +1,224 @@ +# Dynamo SDK v2 + +**Status**: Draft + +**Authors**: [Name/Team] + +**Category**: Architecture + +**Replaces**: [Link of previous proposal if applicable] + +**Replaced By**: [Link of previous proposal if applicable] + +**Sponsor**: [Name of code owner or maintainer to shepard process] + +**Required Reviewers**: [Names of technical leads that are required for acceptance] + +**Review Date**: [Date for review] + +**Pull Request**: [Link to Pull Request of the Proposal itself] + +**Implementation PR / Tracking Issue**: [Link to Pull Request or Tracking Issue for Implementation] + +# Summary + + +```yaml +version: 1.0 +name: my-graph +namespace: ns1 +components: + - name: frontend + py_class: a.b.c:Frontend + dependency: + backend1: dynamo://v1/ns1/backend_1 + backend2: dynamo://v1/ns1/backend_2 + parameters: + a: b + resources: + cpu: 200m + gpu: 1 + replicas: 4 + environment: + CUDA_VISIBLE_DEVICES: "4,5" + SAMPLE_CONFIG: A1 + DB_URI: "${{ secrets.DB_URI }}" + secrets: + - DB_URI + - name: backend_1 + cmd: ["dynamo", "serve"] + arg: ["a.b.c:Backend"] + instances: 1 + dependency: + backend: dynamo://v1/ns1/backend_2 + - name: backend_2 + # alternative syntax - dynamo serve + cmd: ["dynamo", "serve", "..."] # python component + cmd: ["dynamo", "run", "..."] # rust component + replicas: 2 + - name: backend3 + cmd: ["/my/rust_backend3"] + dependency: + backend: dynamo://v1/ns1/backend3 + # New: dynamo run components + - name: http_ingress + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: http + 
output: dyn + port: 8080 + model_name: "llama3-8b" + replicas: 1 + resources: + cpu: 500m + memory: 2Gi + - name: vllm_worker + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: "dyn://llama3-8b.backend.generate" + output: vllm + model_path: "meta-llama/Meta-Llama-3-8B-Instruct" + tensor_parallel_size: 2 + context_length: 8192 + base_gpu_id: 0 + extra_engine_args: "vllm_config.json" + replicas: 2 + resources: + gpu: 2 + memory: 24Gi + environment: + CUDA_VISIBLE_DEVICES: "0,1" + - name: sglang_worker + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: "dyn://qwen3-32b.backend.generate" + output: sglang + model_path: "/data/models/Qwen/Qwen3-32B" + tensor_parallel_size: 4 + router_mode: "kv" + num_nodes: 2 + node_rank: 0 + leader_addr: "127.0.0.1:9876" + replicas: 1 + resources: + gpu: 4 + memory: 64Gi + - name: llamacpp_worker + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: text + output: llamacpp + model_path: "~/llms/Llama-3.2-3B-Instruct-Q4_K_M.gguf" + model_config: "meta-llama/Llama-3.2-3B-Instruct" + context_length: 4096 + replicas: 1 + resources: + cpu: 2000m + memory: 8Gi + - name: batch_processor + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: "batch:/data/prompts.jsonl" + output: mistralrs + model_path: "Qwen/Qwen3-4B" + verbosity: 2 # -vv flag + replicas: 1 + resources: + gpu: 1 + memory: 16Gi + # Multi-node distributed example + - name: trtllm_leader + cmd: ["dynamo", "serve"] # current dynamo-run + run_config: + input: "dyn://deepseek-70b.backend.generate" + output: trtllm + model_path: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" + tensor_parallel_size: 16 + num_nodes: 2 + node_rank: 0 + leader_addr: "10.217.98.122:5000" + extra_engine_args: "trtllm_config.yaml" + replicas: 1 + resources: + gpu: 8 + memory: 80Gi + node_selector: + role: leader +``` + + +# Motivation + +**\[Required\]** + +Describe the problem that needs to be addressed with enough detail for +someone 
familiar with the project to understand. Generally one to two +short paragraphs. Additional details can be placed in the background +section as needed. Cover **what** the issue is and **why** it needs to +be addressed. Link to github issues if relevant. + +## Goals + +**\[Optional \- if not applicable omit\]** + +List out any additional goals in bullet points. Goals may be aspirational / difficult to measure but guide the proposal. + +* Goal + +* Goal + +* Goal + +### Non Goals + +**\[Optional \- if not applicable omit\]** + +List out any items which are out of scope / specifically not required in bullet points. Indicates the scope of the proposal and issue being resolved. + +## Requirements + +**\[Optional \- if not applicable omit\]** + +List out any additional requirements in numbered subheadings. + +**\** + +### REQ \<\#\> \ + +Describe the requirement in as much detail as necessary for others to understand it and how it applies to the DEP. Keep in mind that requirements should be measurable and will be used to determine if a DEP has been successfully implemented or not. + +Requirement names should be prefixed using a monotonically increasing number such as “REQ 1 \” followed by “REQ 2 \” and so on. Use title casing when naming requirements. Requirement names should be as descriptive as possible while remaining as terse as possible. + +Use all-caps, bolded terms like **MUST** and **SHOULD** when describing each requirement. See [RFC-2119](https://datatracker.ietf.org/doc/html/rfc2119) for additional information. + + +# Proposal + +**\[Required\]** + +Describe the high level design / proposal. Use sub sections as needed, but start with an overview and then dig into the details. Try to provide images and diagrams to facilitate understanding. + +# Alternate Solutions + +**\[Required, if not applicable write N/A\]** + +List out solutions that were considered but ultimately rejected. Consider free form \- but a possible format shown below. 
+ +## Alt \<\#\> \ + +**Pros:** + +\ + +**Cons:** + +\ + +**Reason Rejected:** + +\ + +**Notes:** + +\ + From f6d4906b1390b1e7bc967be1bf2d40dccae79100 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 12:29:16 -0700 Subject: [PATCH 02/28] --wip-- --- 0001-dyn-sdk-v2.md | 149 +++++++++++++++++++-------------------------- 1 file changed, 62 insertions(+), 87 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 5ec9d38..29a03bd 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -1,28 +1,80 @@ -# Dynamo SDK v2 +# Dynamo SDK v2 and IR design **Status**: Draft -**Authors**: [Name/Team] +**Authors**: [biswa](https://github.com/biswapanda) **Category**: Architecture -**Replaces**: [Link of previous proposal if applicable] +**Replaces**: -**Replaced By**: [Link of previous proposal if applicable] +**Replaced By**: -**Sponsor**: [Name of code owner or maintainer to shepard process] +**Sponsor**: -**Required Reviewers**: [Names of technical leads that are required for acceptance] +**Required Reviewers**: Neelay, Ishan, Alec, Mohammed, Maksim -**Review Date**: [Date for review] +**Review Date**: [TBD] -**Pull Request**: [Link to Pull Request of the Proposal itself] +# Summary -**Implementation PR / Tracking Issue**: [Link to Pull Request or Tracking Issue for Implementation] +1. current `dynamo-run` will converge into `dynamo serve` -# Summary +2. separate responsibilities +`dynamo serve` will launch single component +`dynamo deploy` will launch multiple components (graph) + + +# Motivation + +Issues +## 1: tight coupling between component's implementation and deployment spec +Dynamo user persona range from expert k8s to power dynamo component developers. +Both dont't need handholding and full control. 
+ +```python +@service( + dynamo={ + "namespace": "dynamo-demo", + }, + resources={"gpu": 1, "cpu": "10", "memory": "20Gi"}, + workers=1, +) +class PrefillWorker: +``` + +## 2: Too many levels of configuration +Configurations are managead in SDK decorators, CLI args, env variables and config files. +- Dynamo components authors are confused how to config and launch components +- K8s savvy end-users are confused where and how to configure a dynamo graph in k8s +## 3: Implicit resource allocation +Users are unable to specify gpu resources explicitly + +## Design Principles + +* SOC: Separation of concerns +1. Decouple component author API from k8s deployment related concerns +2. Separate component and graph launch verbs (dynamo serve and dynamo deploy) + + +* Dev-Ex: Simple is better than complex. +1. Enable dynamo developers to completely control how to spin up a component + +* Explicit is better than implicit +Allow users to fully and explicitly specify all configurations (gpu resources, parameters etc.) + + +## Requirements + +### REQ 1: Dynamo serve SHOULD not interleave deployment logic +### REQ 2: Dynamo users MUST be able to explicitly specify exact configuration + + +# Proposal + +## Graph IR ```yaml version: 1.0 name: my-graph @@ -145,80 +197,3 @@ components: node_selector: role: leader ``` - - -# Motivation - -**\[Required\]** - -Describe the problem that needs to be addressed with enough detail for -someone familiar with the project to understand. Generally one to two -short paragraphs. Additional details can be placed in the background -section as needed. Cover **what** the issue is and **why** it needs to -be addressed. Link to github issues if relevant. - -## Goals - -**\[Optional \- if not applicable omit\]** - -List out any additional goals in bullet points. Goals may be aspirational / difficult to measure but guide the proposal. 
- -* Goal - -* Goal - -* Goal - -### Non Goals - -**\[Optional \- if not applicable omit\]** - -List out any items which are out of scope / specifically not required in bullet points. Indicates the scope of the proposal and issue being resolved. - -## Requirements - -**\[Optional \- if not applicable omit\]** - -List out any additional requirements in numbered subheadings. - -**\** - -### REQ \<\#\> \ - -Describe the requirement in as much detail as necessary for others to understand it and how it applies to the DEP. Keep in mind that requirements should be measurable and will be used to determine if a DEP has been successfully implemented or not. - -Requirement names should be prefixed using a monotonically increasing number such as “REQ 1 \” followed by “REQ 2 \” and so on. Use title casing when naming requirements. Requirement names should be as descriptive as possible while remaining as terse as possible. - -Use all-caps, bolded terms like **MUST** and **SHOULD** when describing each requirement. See [RFC-2119](https://datatracker.ietf.org/doc/html/rfc2119) for additional information. - - -# Proposal - -**\[Required\]** - -Describe the high level design / proposal. Use sub sections as needed, but start with an overview and then dig into the details. Try to provide images and diagrams to facilitate understanding. - -# Alternate Solutions - -**\[Required, if not applicable write N/A\]** - -List out solutions that were considered but ultimately rejected. Consider free form \- but a possible format shown below. 
- -## Alt \<\#\> \ - -**Pros:** - -\ - -**Cons:** - -\ - -**Reason Rejected:** - -\ - -**Notes:** - -\ - From 865fd56b7d09c7bbaeb597741f75eff2f38075a6 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 12:30:11 -0700 Subject: [PATCH 03/28] --wip-- --- 0001-dyn-sdk-v2.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 29a03bd..1db6588 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -21,8 +21,8 @@ 1. current `dynamo-run` will converge into `dynamo serve` 2. separate responsibilities -`dynamo serve` will launch single component -`dynamo deploy` will launch multiple components (graph) +- `dynamo serve` will launch single component +- `dynamo deploy` will launch multiple components (graph) # Motivation From ad7de43e3adc00d5c434461c4b067c0da1cf3278 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 12:30:53 -0700 Subject: [PATCH 04/28] --wip-- --- 0001-dyn-sdk-v2.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 1db6588..30d9152 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -54,15 +54,15 @@ Users are unable to specify gpu resources explicitly ## Design Principles -* SOC: Separation of concerns +### SOC: Separation of concerns 1. Decouple component author API from k8s deployment related concerns 2. Separate component and graph launch verbs (dynamo serve and dynamo deploy) -* Dev-Ex: Simple is better than complex. +### Dev-Ex: Simple is better than complex. 1. Enable dynamo developers to completely control how to spin up a component -* Explicit is better than implicit +### Explicit is better than implicit Allow users to fully and explicitly specify all configurations (gpu resources, parameters etc.) 
From 97baf02eebd83f765e65c73786878064c0faf991 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 14:03:54 -0700 Subject: [PATCH 05/28] --wip-- --- 0001-dyn-sdk-v2.md | 121 ++++++++++++++++++++++----------------------- 1 file changed, 60 insertions(+), 61 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 30d9152..6d4d9e1 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -21,14 +21,14 @@ 1. current `dynamo-run` will converge into `dynamo serve` 2. separate responsibilities -- `dynamo serve` will launch single component +- `dynamo serve` will launch a single component only - `dynamo deploy` will launch multiple components (graph) # Motivation Issues -## 1: tight coupling between component's implementation and deployment spec +## Tight coupling between component's implementation and deployment Dynamo user persona range from expert k8s to power dynamo component developers. Both dont't need handholding and full control. @@ -51,7 +51,6 @@ Configurations are managead in SDK decorators, CLI args, env variables and confi ## 3: Implicit resource allocation Users are unable to specify gpu resources explicitly - ## Design Principles ### SOC: Separation of concerns @@ -74,72 +73,52 @@ Allow users to fully and explicitly specify all configurations (gpu resources, p # Proposal -## Graph IR +## Graph Deployment IR + +Dynamo deployment IR is where user can specify deployment spec in a deployment target agnostic + + +```bash +# creates deploymenet manifests for the target (default=k8s) +dynmao deploy --target k8s -f ./my-graph-config.yaml --out_dir=dir_name +dynmao deploy --target slurm -f ./my-graph-config.yaml --out_dir=dir_name +``` + +`my-graph-config.yaml` ```yaml version: 1.0 -name: my-graph -namespace: ns1 +name: dynamo-graph components: - - name: frontend - py_class: a.b.c:Frontend - dependency: - backend1: dynamo://v1/ns1/backend_1 - backend2: dynamo://v1/ns1/backend_2 - parameters: - a: b - resources: - cpu: 200m - gpu: 1 - 
replicas: 4 - environment: - CUDA_VISIBLE_DEVICES: "4,5" - SAMPLE_CONFIG: A1 - DB_URI: "${{ secrets.DB_URI }}" - secrets: - - DB_URI - - name: backend_1 - cmd: ["dynamo", "serve"] - arg: ["a.b.c:Backend"] - instances: 1 - dependency: - backend: dynamo://v1/ns1/backend_2 - - name: backend_2 - # alternative syntax - dynamo serve - cmd: ["dynamo", "serve", "..."] # python component - cmd: ["dynamo", "run", "..."] # rust component - replicas: 2 - - name: backend3 - cmd: ["/my/rust_backend3"] - dependency: - backend: dynamo://v1/ns1/backend3 - # New: dynamo run components - name: http_ingress - cmd: ["dynamo", "serve"] # current dynamo-run + cmd: ["dynamo", "serve"] # default cmd, current dynamo-run run_config: input: http output: dyn port: 8080 - model_name: "llama3-8b" - replicas: 1 + replicas: 5 resources: cpu: 500m memory: 2Gi - name: vllm_worker - cmd: ["dynamo", "serve"] # current dynamo-run + cmd: ["dynamo", "serve"] run_config: input: "dyn://llama3-8b.backend.generate" output: vllm - model_path: "meta-llama/Meta-Llama-3-8B-Instruct" - tensor_parallel_size: 2 - context_length: 8192 - base_gpu_id: 0 - extra_engine_args: "vllm_config.json" + parameters: + model_path: "meta-llama/Meta-Llama-3-8B-Instruct" + tensor_parallel_size: 2 + context_length: 8192 + base_gpu_id: 0 + extra_engine_args: "vllm_config.json" replicas: 2 resources: gpu: 2 memory: 24Gi environment: CUDA_VISIBLE_DEVICES: "0,1" + HF_TOKEN: "${{ secrets.HF_TOKEN }}" + secrets: + - HF_TOKEN - name: sglang_worker cmd: ["dynamo", "serve"] # current dynamo-run run_config: @@ -155,18 +134,6 @@ components: resources: gpu: 4 memory: 64Gi - - name: llamacpp_worker - cmd: ["dynamo", "serve"] # current dynamo-run - run_config: - input: text - output: llamacpp - model_path: "~/llms/Llama-3.2-3B-Instruct-Q4_K_M.gguf" - model_config: "meta-llama/Llama-3.2-3B-Instruct" - context_length: 4096 - replicas: 1 - resources: - cpu: 2000m - memory: 8Gi - name: batch_processor cmd: ["dynamo", "serve"] # current dynamo-run 
run_config: @@ -176,7 +143,6 @@ components: verbosity: 2 # -vv flag replicas: 1 resources: - gpu: 1 memory: 16Gi # Multi-node distributed example - name: trtllm_leader @@ -196,4 +162,37 @@ components: memory: 80Gi node_selector: role: leader + # old + - name: frontend + py_class: a.b.c:Frontend + dependency: + backend1: dynamo://v1/ns1/backend_1 + backend2: dynamo://v1/ns1/backend_2 + parameters: + a: b + resources: + cpu: 200m + gpu: 1 + replicas: 4 + environment: + CUDA_VISIBLE_DEVICES: "4,5" + SAMPLE_CONFIG: A1 + DB_URI: "${{ secrets.DB_URI }}" + secrets: + - DB_URI + - name: backend_1 + cmd: ["dynamo", "serve"] + arg: ["a.b.c:Backend"] + instances: 1 + dependency: + backend: dynamo://v1/ns1/backend_2 + - name: backend_2 + # alternative syntax - dynamo serve + cmd: ["dynamo", "serve", "..."] # python component + cmd: ["dynamo", "run", "..."] # rust component + replicas: 2 + - name: backend3 + cmd: ["/my/rust_backend3"] + dependency: + backend: dynamo://v1/ns1/backend3 ``` From 4c8d5a0673d7f4398f18c0a1836dc8a9990ac52d Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 14:06:47 -0700 Subject: [PATCH 06/28] --wip-- [skip ci] --- 0001-dyn-sdk-v2.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 6d4d9e1..e77cfde 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -90,10 +90,12 @@ version: 1.0 name: dynamo-graph components: - name: http_ingress + image: ... cmd: ["dynamo", "serve"] # default cmd, current dynamo-run run_config: input: http output: dyn + parameters: port: 8080 replicas: 5 resources: @@ -101,6 +103,7 @@ components: memory: 2Gi - name: vllm_worker cmd: ["dynamo", "serve"] + image: ... run_config: input: "dyn://llama3-8b.backend.generate" output: vllm @@ -120,6 +123,7 @@ components: secrets: - HF_TOKEN - name: sglang_worker + image: ... 
cmd: ["dynamo", "serve"] # current dynamo-run run_config: input: "dyn://qwen3-32b.backend.generate" From d95ee2b486a65e65521246c2543c04e989654191 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 14:13:13 -0700 Subject: [PATCH 07/28] --wip-- --- 0001-dyn-sdk-v2.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index e77cfde..604fefe 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -16,6 +16,9 @@ **Review Date**: [TBD] +**Related Docs**: +- [Dynamo SDK Abstractions design and Multi-Target Deployment](https://docs.google.com/document/d/1UNSD_MUOYa1cbGwHp0Wn53wdO0Ir55KR7rfDUZYvNto/edit?tab=t.0) + # Summary 1. current `dynamo-run` will converge into `dynamo serve` @@ -69,6 +72,7 @@ Allow users to fully and explicitly specify all configurations (gpu resources, p ### REQ 1: Dynamo serve SHOULD not interleave deployment logic ### REQ 2: Dynamo users MUST be able to explicitly specify exact configuration +### REQ 3: Dynamo users MUST be able to deploy a dynamo graph using a simplified config # Proposal @@ -80,8 +84,8 @@ Dynamo deployment IR is where user can specify deployment spec in a deployment t ```bash # creates deploymenet manifests for the target (default=k8s) -dynmao deploy --target k8s -f ./my-graph-config.yaml --out_dir=dir_name -dynmao deploy --target slurm -f ./my-graph-config.yaml --out_dir=dir_name +dynmao deploy --target k8s -f ./my-graph-config.yaml --out_dir=k8s_deployment +dynmao deploy --target slurm -f ./my-graph-config.yaml --out_dir=slum_deployment ``` `my-graph-config.yaml` @@ -90,7 +94,7 @@ version: 1.0 name: dynamo-graph components: - name: http_ingress - image: ... + image: "" cmd: ["dynamo", "serve"] # default cmd, current dynamo-run run_config: input: http @@ -121,7 +125,7 @@ components: CUDA_VISIBLE_DEVICES: "0,1" HF_TOKEN: "${{ secrets.HF_TOKEN }}" secrets: - - HF_TOKEN + - my_secret_name - name: sglang_worker image: ... 
cmd: ["dynamo", "serve"] # current dynamo-run From a12e6449a6fa75126e1b24e6616ca75435135d27 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 14:15:14 -0700 Subject: [PATCH 08/28] --wip-- --- 0001-dyn-sdk-v2.md | 39 +++------------------------------------ 1 file changed, 3 insertions(+), 36 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 604fefe..798ce5c 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -128,7 +128,7 @@ components: - my_secret_name - name: sglang_worker image: ... - cmd: ["dynamo", "serve"] # current dynamo-run + cmd: ["dynamo", "serve"] run_config: input: "dyn://qwen3-32b.backend.generate" output: sglang @@ -143,7 +143,7 @@ components: gpu: 4 memory: 64Gi - name: batch_processor - cmd: ["dynamo", "serve"] # current dynamo-run + cmd: ["dynamo", "serve"] run_config: input: "batch:/data/prompts.jsonl" output: mistralrs @@ -154,7 +154,7 @@ components: memory: 16Gi # Multi-node distributed example - name: trtllm_leader - cmd: ["dynamo", "serve"] # current dynamo-run + cmd: ["dynamo", "serve"] run_config: input: "dyn://deepseek-70b.backend.generate" output: trtllm @@ -170,37 +170,4 @@ components: memory: 80Gi node_selector: role: leader - # old - - name: frontend - py_class: a.b.c:Frontend - dependency: - backend1: dynamo://v1/ns1/backend_1 - backend2: dynamo://v1/ns1/backend_2 - parameters: - a: b - resources: - cpu: 200m - gpu: 1 - replicas: 4 - environment: - CUDA_VISIBLE_DEVICES: "4,5" - SAMPLE_CONFIG: A1 - DB_URI: "${{ secrets.DB_URI }}" - secrets: - - DB_URI - - name: backend_1 - cmd: ["dynamo", "serve"] - arg: ["a.b.c:Backend"] - instances: 1 - dependency: - backend: dynamo://v1/ns1/backend_2 - - name: backend_2 - # alternative syntax - dynamo serve - cmd: ["dynamo", "serve", "..."] # python component - cmd: ["dynamo", "run", "..."] # rust component - replicas: 2 - - name: backend3 - cmd: ["/my/rust_backend3"] - dependency: - backend: dynamo://v1/ns1/backend3 ``` From 
4acbd70275a38cdc630733ac183a64bd51c871c6 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 15:17:13 -0700 Subject: [PATCH 09/28] --wip-- --- 0001-dyn-sdk-v2.md | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 798ce5c..9299756 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -30,7 +30,6 @@ # Motivation -Issues ## Tight coupling between component's implementation and deployment Dynamo user persona range from expert k8s to power dynamo component developers. Both dont't need handholding and full control. @@ -74,12 +73,29 @@ Allow users to fully and explicitly specify all configurations (gpu resources, p ### REQ 2: Dynamo users MUST be able to explicitly specify exact configuration ### REQ 3: Dynamo users MUST be able to deploy a dynamo graph using a simplified config +## Scenarios + +Persona: Enterprise customer (K8s Savvy) +Persona: Component Developer # Proposal -## Graph Deployment IR +## Launching a component + +`dynamo serve` command will launch an individual component (single process) + +Example: + +Launch Frontend (+Processor+Router) +```bash +dynamo serve in=http out=dyn -f config.yaml +``` +Launch vllm worker +```bash +dynamo serve in=dyn out=vllm -f config.yam +``` -Dynamo deployment IR is where user can specify deployment spec in a deployment target agnostic +## Launching a graph ```bash @@ -88,7 +104,9 @@ dynmao deploy --target k8s -f ./my-graph-config.yaml --out_dir=k8s_deployment dynmao deploy --target slurm -f ./my-graph-config.yaml --out_dir=slum_deployment ``` -`my-graph-config.yaml` + is where user can specify deployment spec in a deployment target agnostic + +`my-graph-deployment-config.yaml` ```yaml version: 1.0 name: dynamo-graph @@ -99,8 +117,8 @@ components: run_config: input: http output: dyn - parameters: - port: 8080 + parameters: + port: 8080 replicas: 5 resources: cpu: 500m From d8eecb54a6f84fd0f00eacb1b54ce7d85b4bca08 Mon Sep 
17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 15:36:42 -0700 Subject: [PATCH 10/28] --wip-- --- 0001-dyn-sdk-v2.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/0001-dyn-sdk-v2.md b/0001-dyn-sdk-v2.md index 9299756..e832f02 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-dyn-sdk-v2.md @@ -100,11 +100,17 @@ dynamo serve in=dyn out=vllm -f config.yam ```bash # creates deploymenet manifests for the target (default=k8s) -dynmao deploy --target k8s -f ./my-graph-config.yaml --out_dir=k8s_deployment -dynmao deploy --target slurm -f ./my-graph-config.yaml --out_dir=slum_deployment +dynmao deploy --target k8s -f ./my-graph-deployment-config.yaml -c ./config.yaml --out_dir=k8s_deployment + +dynmao deploy --target slurm -f ./my-graph-deployment-config.yam -c ./config.yaml --out_dir=slum_deployment ``` - is where user can specify deployment spec in a deployment target agnostic +### Alternative 1: separate deployment and component configs + + + +### Alternative 2: Single config file with embedded component parameters +Config is where user can specify deployment spec in a deployment target agnostic `my-graph-deployment-config.yaml` ```yaml From 38917b73b23ca04dfc54dbf8ca8d514da3083984 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Tue, 1 Jul 2025 17:24:59 -0700 Subject: [PATCH 11/28] --wip-- --- 0001-dyn-sdk-v2.md => 0001-Dynamo-UX.md | 94 +++++++------------------ 1 file changed, 25 insertions(+), 69 deletions(-) rename 0001-dyn-sdk-v2.md => 0001-Dynamo-UX.md (54%) diff --git a/0001-dyn-sdk-v2.md b/0001-Dynamo-UX.md similarity index 54% rename from 0001-dyn-sdk-v2.md rename to 0001-Dynamo-UX.md index e832f02..f5c2159 100644 --- a/0001-dyn-sdk-v2.md +++ b/0001-Dynamo-UX.md @@ -1,4 +1,4 @@ -# Dynamo SDK v2 and IR design +# Dynamo UX v2 **Status**: Draft @@ -6,24 +6,19 @@ **Category**: Architecture -**Replaces**: - -**Replaced By**: - -**Sponsor**: - -**Required Reviewers**: Neelay, Ishan, Alec, Mohammed, Maksim +**Required Reviewers**: 
Itay, Neelay, Ishan, Alec, Mohammed, Maksim **Review Date**: [TBD] **Related Docs**: - [Dynamo SDK Abstractions design and Multi-Target Deployment](https://docs.google.com/document/d/1UNSD_MUOYa1cbGwHp0Wn53wdO0Ir55KR7rfDUZYvNto/edit?tab=t.0) +- # Summary -1. current `dynamo-run` will converge into `dynamo serve` +1. current `dynamo-run` converges into `dynamo serve` -2. separate responsibilities +2. separate responsibilities but similar UX - `dynamo serve` will launch a single component only - `dynamo deploy` will launch multiple components (graph) @@ -45,26 +40,28 @@ Both dont't need handholding and full control. class PrefillWorker: ``` -## 2: Too many levels of configuration +## Too many levels of configuration Configurations are managead in SDK decorators, CLI args, env variables and config files. - Dynamo components authors are confused how to config and launch components - K8s savvy end-users are confused where and how to configure a dynamo graph in k8s -## 3: Implicit resource allocation +## Implicit resource allocation Users are unable to specify gpu resources explicitly ## Design Principles ### SOC: Separation of concerns -1. Decouple component author API from k8s deployment related concerns +1. Decouple component author API from k8s deployment 2. Separate component and graph launch verbs (dynamo serve and dynamo deploy) -### Dev-Ex: Simple is better than complex. -1. Enable dynamo developers to completely control how to spin up a component +### UX: Simple is better than complex. +- Consistent UX across `dynamo serve` and `dynamo deploy` commands +- Enable dynamo developers to completely control how to spin up a component ### Explicit is better than implicit -Allow users to fully and explicitly specify all configurations (gpu resources, parameters etc.) +- No handholding needed, customers/users are domain experts +- Allow users to fully and explicitly specify all configurations (gpu resources, parameters etc.) 
## Requirements @@ -98,21 +95,24 @@ dynamo serve in=dyn out=vllm -f config.yam ## Launching a graph +### Alternative 1: Separate deployment and component configs + ```bash -# creates deploymenet manifests for the target (default=k8s) -dynmao deploy --target k8s -f ./my-graph-deployment-config.yaml -c ./config.yaml --out_dir=k8s_deployment +# creates deployment manifests for the target (default=k8s) +dynamo deploy -c ./config.yaml -f ./deployment.yaml --out_dir=k8s_deployment -dynmao deploy --target slurm -f ./my-graph-deployment-config.yam -c ./config.yaml --out_dir=slum_deployment +dynamo deploy --target slurm -c ./config.yaml -f ./deployment.yaml --out_dir=slurm_deployment ``` -### Alternative 1: separate deployment and component configs +1. config.yaml +This will map to [current config yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/vllm_v1/configs/disagg.yaml) +`dynamo serve -c ./config.yaml` to run a service -### Alternative 2: Single config file with embedded component parameters -Config is where user can specify deployment spec in a deployment target agnostic +### Alternative 2: Single config file with embedded component configs -`my-graph-deployment-config.yaml` +`deployment-config.yaml` ```yaml version: 1.0 name: dynamo-graph @@ -123,7 +123,7 @@ components: run_config: input: http output: dyn - parameters: + parameters: # these parameters are passed to component port: 8080 replicas: 5 resources: @@ -147,51 +147,7 @@ components: memory: 24Gi environment: CUDA_VISIBLE_DEVICES: "0,1" - HF_TOKEN: "${{ secrets.HF_TOKEN }}" + HF_TOKEN: "${{ my_secret_name.HF_TOKEN }}" secrets: - my_secret_name - - name: sglang_worker - image: ... 
- cmd: ["dynamo", "serve"] - run_config: - input: "dyn://qwen3-32b.backend.generate" - output: sglang - model_path: "/data/models/Qwen/Qwen3-32B" - tensor_parallel_size: 4 - router_mode: "kv" - num_nodes: 2 - node_rank: 0 - leader_addr: "127.0.0.1:9876" - replicas: 1 - resources: - gpu: 4 - memory: 64Gi - - name: batch_processor - cmd: ["dynamo", "serve"] - run_config: - input: "batch:/data/prompts.jsonl" - output: mistralrs - model_path: "Qwen/Qwen3-4B" - verbosity: 2 # -vv flag - replicas: 1 - resources: - memory: 16Gi - # Multi-node distributed example - - name: trtllm_leader - cmd: ["dynamo", "serve"] - run_config: - input: "dyn://deepseek-70b.backend.generate" - output: trtllm - model_path: "deepseek-ai/DeepSeek-R1-Distill-Llama-70B" - tensor_parallel_size: 16 - num_nodes: 2 - node_rank: 0 - leader_addr: "10.217.98.122:5000" - extra_engine_args: "trtllm_config.yaml" - replicas: 1 - resources: - gpu: 8 - memory: 80Gi - node_selector: - role: leader ``` From e1ca8df9b616936eba5fc2de266f7f26a10d90ec Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 07:38:07 -0700 Subject: [PATCH 12/28] --wip-- [skip ci] --- 0001-Dynamo-UX.md | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index f5c2159..8cb2b3e 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -8,20 +8,33 @@ **Required Reviewers**: Itay, Neelay, Ishan, Alec, Mohammed, Maksim -**Review Date**: [TBD] +**Review Date**: 07/02/2025 **Related Docs**: - [Dynamo SDK Abstractions design and Multi-Target Deployment](https://docs.google.com/document/d/1UNSD_MUOYa1cbGwHp0Wn53wdO0Ir55KR7rfDUZYvNto/edit?tab=t.0) -- +- [merge dynamo serve and run](https://github.com/ai-dynamo/enhancements/blob/grahamk/serve-run-merge/deps/NNNN-serve-run-merge.md) # Summary -1. current `dynamo-run` converges into `dynamo serve` +1. 
current `dynamo-run` converges into `dynamo serve` [related DEP](https://github.com/ai-dynamo/enhancements/blob/grahamk/serve-run-merge/deps/NNNN-serve-run-merge.md) -2. separate responsibilities but similar UX +2. separate responsibilities - `dynamo serve` will launch a single component only - `dynamo deploy` will launch multiple components (graph) +3. Consistent UX + +```bash +# serve +dynamo serve +dynamo serve --mode disagg --engine=vllm +dynamo serve --mode disagg --engine=vllm -f ./config.yaml + +# deploy golden path +dynamo deploy +dynamo deploy --mode disagg --engine=vllm +dynamo deploy --mode disagg --engine=vllm -f ./my_custom_config.yaml +``` # Motivation @@ -92,6 +105,13 @@ Launch vllm worker dynamo serve in=dyn out=vllm -f config.yam ``` +Current UX +```bash +dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks \ + --service-name VllmWorker graphs.agg:Frontend \ + --VllmWorker.ServiceArgs.dynamo.namespace=dynamo +``` + ## Launching a graph From e9f1cfccc02d14bb041c318252d80569322afa8b Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 11:51:59 -0700 Subject: [PATCH 13/28] --wip-- --- 0001-Dynamo-UX.md | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 8cb2b3e..429da4e 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -139,35 +139,34 @@ name: dynamo-graph components: - name: http_ingress image: "" - cmd: ["dynamo", "serve"] # default cmd, current dynamo-run + cmd: ["dynamo", "serve"] # default command is `dynamo serve` run_config: - input: http - output: dyn - parameters: # these parameters are passed to component - port: 8080 + args: # command arguments + - input=http + - output=dyn replicas: 5 resources: cpu: 500m memory: 2Gi - name: vllm_worker cmd: ["dynamo", "serve"] - image: ... 
+ image: "" run_config: - input: "dyn://llama3-8b.backend.generate" - output: vllm - parameters: + args: + input: "dyn://llama3-8b.backend.generate" + output: vllm + options: # options are rendered in the format --a b + a: b model_path: "meta-llama/Meta-Llama-3-8B-Instruct" tensor_parallel_size: 2 context_length: 8192 base_gpu_id: 0 - extra_engine_args: "vllm_config.json" replicas: 2 resources: gpu: 2 memory: 24Gi environment: - CUDA_VISIBLE_DEVICES: "0,1" - HF_TOKEN: "${{ my_secret_name.HF_TOKEN }}" - secrets: - - my_secret_name + DISABLE_FOO: 1 + secret_env: + - my_k8s_secret_name ``` From eecc73253eabb92ab86b8b4c6865d8e766eda0a2 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:02:18 -0700 Subject: [PATCH 14/28] --wip-- --- 0001-Dynamo-UX.md | 112 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 20 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 429da4e..cf2cae0 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -25,15 +25,21 @@ 3. 
Consistent UX ```bash -# serve -dynamo serve -dynamo serve --mode disagg --engine=vllm -dynamo serve --mode disagg --engine=vllm -f ./config.yaml +# serve frontend +dynamo serve in=http out=dyn +# serve a backend +dynamo serve --engine vllm +dynamo serve --task prefill --engine=trtllm +dynamo serve --engine=vllm -f ./config.yaml # deploy golden path dynamo deploy -dynamo deploy --mode disagg --engine=vllm -dynamo deploy --mode disagg --engine=vllm -f ./my_custom_config.yaml + +# deploy a model in aggregated mode +dynamo deploy --mode agg --engine vllm + +# explicit config file +dynamo deploy --mode disagg --engine trtllm -f ./my_custom_config.yaml ``` # Motivation @@ -80,13 +86,17 @@ Users are unable to specify gpu resources explicitly ## Requirements ### REQ 1: Dynamo serve SHOULD not interleave deployment logic -### REQ 2: Dynamo users MUST be able to explicitly specify exact configuration -### REQ 3: Dynamo users MUST be able to deploy a dynamo graph using a simplified config +### REQ 2: Dynamo users MUST be able to deploy a dynamo graph using a simplified explicitly specified config ## Scenarios -Persona: Entrprise customer (K8s Savvy) +Persona: Enterprise customer +- experts in managing K8s +- need full control over customizing a graph deployment + Persona: Component Developer +- need full control over launching a component +- need consistent deployment in k8s through ci/cd # Proposal @@ -94,17 +104,29 @@ Persona: Component Developer `dynamo serve` command will launch individual component (single process) -Example: +Dedicated DEP [merge dynamo serve and run](https://github.com/ai-dynamo/enhancements/blob/grahamk/serve-run-merge/deps/NNNN-serve-run-merge.md) to address the same.
+ +### Alt 1: Preserve current dynamo run experience Launch Frontend (+Processor+Router) ```bash -dynamo serve in=http out=dyn -f config.yaml +dynamo serve in=http out=dyn ``` Launch vllm worker ```bash -dynamo serve in=dyn out=vllm -f config.yam +dynamo serve in=dyn out=vllm -f config.yaml +``` + +### Alt 2: similar experience across serve and deploy + +```bash +dynamo serve --engine vllm +dynamo serve --task prefill --engine=trtllm +dynamo serve --engine=vllm -f ./config.yaml ``` + +### Note Current UX ```bash dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-checks \ @@ -114,6 +136,9 @@ dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-che ## Launching a graph +Dynamo deploy command will generate target specific manifests. +- input: config.yaml (component run config), deployment.yaml (deployment spec) +- output: target specific manifests ### Alternative 1: Separate deployment and component configs @@ -124,26 +149,71 @@ dynamo deploy -c ./config.yaml -f ./deployment.yaml --out_dir=k8s_deployment dynamo deploy --target slurm -c ./config.yaml -f ./deployment.yaml --out_dir=slurm_deployment ``` -1. config.yaml - -This will map to [current config yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/vllm_v1/configs/disagg.yaml) -`dynamo serve -c ./config.yaml` to run a service +### `config.yaml` +maps to [current config yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/vllm_v1/configs/disagg.yaml) and is used with `dynamo serve ... -c ./config.yaml` to run a service.
+`deployment-config.yaml` +```yaml +version: 0.1 +name: dynamo-graph +components: + - name: http_ingress # component name matches with name in config.yaml + image: "" + cmd: ["dynamo", "serve"] + # default command is `dynamo serve` + # Alternatively, user can specify any command - + # cmd: ["python3", "-m", "a.b.MyComponent"] + # cmd: ["rust-binary"] + run_config: + # raw argv style positional args + args: # command arguments + - input=http + - output=dyn + replicas: 5 + resources: + cpu: 500m + memory: 2Gi + - name: vllm_worker + cmd: ["dynamo", "serve"] + image: "" + run_config: + args: + input: "dyn://llama3-8b.backend.generate" + output: vllm + replicas: 2 + resources: + gpu: 2 + cpu: 10 + memory: 24Gi + environment: + DISABLE_FOO: 1 + # these secrets will be injected as env variables + # in k8s, these are secret refs + secret_env: + - my_k8s_secret_name +``` ### Alternative 2: Single config file with embedded component configs `deployment-config.yaml` ```yaml -version: 1.0 +version: 0.1 name: dynamo-graph components: - name: http_ingress image: "" - cmd: ["dynamo", "serve"] # default command is `dynamo serve` + cmd: ["dynamo", "serve"] + # default command is `dynamo serve` + # Alternatively, user can specify any command - + # cmd: ["python3", "-m", "a.b.MyComponent"] + # cmd: ["rust-binary"] run_config: + # raw argv style positional args args: # command arguments - input=http - output=dyn + options: # options are rendered in the format --key value + port: 8000 replicas: 5 resources: cpu: 500m @@ -155,8 +225,7 @@ components: args: input: "dyn://llama3-8b.backend.generate" output: vllm - options: # options are rendered in the format --a b - a: b + options: model_path: "meta-llama/Meta-Llama-3-8B-Instruct" tensor_parallel_size: 2 context_length: 8192 @@ -164,9 +233,12 @@ components: replicas: 2 resources: gpu: 2 + cpu: 10 memory: 24Gi environment: DISABLE_FOO: 1 + # these secrets will be injected as env variables + # in k8s, these are secret refs secret_env: - 
my_k8s_secret_name ``` From bf845da887bd6a35a951b42be3c63ecead48f97e Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:07:00 -0700 Subject: [PATCH 15/28] --wip-- --- 0001-Dynamo-UX.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index cf2cae0..7d6ae47 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -152,7 +152,7 @@ dynmao deploy --target slurm -c ./config.yaml -f ./deployment.yaml --out_dir=slu ### `config.yaml` maps to [current config yaml](https://github.com/ai-dynamo/dynamo/blob/main/examples/vllm_v1/configs/disagg.yaml) and used with `dynamo serve ... -c ./config.yaml` to run a service. -`deployment-config.yaml` +`deployment.yaml` ```yaml version: 0.1 name: dynamo-graph @@ -195,7 +195,7 @@ components: ### Alternative 2: Single config file with embedded component configs -`deployment-config.yaml` +`deployment.yaml` ```yaml version: 0.1 name: dynamo-graph From 78c4530627c8168cdd9f76bd2bf574577ccd9d57 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:11:49 -0700 Subject: [PATCH 16/28] --wip-- --- 0001-Dynamo-UX.md | 72 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 7d6ae47..df4c613 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -193,7 +193,72 @@ components: - my_k8s_secret_name ``` -### Alternative 2: Single config file with embedded component configs +### Alternative 2: Single config file + +```bash +dynamo deploy -f ./config.yaml --out_dir=k8s_deployment +``` + + +`config.yaml` +```yaml +version: 0.1 +name: dynamo-graph + +components: + - name: http_ingress + options: + port: http + - name: vllm_worker + options: + model_path: "meta-llama/Meta-Llama-3-8B-Instruct" + tensor_parallel_size: 2 + context_length: 8192 + base_gpu_id: 0 + +# deployment section +deployments: + - name: http_ingress + image: "" + cmd: ["dynamo", "serve"] + # default 
command is `dynamo serve` + # Alternatively, user can specify any command - + # cmd: ["python3", "-m", "a.b.MyComponent"] + # cmd: ["rust-binary"] + run_config: + # raw argv style positional args + args: # command arguments + - input=http + - output=dyn + options: # options are rendered in the format --key value + port: 8000 + replicas: 5 + resources: + cpu: 500m + memory: 2Gi + - name: vllm_worker + cmd: ["dynamo", "serve"] + image: "" + run_config: + args: + input: "dyn://llama3-8b.backend.generate" + output: vllm + + replicas: 2 + resources: + gpu: 2 + cpu: 10 + memory: 24Gi + environment: + DISABLE_FOO: 1 + # these secrets will be injected as env variables + # in k8s, these are secret refs + secret_env: + - my_k8s_secret_name +``` + + +### Alternative 3: Single config file with embedded component configs `deployment.yaml` ```yaml @@ -242,3 +307,8 @@ components: secret_env: - my_k8s_secret_name ``` + + +### Building base image + +Publish engine specific image with pre-built components for example current form of `examples/vllm/*` is available for python import. 
\ No newline at end of file From a3b681bb4ca22104cf3c47eafad7f4e1df74a57c Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:13:26 -0700 Subject: [PATCH 17/28] --wip-- --- 0001-Dynamo-UX.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index df4c613..56a0b54 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -230,8 +230,7 @@ deployments: args: # command arguments - input=http - output=dyn - options: # options are rendered in the format --key value - port: 8000 + # options are auto injected replicas: 5 resources: cpu: 500m From 35cd8322fd137fe53889b5403c3164c52ecfd566 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:14:07 -0700 Subject: [PATCH 18/28] --wip-- --- 0001-Dynamo-UX.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 56a0b54..fa1bb84 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -42,6 +42,8 @@ dynamo deploy --mode agg --engine vllm dynamo deploy --mode disagg --engine trtllm -f ./my_custom_config.yaml ``` +4. 
Deprecate `dynamo build`, `depends` and `link` + # Motivation ## Tight coupling between component's implementation and deployment From a92c1b05c3733cf4db60e8cde0d16a3940f14478 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:16:17 -0700 Subject: [PATCH 19/28] --wip-- --- 0001-Dynamo-UX.md | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index fa1bb84..77dcf46 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -198,6 +198,7 @@ components: ### Alternative 2: Single config file ```bash +dynamo serve http_ingress -f ./config.yaml dynamo deploy -f ./config.yaml --out_dir=k8s_deployment ``` @@ -209,9 +210,18 @@ name: dynamo-graph components: - name: http_ingress + run_config: + # raw argv style positional args + args: # command arguments + - input=http + - output=dyn options: port: http - name: vllm_worker + run_config: + args: + input: "dyn://llama3-8b.backend.generate" + output: vllm options: model_path: "meta-llama/Meta-Llama-3-8B-Instruct" tensor_parallel_size: 2 @@ -223,16 +233,6 @@ deployments: - name: http_ingress image: "" cmd: ["dynamo", "serve"] - # default command is `dynamo serve` - # Alternatively, user can specify any command - - # cmd: ["python3", "-m", "a.b.MyComponent"] - # cmd: ["rust-binary"] - run_config: - # raw argv style positional args - args: # command arguments - - input=http - - output=dyn - # options are auto injected replicas: 5 resources: cpu: 500m @@ -240,11 +240,6 @@ deployments: - name: vllm_worker cmd: ["dynamo", "serve"] image: "" - run_config: - args: - input: "dyn://llama3-8b.backend.generate" - output: vllm - replicas: 2 resources: gpu: 2 From a3627dd93b241505c3f4d1ac806c6e16f3ae3a42 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 13:23:05 -0700 Subject: [PATCH 20/28] --wip-- --- 0001-Dynamo-UX.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff
--git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 77dcf46..38e62aa 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -39,7 +39,7 @@ dynamo deploy dynamo deploy --mode agg --engine vllm # explicit config file -dynamo deploy --mode disagg --engine trtllm -f ./my_custom_config.yaml +dynamo deploy -f ./my_custom_config.yaml ``` 4. Deprecate `dynamo build`, `depends` and `link` From 7d7a4596dda3c191dd51591f4a6c68c76e84b512 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 18:40:25 -0700 Subject: [PATCH 21/28] --wip-- --- 0001-Dynamo-UX.md | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 38e62aa..7787498 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -6,7 +6,7 @@ **Category**: Architecture -**Required Reviewers**: Itay, Neelay, Ishan, Alec, Mohammed, Maksim +**Required Reviewers**: Itay, Neelay, Graham, Ishan, Alec, Mohammed, Maksim, Neal **Review Date**: 07/02/2025 @@ -22,27 +22,31 @@ - `dynamo serve` will launch a single component only - `dynamo deploy` will launch multiple components (graph) -3. Consistent UX +3. deprecate `dynamo build` in favor of builderless deployments + +4. deprecate `depends` and `link` + +5. consistent serve/deploy UX ```bash # serve frontend dynamo serve in=http out=dyn -# serve a backend -dynamo serve --engine vllm -dynamo serve --task prefill --engine=trtllm -dynamo serve --engine=vllm -f ./config.yaml -# deploy golden path -dynamo deploy +# serve backend +dynamo serve in=dyn out=vllm -f ./config.yaml +``` -# deploy a model in aggregated mode +TODO: move to separate DEP +6. golden path for deployment +``` +# deploy a model with sane default parameters +dynamo deploy dynamo deploy --mode agg --engine vllm # explicit config file dynamo deploy -f ./my_custom_config.yaml ``` -4.
Deprecate `dynamo build`, `depnednds` and `link` # Motivation From 68b2302e2b0e320f27226d6340d851e429dad0b3 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 19:25:27 -0700 Subject: [PATCH 22/28] --wip-- --- 0001-Dynamo-UX.md | 131 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 107 insertions(+), 24 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 7787498..578e1b1 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -22,30 +22,11 @@ - `dynamo serve` will launch a single component only - `dynamo deploy` will launch multiple components (graph) -3. deprecate `dynamo build` in favor of builderless deployments +3. deprecate +- `dynamo build` in favor of builderless deployments +- `depends` and `link` -4. deprecate `depends` and `link` - -5. consistent serve/deploy UX - -```bash -# serve frontend -dynamo serve in=http out=dyn - -# serve backend -dynamo serve in=dyn out=vllm -f ./config.yaml -``` - -TODO: move to seperate DEP -6. golden path for deployment -``` -# deploy a model with sane default parameters -dynamo deploy -dynamo deploy --mode agg --engine vllm - -# explicit config file -dynamo deploy -f ./my_custom_config.yaml -``` +4. simple deployment with K8s DynamoGraphDeployment CR # Motivation @@ -142,6 +123,83 @@ dynamo serve --system-app-port 5000 --enable-system-app --use-default-health-che ## Launching a graph +**Local**: Consensus is to provide individual `dynamo serve` commands to launch each component in isolation in bare metal local env. + +We are not opinionated on how to locally serve the graph and leave it to the end user. + +Alternatives are: +- bash/python script to launch component in a single node +- docker compose for launching the components locally with pre-built images +- README with `dynamo serve` commands to launch each component +- gstreamer/nextflow like DSL + + +**K8s**: There are 2 approaches to launch a graph in K8s environment +1.
K8s DynamoGraphDeployment CR to launch examples [Preferred] + ++ Simple ++ Explicit +- Duplicated config for local serving + +Example: +```bash +kubectl apply -f vllm-disagg-graph.yaml +``` + +2. `dynamo deploy` cli to generate K8s DynamoGraphDeployment CR ++ Extensible to docker compose/slurm ++ DRY: reuse same config between local/k8s ++ CI/CD friendly +- multiple steps + +### Use K8s DynamoGraphDeployment CR to launch examples [Preferred] + +Each Dynamo graph example will be accompanied by a corresponding k8s DynamoGraphDeployment CR for deployment in K8s. + +`agg-vllm.yaml` +```yaml +apiVersion: nvidia.com/v1alpha1 +kind: DynamoGraphDeployment +metadata: + name: example-graph +spec: + services: + # component + Frontend: + dynamoNamespace: inference + componentType: main + replicas: 4 + extraPodSpec: + mainContainer: + image: + command: + - dynamo run + args: + - in=http + - out=dyn + envs: + - name: DYN_DEPLOYMENT_CONFIG + value: + # component + VllmWorker: + dynamoNamespace: inference + replicas: 2 + extraPodSpec: + mainContainer: + image: + command: + - dynamo run + args: + - in=dyn + - out=vllm + envs: + - name: DYN_DEPLOYMENT_CONFIG + value: +``` + + +**NOTE**: Below alternatives use `dynamo deploy` command to generate k8s manifests + Dynamo deploy command will generate target specific manifests. - input: config.yaml (component run config), deployment.yaml (deployment spec) - output: target specific manifests @@ -311,4 +369,29 @@ components: ### Building base image -Publish engine specific image with pre-built components for example current form of `examples/vllm/*` is available for python import. \ No newline at end of file +Publish engine specific image with pre-built components for example current form of `examples/vllm/*` is available for python import.
+ + +**Note** +moved to separate DEP + +golden path for deployment +``` +# deploy a model with sane default parameters +dynamo deploy +dynamo deploy --mode agg --engine vllm + +# explicit config file +dynamo deploy -f ./my_custom_config.yaml +``` + +A: consistent deploy UX + +```bash +# serve frontend +dynamo serve in=http out=dyn +dynamo serve in=dyn out=vllm -f ./config.yaml + +# serve backend +dynamo deploy -f ./config.yaml -c ./deployment.yaml +``` \ No newline at end of file From 7751adcb04ea14361519825107228e56a0c9bb63 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:12:11 -0700 Subject: [PATCH 23/28] --wip-- --- 0001-Dynamo-UX.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 578e1b1..f2535bd 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -25,9 +25,28 @@ 3. deprecate - `dynamo build` in favor of builderless deployments - `depends` and `link` +- older version of examples 4. simple deployment with K8s DynamoGraphDeployment CR +directory structure: +```yaml +examples/llm +- vllm_v1 + - launch + - k8s # this folder contains DynamoGraphDeployment CRs + - agg.yaml + - disagg.yaml + - disagg.yaml + - deepseek_r1 + - agg_dp.yaml + - local # this folder contains DynamoGraphDeployment CRs + - components +- tensorrt_llm +- sglang +- vllm_v0 +- multimodal +``` # Motivation From da8eabce3f019a7ade0ff5bde6a078d79d667cfb Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:12:42 -0700 Subject: [PATCH 24/28] --wip-- --- 0001-Dynamo-UX.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index f2535bd..c905035 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -40,7 +40,7 @@ examples/llm - disagg.yaml - deepseek_r1 - agg_dp.yaml - - local # this folder contains DynamoGraphDeployment CRs + - local # this folder contains scripts/README for local serving - components - tensorrt_llm - sglang 
From
ed280d1fc43d53cbd62f3a6d0b57580185d78822 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:14:19 -0700 Subject: [PATCH 25/28] --wip-- --- 0001-Dynamo-UX.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index c905035..4a9fcdc 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -24,7 +24,8 @@ 3. deprecate - `dynamo build` in favor of builderless deployments -- `depends` and `link` +- `dynamo deploy` in favor of simpler K8s DynamoGraphDeployment CR (next point below) +- `depends` and `link`: these are not used anymore - older version of examples 4. simple deployment with K8s DynamoGraphDeployment CR From 38dd65ac97fa30e96724a447a9c20af94cddef05 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:15:39 -0700 Subject: [PATCH 26/28] --wip-- --- 0001-Dynamo-UX.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 4a9fcdc..5de18eb 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -26,7 +26,7 @@ - `dynamo build` in favor of builderless deployments - `dynamo deploy` in favor of simpler K8s DynamoGraphDeployment CR (next point below) - `depends` and `link`: these are not used anymore -- older version of examples +- older version of examples (examples/llm) 4. simple deployment with K8s DynamoGraphDeployment CR @@ -392,8 +392,7 @@ components: Publish engine specific image with pre-built components for example current form of `examples/vllm/*` is available for python import. 
-**Note** -moved to seperate DEP +### Note: Ignore below - moved to a separate DEP golden path for deployment ``` From 735bb13d7d44bd283459c6787e26d2795daf9148 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:21:39 -0700 Subject: [PATCH 27/28] --wip-- --- 0001-Dynamo-UX.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 5de18eb..09fa658 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -20,7 +20,7 @@ 2. separate responsibilities - `dynamo serve` will launch a single component only -- `dynamo deploy` will launch multiple components (graph) +- K8s DynamoGraphDeployment CR will launch multiple components (graph) in k8s environment 3. deprecate - `dynamo build` in favor of builderless deployments From d4797562f247b68b06ccdf799158cf47fcfd0005 Mon Sep 17 00:00:00 2001 From: Biswa Panda Date: Wed, 2 Jul 2025 20:30:02 -0700 Subject: [PATCH 28/28] --wip-- --- 0001-Dynamo-UX.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/0001-Dynamo-UX.md b/0001-Dynamo-UX.md index 09fa658..a23e187 100644 --- a/0001-Dynamo-UX.md +++ b/0001-Dynamo-UX.md @@ -20,7 +20,9 @@ 2. separate responsibilities - `dynamo serve` will launch a single component only -- K8s DynamoGraphDeployment CR will launch multiple components (graph) in k8s environment +- graph launch + - local: unopinionated, users have full control over how to orchestrate multiple components + - k8s: K8s DynamoGraphDeployment CR will launch multiple components (graph) in k8s environment 3. deprecate - `dynamo build` in favor of builderless deployments @@ -28,7 +30,7 @@ - `depends` and `link`: these are not used anymore - older version of examples (examples/llm) -4. simple deployment with K8s DynamoGraphDeployment CR +4. 
simple k8s graph deployment with [K8s DynamoGraphDeployment CR](#use-k8s-dynamographdeployment-cr-to-launch-examples-preferred) directory structure: ```yaml