Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg
Expand Down
7 changes: 3 additions & 4 deletions components/backends/sglang/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,9 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg-router && python3 -m dynamo.frontend --http-port=8000 --router-mode kv"
envs:
- name: DYNAMO_ROUTER_MODE
value: kv
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg-router
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg-multinode.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg-multinode && python3 -m dynamo.frontend --http-port=8000"
decode:
multinode:
nodeCount: 2
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-disagg
Expand Down
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/sglang-runtime:hzhou-0811-1
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-disagg && python3 -m dynamo.frontend --http-port=8000"
Planner:
dynamoNamespace: dynamo
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/trtllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg
Expand Down
9 changes: 3 additions & 6 deletions components/backends/trtllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYNAMO_ROUTER_MODE
value: kv
TRTLLMWorker:
envFromSecret: hf-token-secret
dynamoNamespace: trtllm-agg-router
Expand Down
6 changes: 0 additions & 6 deletions components/backends/trtllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-disagg
envFromSecret: hf-token-secret
Expand Down
9 changes: 3 additions & 6 deletions components/backends/trtllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/trtllm-runtime:dep-233.17
workingDir: /workspace/components/backends/trtllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYNAMO_ROUTER_MODE
value: kv
TRTLLMPrefillWorker:
dynamoNamespace: trtllm-v1-disagg-router
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
Expand Down
9 changes: 3 additions & 6 deletions components/backends/vllm/deploy/agg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYNAMO_ROUTER_MODE
value: kv
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg-router
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/disagg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
dynamoNamespace: vllm-disagg
envFromSecret: hf-token-secret
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/disagg_planner.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:hzhou-0814-02
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
Planner:
dynamoNamespace: vllm-disagg-planner
envFromSecret: hf-token-secret
Expand Down
9 changes: 3 additions & 6 deletions components/backends/vllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
envs:
- name: DYNAMO_ROUTER_MODE
value: kv
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
Expand Down
9 changes: 6 additions & 3 deletions components/frontend/src/dynamo/frontend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,17 @@ def parse_args():
"--kv-cache-block-size", type=int, help="KV cache block size (u32)."
)
parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
"--http-port",
type=int,
default=int(os.environ.get("DYNAMO_HTTP_PORT", "8080")),
help="HTTP port for the engine (u16). Can be set via DYNAMO_HTTP_PORT env var."
)
parser.add_argument(
"--router-mode",
type=str,
choices=["round-robin", "random", "kv"],
default="round-robin",
help="How to route the request",
default=os.environ.get("DYNAMO_ROUTER_MODE", "round-robin"),
help="How to route the request. Can be set via DYNAMO_ROUTER_MODE env var.",
)
parser.add_argument(
"--kv-overlap-score-weight",
Expand Down
8 changes: 8 additions & 0 deletions deploy/cloud/operator/internal/dynamo/component_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
// Frontend doesn't need backend-specific config
container := f.getCommonContainer(context)

// Set default command and args
container.Command = []string{"python3"}
container.Args = []string{"-m", "dynamo.frontend"}

// Add HTTP port
container.Ports = []corev1.ContainerPort{
{
Expand Down Expand Up @@ -83,6 +87,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
Name: commonconsts.EnvDynamoServicePort,
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_HTTP_PORT", // TODO: need to reconcile DYNAMO_PORT and DYNAMO_HTTP_PORT
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
}...)

return container, nil
Expand Down
93 changes: 93 additions & 0 deletions deploy/cloud/operator/internal/dynamo/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2020,6 +2024,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2769,6 +2777,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -4211,6 +4223,87 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) {
}
}

// TestGenerateBasePodSpec_Frontend checks the pod spec that GenerateBasePodSpec
// produces for a frontend component: the first container must run
// `python3 -m dynamo.frontend` and must carry every environment variable
// listed in the test case's wantEnvVars.
func TestGenerateBasePodSpec_Frontend(t *testing.T) {
	secretsRetriever := &mockSecretsRetriever{}
	controllerConfig := controller_common.Config{}
	dynamoDeployment := &v1alpha1.DynamoGraphDeployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment",
			Namespace: "default",
		},
	}

	tests := []struct {
		name             string
		component        *v1alpha1.DynamoComponentDeploymentOverridesSpec
		backendFramework BackendFramework
		wantEnvVars      map[string]string
		wantErr          bool
	}{
		{
			name: "frontend with default command",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkVLLM,
			wantEnvVars: map[string]string{
				"DYNAMO_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			podSpec, err := GenerateBasePodSpec(
				tt.component,
				tt.backendFramework,
				secretsRetriever,
				dynamoDeployment.Name,
				dynamoDeployment.Namespace,
				RoleMain,
				1,
				controllerConfig,
				commonconsts.MultinodeDeploymentTypeGrove,
				"test-service",
			)

			if (err != nil) != tt.wantErr {
				t.Fatalf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr)
			}
			if tt.wantErr {
				return
			}

			// Fail this subtest cleanly instead of panicking on Containers[0],
			// which would abort the whole test binary.
			if len(podSpec.Containers) == 0 {
				t.Fatalf("GenerateBasePodSpec() returned no containers, want at least 1")
			}
			container := &podSpec.Containers[0]

			// Check command and args
			wantCommand := []string{"python3"}
			wantArgs := []string{"-m", "dynamo.frontend"}
			if !reflect.DeepEqual(container.Command, wantCommand) {
				t.Errorf("GenerateBasePodSpec() command = %v, want %v",
					container.Command, wantCommand)
			}
			if !reflect.DeepEqual(container.Args, wantArgs) {
				t.Errorf("GenerateBasePodSpec() args = %v, want %v",
					container.Args, wantArgs)
			}

			// Check environment variables
			envVars := make(map[string]string, len(container.Env))
			for _, env := range container.Env {
				envVars[env.Name] = env.Value
			}
			for k, v := range tt.wantEnvVars {
				got, ok := envVars[k]
				// Report a missing variable separately from a wrong value so
				// the failure message is not an ambiguous empty string.
				if !ok {
					t.Errorf("GenerateBasePodSpec() env var %s is not set, want %v", k, v)
					continue
				}
				if got != v {
					t.Errorf("GenerateBasePodSpec() env var %s = %v, want %v",
						k, got, v)
				}
			}
		})
	}
}

func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
Expand Down
Loading