Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions components/backends/sglang/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@ spec:
extraPodSpec:
mainContainer:
image: my-registry/sglang-runtime:my-tag
workingDir: /workspace/components/backends/sglang
command: ["sh", "-c"]
args:
- "python3 -m dynamo.sglang.utils.clear_namespace --namespace sglang-agg && python3 -m dynamo.frontend --http-port=8000"
SGLangDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: sglang-agg
Expand Down
6 changes: 0 additions & 6 deletions components/backends/vllm/deploy/agg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,6 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000"
VllmDecodeWorker:
envFromSecret: hf-token-secret
dynamoNamespace: vllm-agg
Expand Down
9 changes: 3 additions & 6 deletions components/backends/vllm/deploy/disagg_router.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@ spec:
extraPodSpec:
mainContainer:
image: nvcr.io/nvidian/nim-llm-dev/vllm-runtime:dep-233.17
workingDir: /workspace/components/backends/vllm
command:
- /bin/sh
- -c
args:
- "python3 -m dynamo.frontend --http-port 8000 --router-mode kv"
env:
- name: DYNAMO_ROUTER_MODE
value: kv
VllmDecodeWorker:
dynamoNamespace: vllm-v1-disagg-router
envFromSecret: hf-token-secret
Expand Down
9 changes: 6 additions & 3 deletions components/frontend/src/dynamo/frontend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,17 @@ def parse_args():
"--kv-cache-block-size", type=int, help="KV cache block size (u32)."
)
parser.add_argument(
"--http-port", type=int, default=8080, help="HTTP port for the engine (u16)."
"--http-port",
type=int,
default=int(os.environ.get("DYNAMO_HTTP_PORT", "8080")),
help="HTTP port for the engine (u16). Can be set via DYNAMO_HTTP_PORT env var."
)
parser.add_argument(
"--router-mode",
type=str,
choices=["round-robin", "random", "kv"],
default="round-robin",
help="How to route the request",
default=os.environ.get("DYNAMO_ROUTER_MODE", "round-robin"),
help="How to route the request. Can be set via DYNAMO_ROUTER_MODE env var.",
)
parser.add_argument(
"--kv-overlap-score-weight",
Expand Down
15 changes: 0 additions & 15 deletions deploy/cloud/operator/config/rbac/role.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,3 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
1 change: 1 addition & 0 deletions deploy/cloud/operator/internal/dynamo/component_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type ComponentContext struct {
DynamoNamespace string
ParentGraphDeploymentName string
ParentGraphDeploymentNamespace string
BackendFramework BackendFramework
}

func (b *BaseComponentDefaults) GetBaseContainer(context ComponentContext) (corev1.Container, error) {
Expand Down
26 changes: 26 additions & 0 deletions deploy/cloud/operator/internal/dynamo/component_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,32 @@ func NewFrontendDefaults() *FrontendDefaults {
return &FrontendDefaults{&BaseComponentDefaults{}}
}

// getWorkingDir picks the default working directory for the frontend
// container from the backend framework recorded in context.
//
// It returns the backend's directory under /workspace/components/backends
// for vLLM, SGLang and TRT-LLM, and an empty string for any other framework
// to signal that no default working directory is available.
func (f *FrontendDefaults) getWorkingDir(context ComponentContext) string {
	// Empty means "no default available for this framework".
	var workingDir string

	switch context.BackendFramework {
	case BackendFrameworkVLLM:
		workingDir = "/workspace/components/backends/vllm"
	case BackendFrameworkSGLang:
		workingDir = "/workspace/components/backends/sglang"
	case BackendFrameworkTRTLLM:
		workingDir = "/workspace/components/backends/trtllm"
	}

	return workingDir
}

func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Container, error) {
// Frontend doesn't need backend-specific config
container := f.getCommonContainer(context)

// Set working directory based on backend framework if available
if workingDir := f.getWorkingDir(context); workingDir != "" {
container.WorkingDir = workingDir
}

// Set default command and args
container.Command = []string{"python3"}
container.Args = []string{"-m", "dynamo.frontend"}

// Add HTTP port
container.Ports = []corev1.ContainerPort{
{
Expand Down Expand Up @@ -83,6 +105,10 @@ func (f *FrontendDefaults) GetBaseContainer(context ComponentContext) (corev1.Co
Name: commonconsts.EnvDynamoServicePort,
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_HTTP_PORT", // TODO: need to reconcile DYNAMO_PORT and DYNAMO_HTTP_PORT
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
}...)

return container, nil
Expand Down
5 changes: 3 additions & 2 deletions deploy/cloud/operator/internal/dynamo/graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ func GenerateBasePodSpec(
serviceName string,
) (corev1.PodSpec, error) {
// Start with base container generated per component type
componentContext := generateComponentContext(component, parentGraphDeploymentName, namespace, numberOfNodes)
componentContext := generateComponentContext(component, parentGraphDeploymentName, namespace, numberOfNodes, backendFramework)
componentDefaults := ComponentDefaultsFactory(component.ComponentType)
container, err := componentDefaults.GetBaseContainer(componentContext)
if err != nil {
Expand Down Expand Up @@ -823,11 +823,12 @@ func setMetricsLabels(labels map[string]string, dynamoGraphDeployment *v1alpha1.
labels[commonconsts.KubeLabelMetricsEnabled] = commonconsts.KubeLabelValueTrue
}

func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, parentGraphDeploymentName string, namespace string, numberOfNodes int32) ComponentContext {
func generateComponentContext(component *v1alpha1.DynamoComponentDeploymentOverridesSpec, parentGraphDeploymentName string, namespace string, numberOfNodes int32, backendFramework BackendFramework) ComponentContext {
componentContext := ComponentContext{
numberOfNodes: numberOfNodes,
ParentGraphDeploymentName: parentGraphDeploymentName,
ParentGraphDeploymentNamespace: namespace,
BackendFramework: backendFramework,
}
if component.DynamoNamespace != nil {
componentContext.DynamoNamespace = *component.DynamoNamespace
Expand Down
125 changes: 125 additions & 0 deletions deploy/cloud/operator/internal/dynamo/graph_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1283,6 +1283,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2020,6 +2024,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -2769,6 +2777,10 @@ func TestGenerateGrovePodGangSet(t *testing.T) {
},
},
Env: []corev1.EnvVar{
{
Name: "DYNAMO_HTTP_PORT",
Value: fmt.Sprintf("%d", commonconsts.DynamoServicePort),
},
{
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
Expand Down Expand Up @@ -4211,6 +4223,119 @@ func XTestGenerateGrovePodGangSet_StartsAfterDependencies(t *testing.T) {
}
}

// TestGenerateBasePodSpec_Frontend checks the defaults GenerateBasePodSpec
// produces for a frontend component under each backend framework:
//   - the main container's working directory matches the backend,
//   - the command/args are `python3 -m dynamo.frontend`,
//   - DYNAMO_HTTP_PORT is set to commonconsts.DynamoServicePort.
func TestGenerateBasePodSpec_Frontend(t *testing.T) {
	secretsRetriever := &mockSecretsRetriever{}
	controllerConfig := controller_common.Config{}
	dynamoDeployment := &v1alpha1.DynamoGraphDeployment{
		ObjectMeta: metav1.ObjectMeta{
			Name:      "test-deployment",
			Namespace: "default",
		},
	}

	tests := []struct {
		name             string
		component        *v1alpha1.DynamoComponentDeploymentOverridesSpec
		backendFramework BackendFramework
		wantWorkingDir   string
		wantEnvVars      map[string]string
		wantErr          bool
	}{
		{
			name: "vllm frontend",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkVLLM,
			wantWorkingDir:   "/workspace/components/backends/vllm",
			wantEnvVars: map[string]string{
				"DYNAMO_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
		{
			name: "sglang frontend",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkSGLang,
			wantWorkingDir:   "/workspace/components/backends/sglang",
			wantEnvVars: map[string]string{
				"DYNAMO_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
		{
			name: "trtllm frontend",
			component: &v1alpha1.DynamoComponentDeploymentOverridesSpec{
				DynamoComponentDeploymentSharedSpec: v1alpha1.DynamoComponentDeploymentSharedSpec{
					ComponentType: commonconsts.ComponentTypeFrontend,
				},
			},
			backendFramework: BackendFrameworkTRTLLM,
			wantWorkingDir:   "/workspace/components/backends/trtllm",
			wantEnvVars: map[string]string{
				"DYNAMO_HTTP_PORT": fmt.Sprintf("%d", commonconsts.DynamoServicePort),
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			podSpec, err := GenerateBasePodSpec(
				tt.component,
				tt.backendFramework,
				secretsRetriever,
				dynamoDeployment.Name,
				dynamoDeployment.Namespace,
				RoleMain,
				1,
				controllerConfig,
				commonconsts.MultinodeDeploymentTypeGrove,
				"test-service",
			)

			if (err != nil) != tt.wantErr {
				t.Errorf("GenerateBasePodSpec() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if tt.wantErr {
				return
			}

			// Fail with a clear message rather than panicking on the
			// Containers[0] accesses below if no container was generated.
			if len(podSpec.Containers) == 0 {
				t.Fatalf("GenerateBasePodSpec() returned no containers")
			}
			mainContainer := podSpec.Containers[0]

			// Check working directory
			if mainContainer.WorkingDir != tt.wantWorkingDir {
				t.Errorf("GenerateBasePodSpec() workingDir = %v, want %v",
					mainContainer.WorkingDir, tt.wantWorkingDir)
			}

			// Check command and args
			wantCommand := []string{"python3"}
			if !reflect.DeepEqual(mainContainer.Command, wantCommand) {
				t.Errorf("GenerateBasePodSpec() command = %v, want %v",
					mainContainer.Command, wantCommand)
			}
			wantArgs := []string{"-m", "dynamo.frontend"}
			if !reflect.DeepEqual(mainContainer.Args, wantArgs) {
				t.Errorf("GenerateBasePodSpec() args = %v, want %v",
					mainContainer.Args, wantArgs)
			}

			// Check environment variables
			envVars := make(map[string]string, len(mainContainer.Env))
			for _, env := range mainContainer.Env {
				envVars[env.Name] = env.Value
			}
			for k, v := range tt.wantEnvVars {
				got, ok := envVars[k]
				if !ok {
					// Report a missing variable explicitly so it is not
					// confused with one that is present but empty.
					t.Errorf("GenerateBasePodSpec() env var %s is not set, want %v", k, v)
					continue
				}
				if got != v {
					t.Errorf("GenerateBasePodSpec() env var %s = %v, want %v",
						k, got, v)
				}
			}
		})
	}
}

func TestGenerateBasePodSpec_PlannerServiceAccount(t *testing.T) {
secretsRetriever := &mockSecretsRetriever{}
controllerConfig := controller_common.Config{}
Expand Down
Loading