Skip to content

Commit 80e2809

Browse files
committed
feat: extract deploymentType as interface
1 parent fd35899 commit 80e2809

File tree

11 files changed

+440
-399
lines changed

11 files changed

+440
-399
lines changed

deploy/cloud/operator/internal/dynamo/backend_common.go

Lines changed: 0 additions & 13 deletions
This file was deleted.

deploy/cloud/operator/internal/dynamo/backend_sglang.go

Lines changed: 14 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,32 @@ import (
66
"strings"
77

88
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
9-
commonconsts "github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
109
corev1 "k8s.io/api/core/v1"
1110
)
1211

12+
type MultinodeDeployer interface {
13+
GetLeaderHostname(serviceName string) string
14+
GetHostNames(serviceName string, numberOfNodes int32) []string
15+
GetNodeRank() string
16+
}
17+
1318
type SGLangBackend struct{}
1419

15-
func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, multinodeDeploymentType commonconsts.MultinodeDeploymentType, serviceName string) {
20+
func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string, multinodeDeployer MultinodeDeployer) {
1621
// For single node, nothing to do
1722
if numberOfNodes <= 1 {
1823
return
1924
}
2025

21-
// Remove probes for multinode leader and worker
22-
if role == RoleLeader || role == RoleWorker {
26+
// Remove probes for multinode worker
27+
if role == RoleWorker {
2328
container.LivenessProbe = nil
2429
container.ReadinessProbe = nil
2530
container.StartupProbe = nil
2631
}
2732

2833
// Generate the flags to add
29-
flags := b.getMultinodeFlags(numberOfNodes, role, multinodeDeploymentType, serviceName)
34+
flags := b.getMultinodeFlags(numberOfNodes, role, serviceName, multinodeDeployer)
3035
if flags == "" {
3136
return
3237
}
@@ -39,33 +44,18 @@ func (b *SGLangBackend) UpdateContainer(container *corev1.Container, numberOfNod
3944
}
4045
}
4146

42-
func (b *SGLangBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, multinodeDeploymentType commonconsts.MultinodeDeploymentType, serviceName string) {
47+
func (b *SGLangBackend) UpdatePodSpec(podSpec *corev1.PodSpec, numberOfNodes int32, role Role, component *v1alpha1.DynamoComponentDeploymentOverridesSpec, serviceName string) {
4348
// do nothing
4449
}
4550

4651
// getMultinodeFlags returns the multinode flags as a single string
47-
func (b *SGLangBackend) getMultinodeFlags(numberOfNodes int32, role Role, multinodeDeploymentType commonconsts.MultinodeDeploymentType, serviceName string) string {
48-
var distInitAddr, nodeRank string
49-
50-
// Determine dist-init-addr
51-
if multinodeDeploymentType == commonconsts.MultinodeDeploymentTypeGrove {
52-
leaderHostname := generateGroveLeaderHostname(serviceName)
53-
distInitAddr = fmt.Sprintf("%s:29500", leaderHostname)
54-
} else {
55-
distInitAddr = "${LWS_LEADER_ADDRESS}:29500"
56-
}
57-
52+
func (b *SGLangBackend) getMultinodeFlags(numberOfNodes int32, role Role, serviceName string, multinodeDeployer MultinodeDeployer) string {
53+
distInitAddr := fmt.Sprintf("%s:29500", multinodeDeployer.GetLeaderHostname(serviceName))
54+
nodeRank := multinodeDeployer.GetNodeRank()
5855
// Determine node-rank
5956
if role == RoleLeader {
6057
nodeRank = "0"
61-
} else {
62-
if multinodeDeploymentType == commonconsts.MultinodeDeploymentTypeGrove {
63-
nodeRank = "$((GROVE_PCLQ_POD_INDEX + 1))"
64-
} else {
65-
nodeRank = "${LWS_WORKER_INDEX}"
66-
}
6758
}
68-
6959
return fmt.Sprintf("--dist-init-addr %s --nnodes %d --node-rank %s", distInitAddr, numberOfNodes, nodeRank)
7060
}
7161

deploy/cloud/operator/internal/dynamo/backend_sglang_test.go

Lines changed: 90 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -5,93 +5,92 @@ import (
55
"testing"
66

77
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/api/v1alpha1"
8-
"github.com/ai-dynamo/dynamo/deploy/cloud/operator/internal/consts"
98
corev1 "k8s.io/api/core/v1"
109
)
1110

1211
func TestSGLangBackend_DirectFlagInjection(t *testing.T) {
1312
backend := &SGLangBackend{}
1413

1514
tests := []struct {
16-
name string
17-
numberOfNodes int32
18-
role Role
19-
multinodeDeploymentType consts.MultinodeDeploymentType
20-
initialArgs []string
21-
expectedArgs []string
22-
description string
15+
name string
16+
numberOfNodes int32
17+
role Role
18+
multinodeDeployer MultinodeDeployer
19+
initialArgs []string
20+
expectedArgs []string
21+
description string
2322
}{
2423
{
25-
name: "single node does not modify args",
26-
numberOfNodes: 1,
27-
role: RoleMain,
28-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
29-
initialArgs: []string{"python -m dynamo.sglang.worker"},
30-
expectedArgs: []string{"python -m dynamo.sglang.worker"},
31-
description: "Single node should not modify anything",
24+
name: "single node does not modify args",
25+
numberOfNodes: 1,
26+
role: RoleMain,
27+
multinodeDeployer: &GroveMultinodeDeployer{},
28+
initialArgs: []string{"python -m dynamo.sglang.worker"},
29+
expectedArgs: []string{"python -m dynamo.sglang.worker"},
30+
description: "Single node should not modify anything",
3231
},
3332
{
34-
name: "multinode adds flags to simple python command",
35-
numberOfNodes: 2,
36-
role: RoleLeader,
37-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
38-
initialArgs: []string{"python -m dynamo.sglang.worker"},
39-
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0"},
40-
description: "Should add multinode flags directly to python command",
33+
name: "multinode adds flags to simple python command",
34+
numberOfNodes: 2,
35+
role: RoleLeader,
36+
multinodeDeployer: &GroveMultinodeDeployer{},
37+
initialArgs: []string{"python -m dynamo.sglang.worker"},
38+
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0"},
39+
description: "Should add multinode flags directly to python command",
4140
},
4241
{
43-
name: "multinode with complex command",
44-
numberOfNodes: 2,
45-
role: RoleLeader,
46-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
47-
initialArgs: []string{"echo blah | wc -l && python -m dynamo.sglang.worker && ls -al"},
48-
expectedArgs: []string{"echo blah | wc -l && python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 && ls -al"},
49-
description: "Should add flags only to python command, not other commands",
42+
name: "multinode with complex command",
43+
numberOfNodes: 2,
44+
role: RoleLeader,
45+
multinodeDeployer: &GroveMultinodeDeployer{},
46+
initialArgs: []string{"echo blah | wc -l && python -m dynamo.sglang.worker && ls -al"},
47+
expectedArgs: []string{"echo blah | wc -l && python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 && ls -al"},
48+
description: "Should add flags only to python command, not other commands",
5049
},
5150
{
52-
name: "multinode worker with Grove deployment",
53-
numberOfNodes: 3,
54-
role: RoleWorker,
55-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
56-
initialArgs: []string{"python -m dynamo.sglang.worker"},
57-
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 3 --node-rank $((GROVE_PCLQ_POD_INDEX + 1))"},
58-
description: "Worker should get correct node rank",
51+
name: "multinode worker with Grove deployment",
52+
numberOfNodes: 3,
53+
role: RoleWorker,
54+
multinodeDeployer: &GroveMultinodeDeployer{},
55+
initialArgs: []string{"python -m dynamo.sglang.worker"},
56+
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 3 --node-rank $((GROVE_PCLQ_POD_INDEX + 1))"},
57+
description: "Worker should get correct node rank",
5958
},
6059
{
61-
name: "LWS deployment uses correct address",
62-
numberOfNodes: 2,
63-
role: RoleLeader,
64-
multinodeDeploymentType: consts.MultinodeDeploymentTypeLWS,
65-
initialArgs: []string{"python -m dynamo.sglang.worker"},
66-
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${LWS_LEADER_ADDRESS}:29500 --nnodes 2 --node-rank 0"},
67-
description: "LWS deployment should use LWS_LEADER_ADDRESS",
60+
name: "LWS deployment uses correct address",
61+
numberOfNodes: 2,
62+
role: RoleLeader,
63+
multinodeDeployer: &LWSMultinodeDeployer{},
64+
initialArgs: []string{"python -m dynamo.sglang.worker"},
65+
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${LWS_LEADER_ADDRESS}:29500 --nnodes 2 --node-rank 0"},
66+
description: "LWS deployment should use LWS_LEADER_ADDRESS",
6867
},
6968
{
70-
name: "command with pipes gets flags before pipe",
71-
numberOfNodes: 2,
72-
role: RoleLeader,
73-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
74-
initialArgs: []string{"python -m dynamo.sglang.worker | tee /tmp/log"},
75-
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 | tee /tmp/log"},
76-
description: "Should insert flags before pipe operator",
69+
name: "command with pipes gets flags before pipe",
70+
numberOfNodes: 2,
71+
role: RoleLeader,
72+
multinodeDeployer: &GroveMultinodeDeployer{},
73+
initialArgs: []string{"python -m dynamo.sglang.worker | tee /tmp/log"},
74+
expectedArgs: []string{"python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 | tee /tmp/log"},
75+
description: "Should insert flags before pipe operator",
7776
},
7877
{
79-
name: "multiple args are flattened and processed together",
80-
numberOfNodes: 2,
81-
role: RoleLeader,
82-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
83-
initialArgs: []string{"echo start", "python -m dynamo.sglang.worker", "echo done"},
84-
expectedArgs: []string{"echo start python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 echo done"},
85-
description: "Multiple args should be flattened and python command gets flags",
78+
name: "multiple args are flattened and processed together",
79+
numberOfNodes: 2,
80+
role: RoleLeader,
81+
multinodeDeployer: &GroveMultinodeDeployer{},
82+
initialArgs: []string{"echo start", "python -m dynamo.sglang.worker", "echo done"},
83+
expectedArgs: []string{"echo start python -m dynamo.sglang.worker --dist-init-addr ${GROVE_PCSG_NAME}-${GROVE_PCSG_INDEX}-test-service-ldr-0.${GROVE_HEADLESS_SERVICE}:29500 --nnodes 2 --node-rank 0 echo done"},
84+
description: "Multiple args should be flattened and python command gets flags",
8685
},
8786
{
88-
name: "no sglang command means flattened but no changes",
89-
numberOfNodes: 2,
90-
role: RoleLeader,
91-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
92-
initialArgs: []string{"echo hello", "python -m some.other.module"},
93-
expectedArgs: []string{"echo hello python -m some.other.module"},
94-
description: "Non-sglang commands should be flattened but not modified",
87+
name: "no sglang command means flattened but no changes",
88+
numberOfNodes: 2,
89+
role: RoleLeader,
90+
multinodeDeployer: &GroveMultinodeDeployer{},
91+
initialArgs: []string{"echo hello", "python -m some.other.module"},
92+
expectedArgs: []string{"echo hello python -m some.other.module"},
93+
description: "Non-sglang commands should be flattened but not modified",
9594
},
9695
}
9796

@@ -101,7 +100,7 @@ func TestSGLangBackend_DirectFlagInjection(t *testing.T) {
101100
Args: append([]string{}, tt.initialArgs...),
102101
}
103102

104-
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, tt.multinodeDeploymentType, "test-service")
103+
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, "test-service", tt.multinodeDeployer)
105104

106105
if !reflect.DeepEqual(container.Args, tt.expectedArgs) {
107106
t.Errorf("UpdateContainer() args = %v, want %v", container.Args, tt.expectedArgs)
@@ -124,39 +123,39 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) {
124123
backend := &SGLangBackend{}
125124

126125
tests := []struct {
127-
name string
128-
numberOfNodes int32
129-
role Role
130-
multinodeDeploymentType consts.MultinodeDeploymentType
131-
expectProbesRemoved bool
126+
name string
127+
numberOfNodes int32
128+
role Role
129+
multinodeDeployer MultinodeDeployer
130+
expectProbesRemoved bool
132131
}{
133132
{
134-
name: "single node does not remove probes",
135-
numberOfNodes: 1,
136-
role: RoleMain,
137-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
138-
expectProbesRemoved: false,
133+
name: "single node does not remove probes",
134+
numberOfNodes: 1,
135+
role: RoleMain,
136+
multinodeDeployer: &GroveMultinodeDeployer{},
137+
expectProbesRemoved: false,
139138
},
140139
{
141-
name: "multinode leader removes probes",
142-
numberOfNodes: 2,
143-
role: RoleLeader,
144-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
145-
expectProbesRemoved: true,
140+
name: "multinode leader does not remove probes",
141+
numberOfNodes: 2,
142+
role: RoleLeader,
143+
multinodeDeployer: &GroveMultinodeDeployer{},
144+
expectProbesRemoved: false,
146145
},
147146
{
148-
name: "multinode worker removes probes",
149-
numberOfNodes: 2,
150-
role: RoleWorker,
151-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
152-
expectProbesRemoved: true,
147+
name: "multinode worker removes probes",
148+
numberOfNodes: 2,
149+
role: RoleWorker,
150+
multinodeDeployer: &GroveMultinodeDeployer{},
151+
expectProbesRemoved: true,
153152
},
154153
{
155-
name: "multinode main role does not remove probes",
156-
numberOfNodes: 2,
157-
role: RoleMain,
158-
multinodeDeploymentType: consts.MultinodeDeploymentTypeGrove,
159-
expectProbesRemoved: false,
154+
name: "multinode main role does not remove probes",
155+
numberOfNodes: 2,
156+
role: RoleMain,
157+
multinodeDeployer: &GroveMultinodeDeployer{},
158+
expectProbesRemoved: false,
160159
},
161160
}
162161

@@ -174,7 +173,7 @@ func TestSGLangBackend_ProbeRemoval(t *testing.T) {
174173
StartupProbe: startupProbe,
175174
}
176175

177-
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, tt.multinodeDeploymentType, "test-service")
176+
backend.UpdateContainer(container, tt.numberOfNodes, tt.role, &v1alpha1.DynamoComponentDeploymentOverridesSpec{}, "test-service", tt.multinodeDeployer)
178177

179178
if tt.expectProbesRemoved {
180179
if container.LivenessProbe != nil {

0 commit comments

Comments
 (0)