Skip to content

Commit 4eb2563

Browse files
authored
feat: use consistent small models across all deploy examples (#2573)
1 parent 26b3b60 commit 4eb2563

File tree

18 files changed: 44 additions (+44) and 44 deletions (-44)

components/backends/sglang/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -193,7 +193,7 @@ Send a test request to verify your deployment:
193193
curl localhost:8000/v1/chat/completions \
194194
-H "Content-Type: application/json" \
195195
-d '{
196-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
196+
"model": "Qwen/Qwen3-0.6B",
197197
"messages": [
198198
{
199199
"role": "user",

components/backends/sglang/deploy/agg.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,8 @@ spec:
3232
args:
3333
- >-
3434
python3 -m dynamo.sglang
35-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
36-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
35+
--model-path Qwen/Qwen3-0.6B
36+
--served-model-name Qwen/Qwen3-0.6B
3737
--page-size 16
3838
--tp 1
3939
--trust-remote-code

components/backends/sglang/deploy/agg_router.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,8 @@ spec:
3535
args:
3636
- >-
3737
python3 -m dynamo.sglang
38-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
39-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
38+
--model-path Qwen/Qwen3-0.6B
39+
--served-model-name Qwen/Qwen3-0.6B
4040
--page-size 16
4141
--tp 1
4242
--trust-remote-code

components/backends/sglang/deploy/disagg-multinode.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,8 @@ spec:
6868
args:
6969
- >-
7070
python3 -m dynamo.sglang
71-
--model-path meta-llama/Llama-3.3-70B-Instruct
72-
--served-model-name meta-llama/Llama-3.3-70B-Instruct
71+
--model-path Qwen/Qwen3-0.6B
72+
--served-model-name Qwen/Qwen3-0.6B
7373
--tp-size 8
7474
--trust-remote-code
7575
--skip-tokenizer-init

components/backends/sglang/deploy/disagg.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,8 +32,8 @@ spec:
3232
args:
3333
- >-
3434
python3 -m dynamo.sglang
35-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
36-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
35+
--model-path Qwen/Qwen3-0.6B
36+
--served-model-name Qwen/Qwen3-0.6B
3737
--page-size 16
3838
--tp 1
3939
--trust-remote-code
@@ -59,8 +59,8 @@ spec:
5959
args:
6060
- >-
6161
python3 -m dynamo.sglang
62-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
63-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
62+
--model-path Qwen/Qwen3-0.6B
63+
--served-model-name Qwen/Qwen3-0.6B
6464
--page-size 16
6565
--tp 1
6666
--trust-remote-code

components/backends/sglang/deploy/disagg_planner.yaml

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -116,8 +116,8 @@ spec:
116116
args:
117117
- >-
118118
python3 -m dynamo.sglang
119-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
120-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
119+
--model-path Qwen/Qwen3-0.6B
120+
--served-model-name Qwen/Qwen3-0.6B
121121
--page-size 16
122122
--tp 1
123123
--trust-remote-code
@@ -142,8 +142,8 @@ spec:
142142
args:
143143
- >-
144144
python3 -m dynamo.sglang
145-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
146-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
145+
--model-path Qwen/Qwen3-0.6B
146+
--served-model-name Qwen/Qwen3-0.6B
147147
--page-size 16
148148
--tp 1
149149
--trust-remote-code

components/backends/sglang/docs/sgl-hicache-example.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ This guide shows how to enable SGLang's Hierarchical Cache (HiCache) inside Dyna
1111

1212
```bash
1313
python -m dynamo.sglang \
14-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
14+
--model-path Qwen/Qwen3-0.6B \
1515
--host 0.0.0.0 --port 8000 \
1616
--page-size 64 \
1717
--enable-hierarchical-cache \
@@ -39,7 +39,7 @@ python -m dynamo.frontend --http-port 8000
3939
curl localhost:8000/v1/chat/completions \
4040
-H "Content-Type: application/json" \
4141
-d '{
42-
"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
42+
"model": "Qwen/Qwen3-0.6B",
4343
"messages": [
4444
{
4545
"role": "user",
@@ -56,7 +56,7 @@ curl localhost:8000/v1/chat/completions \
5656
Run the perf script:
5757
```bash
5858
bash -x /workspace/benchmarks/llm/perf.sh \
59-
--model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
59+
--model Qwen/Qwen3-0.6B \
6060
--tensor-parallelism 1 \
6161
--data-parallelism 1 \
6262
--concurrency "2,4,8" \

components/backends/sglang/launch/agg.sh

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020

2121
# run worker
2222
python3 -m dynamo.sglang \
23-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+
--model-path Qwen/Qwen3-0.6B \
24+
--served-model-name Qwen/Qwen3-0.6B \
2525
--page-size 16 \
2626
--tp 1 \
2727
--trust-remote-code \

components/backends/sglang/launch/agg_router.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020

2121
# run worker
2222
python3 -m dynamo.sglang \
23-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+
--model-path Qwen/Qwen3-0.6B \
24+
--served-model-name Qwen/Qwen3-0.6B \
2525
--page-size 16 \
2626
--tp 1 \
2727
--trust-remote-code \
@@ -30,8 +30,8 @@ python3 -m dynamo.sglang \
3030
WORKER_PID=$!
3131

3232
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
33-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
34-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
33+
--model-path Qwen/Qwen3-0.6B \
34+
--served-model-name Qwen/Qwen3-0.6B \
3535
--page-size 16 \
3636
--tp 1 \
3737
--trust-remote-code \

components/backends/sglang/launch/disagg.sh

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020

2121
# run prefill worker
2222
python3 -m dynamo.sglang \
23-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+
--model-path Qwen/Qwen3-0.6B \
24+
--served-model-name Qwen/Qwen3-0.6B \
2525
--page-size 16 \
2626
--tp 1 \
2727
--trust-remote-code \
@@ -32,8 +32,8 @@ PREFILL_PID=$!
3232

3333
# run decode worker
3434
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
35-
--model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
36-
--served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
35+
--model-path Qwen/Qwen3-0.6B \
36+
--served-model-name Qwen/Qwen3-0.6B \
3737
--page-size 16 \
3838
--tp 1 \
3939
--trust-remote-code \

0 commit comments

Comments (0)