File tree Expand file tree Collapse file tree 18 files changed +44
-44
lines changed Expand file tree Collapse file tree 18 files changed +44
-44
lines changed Original file line number Diff line number Diff line change @@ -193,7 +193,7 @@ Send a test request to verify your deployment:
193193curl localhost:8000/v1/chat/completions \
194194 -H " Content-Type: application/json" \
195195 -d ' {
196- "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B ",
196+ "model": "Qwen/Qwen3-0.6B ",
197197 "messages": [
198198 {
199199 "role": "user",
Original file line number Diff line number Diff line change 3232 args :
3333 - >-
3434 python3 -m dynamo.sglang
35- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
36- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
35+ --model-path Qwen/Qwen3-0.6B
36+ --served-model-name Qwen/Qwen3-0.6B
3737 --page-size 16
3838 --tp 1
3939 --trust-remote-code
Original file line number Diff line number Diff line change 3535 args :
3636 - >-
3737 python3 -m dynamo.sglang
38- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
39- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
38+ --model-path Qwen/Qwen3-0.6B
39+ --served-model-name Qwen/Qwen3-0.6B
4040 --page-size 16
4141 --tp 1
4242 --trust-remote-code
Original file line number Diff line number Diff line change 6868 args :
6969 - >-
7070 python3 -m dynamo.sglang
71- --model-path meta-llama/Llama-3.3-70B-Instruct
72- --served-model-name meta-llama/Llama-3.3-70B-Instruct
71+ --model-path Qwen/Qwen3-0.6B
72+ --served-model-name Qwen/Qwen3-0.6B
7373 --tp-size 8
7474 --trust-remote-code
7575 --skip-tokenizer-init
Original file line number Diff line number Diff line change 3232 args :
3333 - >-
3434 python3 -m dynamo.sglang
35- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
36- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
35+ --model-path Qwen/Qwen3-0.6B
36+ --served-model-name Qwen/Qwen3-0.6B
3737 --page-size 16
3838 --tp 1
3939 --trust-remote-code
5959 args :
6060 - >-
6161 python3 -m dynamo.sglang
62- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
63- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
62+ --model-path Qwen/Qwen3-0.6B
63+ --served-model-name Qwen/Qwen3-0.6B
6464 --page-size 16
6565 --tp 1
6666 --trust-remote-code
Original file line number Diff line number Diff line change @@ -116,8 +116,8 @@ spec:
116116 args :
117117 - >-
118118 python3 -m dynamo.sglang
119- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
120- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
119+ --model-path Qwen/Qwen3-0.6B
120+ --served-model-name Qwen/Qwen3-0.6B
121121 --page-size 16
122122 --tp 1
123123 --trust-remote-code
@@ -142,8 +142,8 @@ spec:
142142 args :
143143 - >-
144144 python3 -m dynamo.sglang
145- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B
146- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B
145+ --model-path Qwen/Qwen3-0.6B
146+ --served-model-name Qwen/Qwen3-0.6B
147147 --page-size 16
148148 --tp 1
149149 --trust-remote-code
Original file line number Diff line number Diff line change @@ -11,7 +11,7 @@ This guide shows how to enable SGLang's Hierarchical Cache (HiCache) inside Dyna
1111
1212``` bash
1313python -m dynamo.sglang \
14- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
14+ --model-path Qwen/Qwen3-0.6B \
1515 --host 0.0.0.0 --port 8000 \
1616 --page-size 64 \
1717 --enable-hierarchical-cache \
@@ -39,7 +39,7 @@ python -m dynamo.frontend --http-port 8000
3939curl localhost:8000/v1/chat/completions \
4040 -H " Content-Type: application/json" \
4141 -d ' {
42- "model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B ",
42+ "model": "Qwen/Qwen3-0.6B ",
4343 "messages": [
4444 {
4545 "role": "user",
@@ -56,7 +56,7 @@ curl localhost:8000/v1/chat/completions \
5656Run the perf script:
5757``` bash
5858bash -x /workspace/benchmarks/llm/perf.sh \
59- --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
59+ --model Qwen/Qwen3-0.6B \
6060 --tensor-parallelism 1 \
6161 --data-parallelism 1 \
6262 --concurrency " 2,4,8" \
Original file line number Diff line number Diff line change @@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020
2121# run worker
2222python3 -m dynamo.sglang \
23- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+ --model-path Qwen/Qwen3-0.6B \
24+ --served-model-name Qwen/Qwen3-0.6B \
2525 --page-size 16 \
2626 --tp 1 \
2727 --trust-remote-code \
Original file line number Diff line number Diff line change @@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020
2121# run worker
2222python3 -m dynamo.sglang \
23- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+ --model-path Qwen/Qwen3-0.6B \
24+ --served-model-name Qwen/Qwen3-0.6B \
2525 --page-size 16 \
2626 --tp 1 \
2727 --trust-remote-code \
@@ -30,8 +30,8 @@ python3 -m dynamo.sglang \
3030WORKER_PID=$!
3131
3232CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
33- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
34- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
33+ --model-path Qwen/Qwen3-0.6B \
34+ --served-model-name Qwen/Qwen3-0.6B \
3535 --page-size 16 \
3636 --tp 1 \
3737 --trust-remote-code \
Original file line number Diff line number Diff line change @@ -20,8 +20,8 @@ DYNAMO_PID=$!
2020
2121# run prefill worker
2222python3 -m dynamo.sglang \
23- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
24- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
23+ --model-path Qwen/Qwen3-0.6B \
24+ --served-model-name Qwen/Qwen3-0.6B \
2525 --page-size 16 \
2626 --tp 1 \
2727 --trust-remote-code \
@@ -32,8 +32,8 @@ PREFILL_PID=$!
3232
3333# run decode worker
3434CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
35- --model-path deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
36- --served-model-name deepseek-ai/DeepSeek-R1-Distill-Llama-8B \
35+ --model-path Qwen/Qwen3-0.6B \
36+ --served-model-name Qwen/Qwen3-0.6B \
3737 --page-size 16 \
3838 --tp 1 \
3939 --trust-remote-code \
You can’t perform that action at this time.
0 commit comments