sgl-project · zhyncs · Mar 27, 2025 · Mar 23, 2025 · Mar 24, 2025 · Mar 26, 2025
diff --git a/benchmark/mmmu/README.md b/benchmark/mmmu/README.md
@@ -2,12 +2,16 @@
 
 ### Evaluate sglang
 
+Host the VLM:
+
 ```
-python -m sglang.launch_server --model-path Qwen/Qwen2-VL-7B-Instruct --port 30000
+python -m sglang.launch_server --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl --port 30000
 ```
 
+Benchmark:
+
 ```
-python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl --port 30000
+python benchmark/mmmu/bench_sglang.py --port 30000
 ```
 
 It's recommended to reduce the memory usage by appending something like `--mem-fraction-static 0.6` to the server launch command above.

diff --git a/benchmark/mmmu/bench_sglang.py b/benchmark/mmmu/bench_sglang.py
@@ -2,7 +2,9 @@
 Benchmark the sglang-hosted VLM on the MMMU benchmark
 
 Usage:
-    python benchmark/mmmu/bench_sglang.py --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl
+    Host the VLM: python -m sglang.launch_server --model-path Qwen/Qwen2-VL-7B-Instruct --chat-template qwen2-vl --port 30000
+
+    Benchmark: python benchmark/mmmu/bench_sglang.py --port 30000
 
 The eval output will be logged
 """