diff --git a/components/backends/sglang/deploy/disagg.yaml b/components/backends/sglang/deploy/disagg.yaml index 451a01d9c5af..315a25e8eb11 100644 --- a/components/backends/sglang/deploy/disagg.yaml +++ b/components/backends/sglang/deploy/disagg.yaml @@ -61,7 +61,7 @@ spec: image: my-registry/sglang-runtime:my-tag workingDir: /workspace/components/backends/sglang command: - - python3E + - python3 - -m - dynamo.sglang args: diff --git a/components/backends/sglang/launch/disagg_dp_attn.sh b/components/backends/sglang/launch/disagg_dp_attn.sh index b5d8ba1bc5c8..ae35364c1344 100755 --- a/components/backends/sglang/launch/disagg_dp_attn.sh +++ b/components/backends/sglang/launch/disagg_dp_attn.sh @@ -16,10 +16,6 @@ trap cleanup EXIT INT TERM python3 -m dynamo.frontend --http-port=8000 & DYNAMO_PID=$! -# Set the expert distribution recording directory -mkdir -p /tmp/sglang_expert_distribution_record -export SGLANG_EXPERT_DISTRIBUTION_RECORDER_DIR=/tmp/sglang_expert_distribution_record - # run prefill worker python3 -m dynamo.sglang \ --model-path silence09/DeepSeek-R1-Small-2layers \ @@ -31,7 +27,7 @@ python3 -m dynamo.sglang \ --trust-remote-code \ --disaggregation-mode prefill \ --disaggregation-transfer-backend nixl \ - --expert-distribution-recorder-mode stat \ + --load-balance-method round_robin \ --port 30000 & PREFILL_PID=$! @@ -46,5 +42,5 @@ CUDA_VISIBLE_DEVICES=2,3 python3 -m dynamo.sglang \ --trust-remote-code \ --disaggregation-mode decode \ --disaggregation-transfer-backend nixl \ - --expert-distribution-recorder-mode stat \ + --prefill-round-robin-balance \ --port 31000