
Commit 88da9d9

zccjjj and plusNew001 authored
[XPU] [CI] Change CI ep test from offline to online (#4885)
* change CI ep test from offline to online
* add ep all2all ci's changes, from offline to online
* change env var in ep-all2all ci test
* add expected response for ep8tp8 all2all
* Adapt to CI refactoring and support dual-concurrent code execution
* Adapt to CI refactoring and support dual-concurrent, second
* Explicitly specify the #port
* change the startup method of all2all
* Modify the command of all2all
* Update assertion to check multiple keywords
* Update assertion to check multiple keywords
* Update run_w4a8.py
* Update run_w4a8.py

---------

Co-authored-by: plusNew001 <[email protected]>
1 parent 4a0d881 commit 88da9d9

File tree

scripts/run_ci_xpu.sh
tests/ci_use/XPU_45T/run_45T.py
tests/ci_use/XPU_45T/run_ep_online.py
tests/ci_use/XPU_45T/run_w4a8.py

4 files changed: +208 -35 lines changed

scripts/run_ci_xpu.sh

Lines changed: 164 additions & 33 deletions
@@ -11,6 +11,9 @@ function stop_processes() {
     ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
     ps -efww | grep -E "$((8188 + GPU_ID * 100))" | grep -v grep | awk '{print $2}' | xargs kill -9 || true
     lsof -t -i :$((8188 + GPU_ID * 100)) | xargs kill -9 || true
+    for port in {$((8188 + GPU_ID * 100 + 10))..$((8188 + GPU_ID * 100 + 40))}; do
+        lsof -t -i :${port} | xargs kill -9 || true
+    done
 }
 stop_processes
 
@@ -286,10 +289,11 @@ if [ ${vl_test_exit_code} -ne 0 ]; then
 fi
 
 
-echo "============================Starting the EP8TP1 test!============================"
+echo "============================Starting the EP4TP4 online service test!============================"
 sleep 5
 rm -rf log/*
 rm -f core*
+# pkill -9 python  # the pipeline does not run this
 ipcrm --all=msg
 xpu-smi
 if [[ "$GPU_ID" == "0" ]]; then
@@ -312,11 +316,58 @@ cd xDeepEP
 bash build.sh
 cd -
 
-export enable_expert_parallel=1
-export enable_tensor_parallel=0
+export port_num=$((8188 + GPU_ID * 100))
+# Start the service
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 4 \
+    --enable-expert-parallel \
+    --data-parallel-size 1 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port $((port_num + 10)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --disable-sequence-parallel-moe \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
 
-python -m pytest -s --timeout=600 tests/ci_use/XPU_45T/run_ep.py
-ep_exit_code=$?
+sleep 60
+# Health probe
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+echo "Starting service health check, maximum wait time: ${TIMEOUT}s"
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\nService startup timed out: still not up after $((TIMEOUT/60)) minutes!"
+        stop_processes
+        cat server.log
+        echo "log/workerlog.0"
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    echo -e "\rHealth check in progress... waited ${ELAPSED}s, current status code: ${HTTP_CODE}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\nService started successfully! Took ${ELAPSED}s"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+cat server.log
+
+# Run the online inference verification script
+python tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
 
 unset BKCL_ENABLE_XDR
 unset BKCL_RDMA_NICS
@@ -327,26 +378,24 @@ unset XSHMEM_QP_NUM_PER_RANK
 unset BKCL_RDMA_VERBS
 stop_processes
 
-if [ ${ep_exit_code} -ne 0 ]; then
-    echo "log/workerlog.0"
+if [ ${ep_online_exit_code} -ne 0 ]; then
     cat log/workerlog.0
-    echo "EP8TP1 tests failed, please check the PR code"
+    echo "EP4TP4 online service tests failed, please check the PR code"
     exit 1
 fi
 
-
-echo "============================Starting the EP8TP8 allreduce test!============================"
+echo "============================Starting the EP4TP1 online service test!============================"
 sleep 5
 rm -rf log/*
 rm -f core*
+# pkill -9 python  # the pipeline does not run this
 ipcrm --all=msg
 xpu-smi
 if [[ "$GPU_ID" == "0" ]]; then
     export XPU_VISIBLE_DEVICES="0,1,2,3"
 else
     export XPU_VISIBLE_DEVICES="4,5,6,7"
 fi
-
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -355,12 +404,54 @@ export XSHMEM_MODE=1
 export XSHMEM_QP_NUM_PER_RANK=32
 export BKCL_RDMA_VERBS=1
 
-export enable_expert_parallel=1
-export enable_tensor_parallel=1
-export disable_sequence_parallel_moe=1
+export port_num=$((8188 + GPU_ID * 100))
+# Start the service
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 1 \
+    --enable-expert-parallel \
+    --data-parallel-size 4 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port "$((port_num + 10)),$((port_num + 20)),$((port_num + 30)),$((port_num + 40))" \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
+
+sleep 60
+# Health probe (same as above)
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\nService startup timed out: still not up after $((TIMEOUT/60)) minutes!"
+        stop_processes
+        cat server.log
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\nService started successfully! Took ${ELAPSED}s"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+cat server.log
 
-python -m pytest -s --timeout=600 tests/ci_use/XPU_45T/run_ep.py
-ep_exit_code=$?
+# Run the online inference verification script
+python tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
 
 unset BKCL_ENABLE_XDR
 unset BKCL_RDMA_NICS
@@ -369,30 +460,27 @@ unset BKCL_PCIE_RING
 unset XSHMEM_MODE
 unset XSHMEM_QP_NUM_PER_RANK
 unset BKCL_RDMA_VERBS
-unset enable_expert_parallel
-unset enable_tensor_parallel
-unset disable_sequence_parallel_moe
 stop_processes
 
-if [ ${ep_exit_code} -ne 0 ]; then
-    echo "log/workerlog.0"
+if [ ${ep_online_exit_code} -ne 0 ]; then
     cat log/workerlog.0
-    echo "EP8TP8 allreduce tests failed, please check the PR code"
+    echo "EP4TP1 online service tests failed, please check the PR code"
     exit 1
 fi
 
-
-echo "============================Starting the EP8TP8 all2all test!============================"
+echo "============================Starting the EP4TP4 all2all test!============================"
 sleep 5
 rm -rf log/*
 rm -f core*
+# pkill -9 python  # the pipeline does not run this
 ipcrm --all=msg
 xpu-smi
 if [[ "$GPU_ID" == "0" ]]; then
     export XPU_VISIBLE_DEVICES="0,1,2,3"
 else
     export XPU_VISIBLE_DEVICES="4,5,6,7"
 fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -401,11 +489,57 @@ export XSHMEM_MODE=1
 export XSHMEM_QP_NUM_PER_RANK=32
 export BKCL_RDMA_VERBS=1
 
-export enable_expert_parallel=1
-export enable_tensor_parallel=1
+export port_num=$((8188 + GPU_ID * 100))
+# Start the service
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --tensor-parallel-size 4 \
+    --enable-expert-parallel \
+    --data-parallel-size 1 \
+    --max-model-len 32768 \
+    --max-num-seqs 64 \
+    --quantization "wint4" \
+    --engine-worker-queue-port $((port_num + 10)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --gpu-memory-utilization 0.9 \
+    --load-choices "default" > server.log 2>&1 &
 
-python -m pytest -s --timeout=600 tests/ci_use/XPU_45T/run_ep.py
-ep_exit_code=$?
+sleep 60
+# Health probe
+TIMEOUT=$((15 * 60))
+INTERVAL=10
+ENDPOINT="http://0.0.0.0:${port_num}/health"
+START_TIME=$(date +%s)
+echo "Starting service health check, maximum wait time: ${TIMEOUT}s"
+while true; do
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\nService startup timed out: still not up after $((TIMEOUT/60)) minutes!"
+        stop_processes
+        cat server.log
+        echo "log/workerlog.0"
+        cat log/workerlog.0
+        exit 1
+    fi
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+    echo -e "\rHealth check in progress... waited ${ELAPSED}s, current status code: ${HTTP_CODE}"
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\nService started successfully! Took ${ELAPSED}s"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+cat server.log
+
+# Run the online inference verification script
+python tests/ci_use/XPU_45T/run_ep_online.py
+ep_online_exit_code=$?
+echo ep_online_exit_code is ${ep_online_exit_code}
 
 unset BKCL_ENABLE_XDR
 unset BKCL_RDMA_NICS
@@ -414,13 +548,10 @@ unset BKCL_PCIE_RING
 unset XSHMEM_MODE
 unset XSHMEM_QP_NUM_PER_RANK
 unset BKCL_RDMA_VERBS
-unset enable_expert_parallel
-unset enable_tensor_parallel
 stop_processes
 
-if [ ${ep_exit_code} -ne 0 ]; then
-    echo "log/workerlog.0"
+if [ ${ep_online_exit_code} -ne 0 ]; then
     cat log/workerlog.0
-    echo "EP8TP8 all2all tests failed, please check the PR code"
+    echo "EP4TP4 all2all online service tests failed, please check the PR code"
     exit 1
 fi
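
The same 15-minute health-check loop is repeated verbatim in all three online-service sections above. A minimal sketch of how it could be factored into a shared helper; the function name wait_for_health and its arguments are hypothetical and not part of this commit:

# Sketch only, not part of the commit: a reusable probe against the same
# curl-based /health endpoint used in the loops above.
wait_for_health() {
    local port="$1" timeout="${2:-900}" interval="${3:-10}"
    local endpoint="http://0.0.0.0:${port}/health"
    local start elapsed code
    start=$(date +%s)
    while true; do
        elapsed=$(( $(date +%s) - start ))
        if [ "$elapsed" -ge "$timeout" ]; then
            echo "Service did not become healthy within ${timeout}s"
            return 1
        fi
        code=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$endpoint" || true)
        if [ "$code" = "200" ]; then
            echo "Service healthy after ${elapsed}s"
            return 0
        fi
        sleep "$interval"
    done
}

# Possible call site, mirroring the inline loops above:
# wait_for_health "$port_num" || { cat server.log; cat log/workerlog.0; exit 1; }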

tests/ci_use/XPU_45T/run_45T.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ def test_45t():
     )
     print(response.choices[0].message.content)
     # print(base_response)
-    assert "人工智能" in response.choices[0].message.content
+    assert any(keyword in response.choices[0].message.content for keyword in ["人工智能", "文心一言"])
 
 
 if __name__ == "__main__":

tests/ci_use/XPU_45T/run_ep_online.py

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+
+import openai
+
+
+def test_ep():
+    ip = "0.0.0.0"
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # matches the service configuration
+    client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
+    # Non-streaming chat
+    response = client.chat.completions.create(
+        model="default",
+        messages=[
+            {"role": "user", "content": "你好,你是谁?"},
+        ],
+        temperature=1,
+        top_p=0,
+        max_tokens=64,
+        stream=False,
+    )
+
+    print(response.choices[0].message.content)
+    # print(base_response)
+    assert any(keyword in response.choices[0].message.content for keyword in ["人工智能", "文心一言"])
+
+
+if __name__ == "__main__":
+    test_ep()
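
For context, a sketch of how run_ci_xpu.sh drives this test, grounded in the script changes above; GPU_ID is assumed to be provided by the CI environment rather than set in the script:

# Sketch of the invocation used in run_ci_xpu.sh above. With GPU_ID set, the
# test probes the server started on port 8188 + GPU_ID * 100.
export GPU_ID=0                                   # assumption: first device group
python tests/ci_use/XPU_45T/run_ep_online.py      # asserts on the chat response
echo "ep_online_exit_code is $?"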

tests/ci_use/XPU_45T/run_w4a8.py

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ def test_w4a8():
     )
     print(response.choices[0].message.content)
     # print(base_response)
-    assert "人工智能" in response.choices[0].message.content
+    assert any(keyword in response.choices[0].message.content for keyword in ["人工智能", "文心一言"])
 
 
 if __name__ == "__main__":
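
A sketch of the same checks done by hand with curl, assuming the api_server started in run_ci_xpu.sh is still listening with GPU_ID=0 (port 8188); the /v1/chat/completions path follows the OpenAI-compatible API the tests reach through the openai client:

# Hypothetical manual probe, not part of this commit.
PORT=8188
curl -s "http://0.0.0.0:${PORT}/health"           # health endpoint polled by the CI loop
curl -s "http://0.0.0.0:${PORT}/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "default", "messages": [{"role": "user", "content": "你好,你是谁?"}], "max_tokens": 64}'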

0 commit comments
