From 3122790bf8b2293aa14d5f1106da9aabeb6b97ba Mon Sep 17 00:00:00 2001
From: plusNew001 <suijiaxin_job@163.com>
Date: Wed, 15 Oct 2025 09:27:27 +0000
Subject: [PATCH 01/11] Add XPU CI case for expert-parallel (EP) inference

---
 scripts/run_ci_xpu.sh           | 18 +++++++++++
 tests/ci_use/XPU_45T/test_ep.py | 53 +++++++++++++++++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 tests/ci_use/XPU_45T/test_ep.py

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 870b463d91..d7a49ef641 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -176,3 +176,21 @@ if [ ${kv_block_test_exit_code} -ne 0 ]; then
     echo "kv block相关测试失败，请检查pr代码"
     exit 1
 fi
+
+echo "============================开始EP并行测试!============================"
+
+export XPU_VISIBLE_DEVICES="0,1,2,3"
+export BKCL_ENABLE_XDR=1
+export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
+export BKCL_TRACE_TOPO=1
+export BKCL_PCIE_RING=1
+export XSHMEM_MODE=1
+export XSHMEM_QP_NUM_PER_RANK=32
+export BKCL_RDMA_VERBS=1
+python tests/ci_use/XPU_45T/test_ep.py
+ep_exit_code=$?
+
+if [ ${ep_exit_code} -ne 0 ]; then
+    echo "EP并行 相关测试失败，请检查pr代码"
+    exit 1
+f
\ No newline at end of file
diff --git a/tests/ci_use/XPU_45T/test_ep.py b/tests/ci_use/XPU_45T/test_ep.py
new file mode 100644
index 0000000000..a7dc554b3d
--- /dev/null
+++ b/tests/ci_use/XPU_45T/test_ep.py
@@ -0,0 +1,53 @@
+import os
+import copy
+from fastdeploy import LLM, SamplingParams
+
+msg1=[
+    {"role": "system", "content": ""},
+    {"role": "user", "content": "北京天安门广场在哪里?"},
+]
+
+messages = [msg1,
+           ]
+
+# 采样参数
+sampling_params = SamplingParams(top_p=0, max_tokens=500)
+model=os.getenv("model_path", "/ssd3/model/ERNIE-4.5-300B-A47B-Paddle")
+
+xpu_visible_devices=os.getenv("XPU_VISIBLE_DEVICES", "0")
+xpu_device_num=len(xpu_visible_devices.split(','))
+enable_expert_parallel=True
+if enable_expert_parallel:
+    tensor_parallel_size=1
+    data_parallel_size=xpu_device_num
+else:
+    tensor_parallel_size=xpu_device_num
+    data_parallel_size=1
+engine_worker_queue_port=[str(8023+i*10) for i in range(data_parallel_size)]
+engine_worker_queue_port=",".join(engine_worker_queue_port)
+
+# messages=[copy.deepcopy(msg1) for i in range(data_parallel_size)]
+print(f"messages: {messages}")
+
+llm = LLM(model=model,
+          enable_expert_parallel=enable_expert_parallel,
+          tensor_parallel_size=tensor_parallel_size,
+          data_parallel_size=data_parallel_size,
+          max_model_len=8192,
+          quantization="wint4",
+          engine_worker_queue_port=engine_worker_queue_port,
+          max_num_seqs=8,
+         )
+
+# 批量进行推理（llm内部基于资源情况进行请求排队、动态插入处理）
+outputs = llm.chat(messages, sampling_params)
+
+# 输出结果
+for output in outputs:
+    prompt = output.prompt
+    generated_text = output.outputs.text
+    print(f"-"*100)
+    print(f"prompt: {prompt}")
+    print(f"-"*100)
+    print(f"generated_text: {generated_text}")
+    print(f"-"*100)
\ No newline at end of file

From b82625ce89fa6e3f66ebac3856e644b9a8409034 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Thu, 16 Oct 2025 14:46:47 +0800
Subject: [PATCH 02/11] Add xDeepEP download and build steps

Download and build xDeepEP before running tests.
---
 scripts/run_ci_xpu.sh | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index d7a49ef641..612e6d5f98 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -187,10 +187,17 @@ export BKCL_PCIE_RING=1
 export XSHMEM_MODE=1
 export XSHMEM_QP_NUM_PER_RANK=32
 export BKCL_RDMA_VERBS=1
+
+wget -q https://paddle-qa.bj.bcebos.com/xpu_third_party/xDeepEP.tar.gz
+tar -xzf xDeepEP.tar.gz
+cd xDeepEP
+bash build.sh
+cd -
+
 python tests/ci_use/XPU_45T/test_ep.py
 ep_exit_code=$?
 
 if [ ${ep_exit_code} -ne 0 ]; then
     echo "EP并行 相关测试失败，请检查pr代码"
     exit 1
-f
\ No newline at end of file
+f

From 5fe0fed600488b029ff8a676f39389cc596eed64 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 10:34:27 +0800
Subject: [PATCH 03/11] Add sleep and xpu_smi call before the EP test

---
 scripts/run_ci_xpu.sh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 612e6d5f98..6925419baf 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -178,7 +178,8 @@ if [ ${kv_block_test_exit_code} -ne 0 ]; then
 fi
 
 echo "============================开始EP并行测试!============================"
-
+sleep 5
+xpu_smi
 export XPU_VISIBLE_DEVICES="0,1,2,3"
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4

From 235e2b49494f6314b849eb9df3aa08c3fe697b65 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 11:12:37 +0800
Subject: [PATCH 04/11] Update Docker image version in CI workflow

---
 .github/workflows/ci_xpu.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml
index f99ca7d172..7398af53d3 100644
--- a/.github/workflows/ci_xpu.yml
+++ b/.github/workflows/ci_xpu.yml
@@ -24,7 +24,7 @@ jobs:
 
       - name: Code Checkout
         env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0
+          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.2.0
         run: |
           REPO="https://github.com/${{ github.repository }}.git"
           FULL_REPO="${{ github.repository }}"
@@ -55,7 +55,7 @@ jobs:
 
       - name: Run CI unittest
         env:
-          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.1.0
+          docker_image: ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-xpu:2.2.0
         run: |
           runner_name="${{ runner.name }}"
           last_char="${runner_name: -1}"

From 6f1252d3f8edfee98d1733817470d5235917313e Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 12:51:50 +0800
Subject: [PATCH 05/11] Modify run_ci_xpu.sh for log cleanup and error handling

Remove old logs and core files before running the EP test, call xpu-smi
instead of xpu_smi, and print log/workerlog.0 when the test fails.
---
 scripts/run_ci_xpu.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 6925419baf..3de41ac970 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -179,7 +179,9 @@ fi
 
 echo "============================开始EP并行测试!============================"
 sleep 5
-xpu_smi
+rm -rf log/*
+rm -f core*
+xpu-smi
 export XPU_VISIBLE_DEVICES="0,1,2,3"
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
@@ -199,6 +201,8 @@ python tests/ci_use/XPU_45T/test_ep.py
 ep_exit_code=$?
 
 if [ ${ep_exit_code} -ne 0 ]; then
+    echo "log/workerlog.0"
+    cat log/workerlog.0
     echo "EP并行 相关测试失败，请检查pr代码"
     exit 1
 f

From 4f4b523bb652b116c4e0acdf210b5ed79d579c04 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 16:57:26 +0800
Subject: [PATCH 06/11] Enhance test_ep.py with process management and
 assertions

Wrap the script body in test_fd_ep(), assert that the chat outputs and the
generated text are non-empty, and kill child processes in a finally block.
---
 tests/ci_use/XPU_45T/test_ep.py | 124 +++++++++++++++++++-------------
 1 file changed, 74 insertions(+), 50 deletions(-)

diff --git a/tests/ci_use/XPU_45T/test_ep.py b/tests/ci_use/XPU_45T/test_ep.py
index a7dc554b3d..1ddcf50a98 100644
--- a/tests/ci_use/XPU_45T/test_ep.py
+++ b/tests/ci_use/XPU_45T/test_ep.py
@@ -1,53 +1,77 @@
 import os
-import copy
+import psutil
 from fastdeploy import LLM, SamplingParams
 
-msg1=[
-    {"role": "system", "content": ""},
-    {"role": "user", "content": "北京天安门广场在哪里?"},
-]
-
-messages = [msg1,
-           ]
-
-# 采样参数
-sampling_params = SamplingParams(top_p=0, max_tokens=500)
-model=os.getenv("model_path", "/ssd3/model/ERNIE-4.5-300B-A47B-Paddle")
-
-xpu_visible_devices=os.getenv("XPU_VISIBLE_DEVICES", "0")
-xpu_device_num=len(xpu_visible_devices.split(','))
-enable_expert_parallel=True
-if enable_expert_parallel:
-    tensor_parallel_size=1
-    data_parallel_size=xpu_device_num
-else:
-    tensor_parallel_size=xpu_device_num
-    data_parallel_size=1
-engine_worker_queue_port=[str(8023+i*10) for i in range(data_parallel_size)]
-engine_worker_queue_port=",".join(engine_worker_queue_port)
-
-# messages=[copy.deepcopy(msg1) for i in range(data_parallel_size)]
-print(f"messages: {messages}")
-
-llm = LLM(model=model,
-          enable_expert_parallel=enable_expert_parallel,
-          tensor_parallel_size=tensor_parallel_size,
-          data_parallel_size=data_parallel_size,
-          max_model_len=8192,
-          quantization="wint4",
-          engine_worker_queue_port=engine_worker_queue_port,
-          max_num_seqs=8,
-         )
-
-# 批量进行推理（llm内部基于资源情况进行请求排队、动态插入处理）
-outputs = llm.chat(messages, sampling_params)
-
-# 输出结果
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs.text
-    print(f"-"*100)
-    print(f"prompt: {prompt}")
-    print(f"-"*100)
-    print(f"generated_text: {generated_text}")
-    print(f"-"*100)
\ No newline at end of file
+def test_fd_ep():
+    """
+    """
+
+    msg1 = [
+        {"role": "system", "content": ""},
+        {"role": "user", "content": "北京天安门广场在哪里?"},
+    ]
+    messages = [msg1]
+
+    # 采样参数
+    sampling_params = SamplingParams(top_p=0, max_tokens=500)
+
+    # 模型路径与设备配置
+    model = os.getenv("model_path", "/home/ERNIE-4.5-300B-A47B-Paddle")
+    xpu_visible_devices = os.getenv("XPU_VISIBLE_DEVICES", "0")
+    xpu_device_num = len(xpu_visible_devices.split(','))
+
+    enable_expert_parallel = True
+    if enable_expert_parallel:
+        tensor_parallel_size = 1
+        data_parallel_size = xpu_device_num
+    else:
+        tensor_parallel_size = xpu_device_num
+        data_parallel_size = 1
+
+    engine_worker_queue_port = [str(8023 + i * 10) for i in range(data_parallel_size)]
+    engine_worker_queue_port = ",".join(engine_worker_queue_port)
+
+    print(f"[INFO] messages: {messages}")
+
+    llm = LLM(
+        model=model,
+        enable_expert_parallel=enable_expert_parallel,
+        tensor_parallel_size=tensor_parallel_size,
+        data_parallel_size=data_parallel_size,
+        max_model_len=8192,
+        quantization="wint4",
+        engine_worker_queue_port=engine_worker_queue_port,
+        max_num_seqs=8,
+    )
+
+    try:
+        outputs = llm.chat(messages, sampling_params)
+        assert outputs, "❌ LLM 推理返回空结果。"
+
+        for idx, output in enumerate(outputs):
+            prompt = output.prompt
+            generated_text = getattr(output.outputs, "text", "").strip()
+
+            print(f"{'-'*100}")
+            print(f"[PROMPT {idx}] {prompt}")
+            print(f"{'-'*100}")
+            print(f"[GENERATED TEXT] {generated_text}")
+            print(f"{'-'*100}")
+
+            # 核心断言：输出不能为空
+            assert generated_text, f"❌ 推理结果为空 (index={idx})"
+
+    finally:
+        # 无论是否报错都清理子进程
+        current_process = psutil.Process(os.getpid())
+        for child in current_process.children(recursive=True):
+            try:
+                child.kill()
+                print(f"[CLEANUP] 已杀死子进程 {child.pid}")
+            except Exception as e:
+                print(f"[WARN] 无法杀死子进程 {child.pid}: {e}")
+        print("✅ 已清理所有 FastDeploy 子进程。")
+
+
+if __name__ == "__main__":
+    test_fastdeploy_llm()

From 771600e1afffe9152206d1a83cee1d3e35b874ce Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 17:19:56 +0800
Subject: [PATCH 07/11] Replace test_fastdeploy_llm with test_fd_ep

---
 tests/ci_use/XPU_45T/test_ep.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci_use/XPU_45T/test_ep.py b/tests/ci_use/XPU_45T/test_ep.py
index 1ddcf50a98..0b4fe3a7d7 100644
--- a/tests/ci_use/XPU_45T/test_ep.py
+++ b/tests/ci_use/XPU_45T/test_ep.py
@@ -74,4 +74,4 @@ def test_fd_ep():
 
 
 if __name__ == "__main__":
-    test_fastdeploy_llm()
+    test_fd_ep()

From 392b9ded64e9c48bd690d050474e6db9b41ba7ba Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 17 Oct 2025 18:54:28 +0800
Subject: [PATCH 08/11] Replace stray `f` with `fi` to close the if block in run_ci_xpu.sh

---
 scripts/run_ci_xpu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 3de41ac970..320f00f7c5 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -205,4 +205,4 @@ if [ ${ep_exit_code} -ne 0 ]; then
     cat log/workerlog.0
     echo "EP并行 相关测试失败，请检查pr代码"
     exit 1
-f
+fi

From 7b1eead28d9167d20976bbd63023b9209f61690a Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Tue, 21 Oct 2025 10:34:26 +0800
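Subject: [PATCH 09/11] Reformat test_ep.py (import spacing, docstring, quotes)

Formatting only: separate import groups with blank lines, collapse the empty
docstring onto one line, and use double quotes in split(","). No behavior change.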
---
 tests/ci_use/XPU_45T/test_ep.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/ci_use/XPU_45T/test_ep.py b/tests/ci_use/XPU_45T/test_ep.py
index 0b4fe3a7d7..c82242aa39 100644
--- a/tests/ci_use/XPU_45T/test_ep.py
+++ b/tests/ci_use/XPU_45T/test_ep.py
@@ -1,10 +1,12 @@
 import os
+
 import psutil
+
 from fastdeploy import LLM, SamplingParams
 
+
 def test_fd_ep():
-    """
-    """
+    """ """
 
     msg1 = [
         {"role": "system", "content": ""},
@@ -18,7 +20,7 @@ def test_fd_ep():
     # 模型路径与设备配置
     model = os.getenv("model_path", "/home/ERNIE-4.5-300B-A47B-Paddle")
     xpu_visible_devices = os.getenv("XPU_VISIBLE_DEVICES", "0")
-    xpu_device_num = len(xpu_visible_devices.split(','))
+    xpu_device_num = len(xpu_visible_devices.split(","))
 
     enable_expert_parallel = True
     if enable_expert_parallel:

From 83f3108f3448452d0618aa2eb1080a23b1efe74f Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Tue, 21 Oct 2025 14:29:32 +0800
Subject: [PATCH 10/11] Rename test_ep.py to run_ep.py

---
 tests/ci_use/XPU_45T/{test_ep.py => run_ep.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/ci_use/XPU_45T/{test_ep.py => run_ep.py} (100%)

diff --git a/tests/ci_use/XPU_45T/test_ep.py b/tests/ci_use/XPU_45T/run_ep.py
similarity index 100%
rename from tests/ci_use/XPU_45T/test_ep.py
rename to tests/ci_use/XPU_45T/run_ep.py

From ab3d52494af4f374731a6d8ceb379d7057516c3a Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Tue, 21 Oct 2025 14:30:07 +0800
Subject: [PATCH 11/11] Change test script from test_ep.py to run_ep.py

---
 scripts/run_ci_xpu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 320f00f7c5..9afd81249b 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -197,7 +197,7 @@ cd xDeepEP
 bash build.sh
 cd -
 
-python tests/ci_use/XPU_45T/test_ep.py
+python tests/ci_use/XPU_45T/run_ep.py
 ep_exit_code=$?
 
 if [ ${ep_exit_code} -ne 0 ]; then