vllm-project · hsliuustc0106 · Jun 2, 2026 · Jun 1, 2026 · chatgpt-codex-connector · Jun 1, 2026
@@ -10,7 +10,7 @@ steps:
           DOCKER_BUILDKIT: "1"
           # Buildkite will automatically replace this with the actual commit hash
           VLLM_IMAGE_TAG: "${BUILDKITE_COMMIT}"
-          VLLM_VERSION: "v0.21.0"
+          VLLM_VERSION: "v0.22.0"
         priority: 100
         timeout_in_minutes: 60
         soft_fail: false
@@ -39,7 +39,8 @@ def get_cuda_graph_config():
     stage_config = get_cuda_graph_config()
 
 # Create parameter combinations for model and stage config
-test_params = [(model, stage_config) for model in models]
+# Qwen2.5-Omni with TP=3 needs longer init timeouts (stage_init_timeout and init_timeout)
+test_params = [(model, stage_config, {"stage_init_timeout": 1200, "init_timeout": 1800}) for model in models]
 
 
 def get_question(prompt_type="mix"):

@@ -21,8 +21,10 @@ def cuda_marks(*, res: str, num_cards: int):
         return marks
     test_distributed = pytest.mark.distributed_cuda(num_cards=num_cards)
 
+    if not current_platform.is_cuda():
+        return marks + [test_distributed]
     test_skipif = pytest.mark.skipif(
-        not current_platform.is_cuda() or (current_platform.device_count() < num_cards),
+        current_platform.device_count() < num_cards,
         reason=f"Need at least {num_cards} CUDA GPUs to run the test.",
     )
     return marks + [test_distributed, test_skipif]
@@ -52,8 +54,10 @@ def xpu_marks(*, res: str, num_cards: int):
         return marks
     test_distributed = pytest.mark.distributed_xpu(num_cards=num_cards)
 
+    if not current_platform.is_xpu():
+        return marks + [test_distributed]
     test_skipif = pytest.mark.skipif(
-        not current_platform.is_xpu() or (current_platform.device_count() < num_cards),
+        current_platform.device_count() < num_cards,
         reason=f"Need at least {num_cards} XPUs to run the test.",
     )
     return marks + [test_distributed, test_skipif]