diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index a7d5b6744b3a..6192d2a67750 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -72,12 +72,14 @@ } # Add Intel XPU tests -# NOTE: please sort the test cases alphabetically by the test file name +# NOTE: Intentionally NOT alphabetical. Lighter benchmarks run first because +# heavy models (e.g. DeepSeek-OCR ~6GB) can leave XPU device memory unreclaimed, +# causing OOM for subsequent tests on memory-constrained devices. suite_xpu = { "per-commit-xpu": [ + TestFile("xpu/test_intel_xpu_backend.py"), TestFile("xpu/test_deepseek_ocr.py"), # TestFile("xpu/test_internvl.py"), - TestFile("xpu/test_intel_xpu_backend.py"), ], } diff --git a/test/srt/xpu/test_deepseek_ocr.py b/test/srt/xpu/test_deepseek_ocr.py index 9d5da10e363a..e5d1c3fccb83 100644 --- a/test/srt/xpu/test_deepseek_ocr.py +++ b/test/srt/xpu/test_deepseek_ocr.py @@ -11,6 +11,7 @@ from sglang.srt.utils import kill_process_tree from sglang.test.test_utils import ( + DEFAULT_IMAGE_URL, DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, DEFAULT_URL_FOR_TEST, CustomTestCase, @@ -50,7 +51,7 @@ def get_request_json(self, max_new_tokens=32, n=1): self.base_url + "/generate", json={ "text": "\n<|grounding|>Convert the document to pure text.", - "image_data": "../../examples/assets/example_image.png", + "image_data": DEFAULT_IMAGE_URL, "sampling_params": { "temperature": 0 if n == 1 else 0.5, "max_new_tokens": max_new_tokens,