diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 49077fcd6e67..9fff2f7db1a5 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -77,7 +77,8 @@ # NOTE: please sort the test cases alphabetically by the test file name suite_xpu = { "per-commit-xpu": [ - TestFile("xpu/test_deepseek_ocr.py"), + TestFile("xpu/test_deepseek_ocr.py", 360), + TestFile("xpu/test_deepseek_ocr_triton.py", 360), # TestFile("xpu/test_internvl.py"), TestFile("xpu/test_intel_xpu_backend.py"), ], diff --git a/test/srt/xpu/test_deepseek_ocr.py b/test/srt/xpu/test_deepseek_ocr.py index 9d5da10e363a..5f78c4f9d481 100644 --- a/test/srt/xpu/test_deepseek_ocr.py +++ b/test/srt/xpu/test_deepseek_ocr.py @@ -2,9 +2,11 @@ python3 -m unittest test_deepseek_ocr.py """ +import gc import json import os import unittest +from pathlib import Path import requests from transformers import AutoTokenizer @@ -19,11 +21,32 @@ class TestDeepSeekOCR(CustomTestCase): + @classmethod + def _cleanup_xpu_memory(cls): + gc.collect() + try: + import torch + + if hasattr(torch, "xpu") and torch.xpu.is_available(): + torch.xpu.synchronize() + torch.xpu.empty_cache() + except Exception: + # Best-effort cleanup only; tests should continue if cleanup is unavailable. + pass + @classmethod def setUpClass(cls): + cls._cleanup_xpu_memory() cls.model = "deepseek-ai/DeepSeek-OCR" - cls.tokenizer = AutoTokenizer.from_pretrained(cls.model, use_fast=False) + cls.tokenizer = AutoTokenizer.from_pretrained( + cls.model, use_fast=False, trust_remote_code=True + ) cls.base_url = DEFAULT_URL_FOR_TEST + cls.image_path = str( + (Path(__file__).resolve().parents[3] / "examples/assets/example_image.png") + ) + if not os.path.exists(cls.image_path): + raise FileNotFoundError(f"Image not found: {cls.image_path}") cls.common_args = [ "--device", "xpu", @@ -43,14 +66,16 @@ def setUpClass(cls): @classmethod def tearDownClass(cls): """Fixture that is run once after all tests in the class.""" - kill_process_tree(cls.process.pid) + if hasattr(cls, "process") and cls.process: + kill_process_tree(cls.process.pid) + cls._cleanup_xpu_memory() def get_request_json(self, max_new_tokens=32, n=1): response = requests.post( self.base_url + "/generate", json={ "text": "\n<|grounding|>Convert the document to pure text.", - "image_data": "../../examples/assets/example_image.png", + "image_data": self.image_path, "sampling_params": { "temperature": 0 if n == 1 else 0.5, "max_new_tokens": max_new_tokens, @@ -94,28 +119,5 @@ def test_moe(self): self.run_decode() -class TestDeepSeekOCRTriton(TestDeepSeekOCR): - @classmethod - def setUpClass(cls): - cls.model = "deepseek-ai/DeepSeek-OCR" - cls.tokenizer = AutoTokenizer.from_pretrained(cls.model, use_fast=False) - cls.base_url = DEFAULT_URL_FOR_TEST - cls.common_args = [ - "--device", - "xpu", - "--attention-backend", - "intel_xpu", - ] - os.environ["SGLANG_USE_SGL_XPU"] = "0" - cls.process = popen_launch_server( - cls.model, - cls.base_url, - timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, - other_args=[ - *cls.common_args, - ], - ) - - if __name__ == "__main__": unittest.main() diff --git a/test/srt/xpu/test_deepseek_ocr_triton.py b/test/srt/xpu/test_deepseek_ocr_triton.py new file mode 100644 index 000000000000..9222231e2121 --- /dev/null +++ b/test/srt/xpu/test_deepseek_ocr_triton.py @@ -0,0 +1,51 @@ +""" +python3 -m unittest test_deepseek_ocr_triton.py +""" + +import os +import unittest +from pathlib import Path + +import test_deepseek_ocr as deepseek_ocr +from transformers import AutoTokenizer + +from sglang.test.test_utils import ( + DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + DEFAULT_URL_FOR_TEST, + popen_launch_server, +) + + +class TestDeepSeekOCRTriton(deepseek_ocr.TestDeepSeekOCR): + @classmethod + def setUpClass(cls): + cls._cleanup_xpu_memory() + cls.model = "deepseek-ai/DeepSeek-OCR" + cls.tokenizer = AutoTokenizer.from_pretrained( + cls.model, use_fast=False, trust_remote_code=True + ) + cls.base_url = DEFAULT_URL_FOR_TEST + cls.image_path = str( + (Path(__file__).resolve().parents[3] / "examples/assets/example_image.png") + ) + if not os.path.exists(cls.image_path): + raise FileNotFoundError(f"Image not found: {cls.image_path}") + cls.common_args = [ + "--device", + "xpu", + "--attention-backend", + "intel_xpu", + ] + os.environ["SGLANG_USE_SGL_XPU"] = "0" + cls.process = popen_launch_server( + cls.model, + cls.base_url, + timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH, + other_args=[ + *cls.common_args, + ], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/srt/xpu/test_intel_xpu_backend.py b/test/srt/xpu/test_intel_xpu_backend.py index 701769e75c9c..1be345212499 100644 --- a/test/srt/xpu/test_intel_xpu_backend.py +++ b/test/srt/xpu/test_intel_xpu_backend.py @@ -3,6 +3,7 @@ python3 -m unittest test_intel_xpu_backend.TestIntelXPUBackend.test_latency_qwen_model """ +import gc import unittest from functools import wraps @@ -15,26 +16,44 @@ ) +def _cleanup_xpu_memory(): + gc.collect() + try: + import torch + + if hasattr(torch, "xpu") and torch.xpu.is_available(): + torch.xpu.synchronize() + torch.xpu.empty_cache() + except Exception: + # Best-effort cleanup only. + pass + + def intel_xpu_benchmark(extra_args=None, min_throughput=None): def decorator(test_func): @wraps(test_func) def wrapper(self): + _cleanup_xpu_memory() common_args = [ "--disable-radix", "--trust-remote-code", "--mem-fraction-static", - "0.3", + "0.4", "--batch-size", "1", "--device", "xpu", ] - full_args = common_args + (extra_args or []) + ci_args = ["--input", "64", "--output", "4"] if is_in_ci() else [] + full_args = common_args + ci_args + (extra_args or []) model = test_func(self) - prefill_latency, decode_throughput, decode_latency = run_bench_one_batch( - model, full_args - ) + try: + prefill_latency, decode_throughput, decode_latency = ( + run_bench_one_batch(model, full_args) + ) + finally: + _cleanup_xpu_memory() print(f"{model=}") print(f"{prefill_latency=}")