Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion test/srt/run_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@
# NOTE: please sort the test cases alphabetically by the test file name
suite_xpu = {
"per-commit-xpu": [
TestFile("xpu/test_deepseek_ocr.py"),
TestFile("xpu/test_deepseek_ocr.py", 360),
TestFile("xpu/test_deepseek_ocr_triton.py", 360),
# TestFile("xpu/test_internvl.py"),
TestFile("xpu/test_intel_xpu_backend.py"),
],
Expand Down
54 changes: 28 additions & 26 deletions test/srt/xpu/test_deepseek_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
python3 -m unittest test_deepseek_ocr.py
"""

import gc
import json
import os
import unittest
from pathlib import Path

import requests
from transformers import AutoTokenizer
Expand All @@ -19,11 +21,32 @@


class TestDeepSeekOCR(CustomTestCase):
@classmethod
def _cleanup_xpu_memory(cls):
    """Best-effort release of Python garbage and cached XPU memory.

    Runs a garbage-collection pass and then, if ``torch`` can be imported
    and reports an available XPU device, synchronizes the device and asks
    torch to empty its XPU cache.

    Cleanup never raises: a missing ``torch`` is ignored, and any other
    failure is logged as a warning so the tests can continue while the
    problem stays visible in the test output.
    """
    import logging

    gc.collect()
    try:
        import torch

        # getattr covers torch builds that ship without the xpu submodule.
        xpu = getattr(torch, "xpu", None)
        if xpu is not None and xpu.is_available():
            xpu.synchronize()
            xpu.empty_cache()
    except ImportError:
        # No torch means there is no device cache to release.
        pass
    except Exception:
        # Best-effort cleanup only; report the failure instead of hiding it.
        logging.getLogger(__name__).warning(
            "XPU memory cleanup failed; continuing", exc_info=True
        )

@classmethod
def setUpClass(cls):
cls._cleanup_xpu_memory()
cls.model = "deepseek-ai/DeepSeek-OCR"
cls.tokenizer = AutoTokenizer.from_pretrained(cls.model, use_fast=False)
cls.tokenizer = AutoTokenizer.from_pretrained(
cls.model, use_fast=False, trust_remote_code=True
)
cls.base_url = DEFAULT_URL_FOR_TEST
cls.image_path = str(
(Path(__file__).resolve().parents[3] / "examples/assets/example_image.png")
)
if not os.path.exists(cls.image_path):
raise FileNotFoundError(f"Image not found: {cls.image_path}")
cls.common_args = [
"--device",
"xpu",
Expand All @@ -43,14 +66,16 @@ def setUpClass(cls):
@classmethod
def tearDownClass(cls):
    """Fixture that is run once after all tests in the class.

    Kills the launched server process tree when one was started, then
    releases XPU memory. The memory cleanup runs even if killing the
    process raises.
    """
    # `process` is only set once the server launch in setUpClass succeeded.
    process = getattr(cls, "process", None)
    try:
        if process:
            kill_process_tree(process.pid)
    finally:
        cls._cleanup_xpu_memory()

def get_request_json(self, max_new_tokens=32, n=1):
response = requests.post(
self.base_url + "/generate",
json={
"text": "<image>\n<|grounding|>Convert the document to pure text.",
"image_data": "../../examples/assets/example_image.png",
"image_data": self.image_path,
"sampling_params": {
"temperature": 0 if n == 1 else 0.5,
"max_new_tokens": max_new_tokens,
Expand Down Expand Up @@ -94,28 +119,5 @@ def test_moe(self):
self.run_decode()


class TestDeepSeekOCRTriton(TestDeepSeekOCR):
    """Re-run the DeepSeek-OCR tests with ``SGLANG_USE_SGL_XPU`` set to "0"."""

    @classmethod
    def setUpClass(cls):
        """Load the tokenizer and launch the XPU server for this class."""
        model_id = "deepseek-ai/DeepSeek-OCR"
        cls.model = model_id
        cls.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
        cls.base_url = DEFAULT_URL_FOR_TEST
        cls.common_args = ["--device", "xpu", "--attention-backend", "intel_xpu"]

        # The flag must be in the environment before the server is spawned.
        os.environ["SGLANG_USE_SGL_XPU"] = "0"
        cls.process = popen_launch_server(
            model_id,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=list(cls.common_args),
        )


if __name__ == "__main__":
unittest.main()
51 changes: 51 additions & 0 deletions test/srt/xpu/test_deepseek_ocr_triton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
python3 -m unittest test_deepseek_ocr_triton.py
"""

import os
import unittest
from pathlib import Path

import test_deepseek_ocr as deepseek_ocr
from transformers import AutoTokenizer

from sglang.test.test_utils import (
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
DEFAULT_URL_FOR_TEST,
popen_launch_server,
)


class TestDeepSeekOCRTriton(deepseek_ocr.TestDeepSeekOCR):
    """Re-run the DeepSeek-OCR tests with ``SGLANG_USE_SGL_XPU`` set to "0".

    NOTE(review): judging by the class name, the flag presumably switches
    the server to a Triton code path -- confirm against the server code.

    The environment variable is restored to its previous state when the
    class is torn down (or when the server launch fails), so the override
    does not leak into other tests run in the same process.
    """

    _ENV_FLAG = "SGLANG_USE_SGL_XPU"
    # Value of the flag before setUpClass changed it (None means it was unset).
    _saved_env_flag = None
    # True only while this class's override is in effect.
    _env_flag_overridden = False

    @classmethod
    def setUpClass(cls):
        """Prepare the tokenizer and image path, then launch the XPU server.

        Raises:
            FileNotFoundError: If the example image is missing.
        """
        cls._cleanup_xpu_memory()
        cls.model = "deepseek-ai/DeepSeek-OCR"
        cls.tokenizer = AutoTokenizer.from_pretrained(
            cls.model, use_fast=False, trust_remote_code=True
        )
        cls.base_url = DEFAULT_URL_FOR_TEST
        # parents[3] of this file is used as the root that holds examples/.
        cls.image_path = str(
            Path(__file__).resolve().parents[3] / "examples/assets/example_image.png"
        )
        if not os.path.exists(cls.image_path):
            raise FileNotFoundError(f"Image not found: {cls.image_path}")
        cls.common_args = [
            "--device",
            "xpu",
            "--attention-backend",
            "intel_xpu",
        ]

        cls._saved_env_flag = os.environ.get(cls._ENV_FLAG)
        os.environ[cls._ENV_FLAG] = "0"
        cls._env_flag_overridden = True
        try:
            cls.process = popen_launch_server(
                cls.model,
                cls.base_url,
                timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
                other_args=[
                    *cls.common_args,
                ],
            )
        except Exception:
            # unittest skips tearDownClass when setUpClass fails, so undo
            # the environment override here before re-raising.
            cls._restore_env_flag()
            raise

    @classmethod
    def tearDownClass(cls):
        """Run the inherited teardown, then restore the environment flag."""
        try:
            super().tearDownClass()
        finally:
            cls._restore_env_flag()

    @classmethod
    def _restore_env_flag(cls):
        """Put ``SGLANG_USE_SGL_XPU`` back to its pre-setUpClass state."""
        if not cls._env_flag_overridden:
            return
        if cls._saved_env_flag is None:
            os.environ.pop(cls._ENV_FLAG, None)
        else:
            os.environ[cls._ENV_FLAG] = cls._saved_env_flag
        cls._env_flag_overridden = False


if __name__ == "__main__":
unittest.main()
29 changes: 24 additions & 5 deletions test/srt/xpu/test_intel_xpu_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
python3 -m unittest test_intel_xpu_backend.TestIntelXPUBackend.test_latency_qwen_model
"""

import gc
import unittest
from functools import wraps

Expand All @@ -15,26 +16,44 @@
)


def _cleanup_xpu_memory():
    """Best-effort release of Python garbage and cached XPU memory.

    Runs a garbage-collection pass and then, if ``torch`` can be imported
    and reports an available XPU device, synchronizes the device and asks
    torch to empty its XPU cache.

    Cleanup never raises: a missing ``torch`` is ignored, and any other
    failure is logged as a warning so the benchmark can continue while the
    problem stays visible in the test output.
    """
    import logging

    gc.collect()
    try:
        import torch

        # getattr covers torch builds that ship without the xpu submodule.
        xpu = getattr(torch, "xpu", None)
        if xpu is not None and xpu.is_available():
            xpu.synchronize()
            xpu.empty_cache()
    except ImportError:
        # No torch means there is no device cache to release.
        pass
    except Exception:
        # Best-effort cleanup only; report the failure instead of hiding it.
        logging.getLogger(__name__).warning(
            "XPU memory cleanup failed; continuing", exc_info=True
        )


def intel_xpu_benchmark(extra_args=None, min_throughput=None):
def decorator(test_func):
@wraps(test_func)
def wrapper(self):
_cleanup_xpu_memory()
common_args = [
"--disable-radix",
"--trust-remote-code",
"--mem-fraction-static",
"0.3",
"0.4",
"--batch-size",
"1",
"--device",
"xpu",
]
full_args = common_args + (extra_args or [])
ci_args = ["--input", "64", "--output", "4"] if is_in_ci() else []
full_args = common_args + ci_args + (extra_args or [])

model = test_func(self)
prefill_latency, decode_throughput, decode_latency = run_bench_one_batch(
model, full_args
)
try:
prefill_latency, decode_throughput, decode_latency = (
run_bench_one_batch(model, full_args)
)
finally:
_cleanup_xpu_memory()

print(f"{model=}")
print(f"{prefill_latency=}")
Expand Down
Loading