# Diffusion (multimodal generation) nightly server tests.
# These two jobs belong under the workflow's top-level `jobs:` mapping.
# Each job splits its suite across the `part` matrix; the partition count is
# taken from `strategy.job-total` so adding or removing a matrix entry cannot
# leave `--total-partitions` out of sync with the number of runners.
nightly-test-multimodal-server-1-gpu:
  if: github.repository == 'sgl-project/sglang'
  runs-on: 1-gpu-runner
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh diffusion
        pip install slack_sdk

    - name: Run diffusion server tests
      env:
        # Token used by the test helpers to post generated media to Slack.
        SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        # Lets every partition find the same Slack thread for this run.
        GITHUB_RUN_ID: ${{ github.run_id }}
      timeout-minutes: 60
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 1-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions ${{ strategy.job-total }}

nightly-test-multimodal-server-2-gpu:
  if: github.repository == 'sgl-project/sglang'
  runs-on: 2-gpu-runner
  strategy:
    fail-fast: false
    max-parallel: 5
    matrix:
      part: [0, 1]
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Install dependencies
      run: |
        bash scripts/ci/ci_install_dependency.sh diffusion
        pip install slack_sdk

    - name: Run diffusion server tests
      env:
        # Token used by the test helpers to post generated media to Slack.
        SGLANG_DIFFUSION_SLACK_TOKEN: ${{ secrets.SGLANG_DIFFUSION_SLACK_TOKEN }}
        # Lets every partition find the same Slack thread for this run.
        GITHUB_RUN_ID: ${{ github.run_id }}
      timeout-minutes: 60
      run: |
        cd python
        python3 sglang/multimodal_gen/test/run_suite.py \
          --suite 2-gpu \
          --partition-id ${{ matrix.part }} \
          --total-partitions ${{ strategy.job-total }}
nightly-test-vlm-accuracy-2-gpu-runner - nightly-test-vlm-perf-2-gpu-runner + - nightly-test-multimodal-server-1-gpu + - nightly-test-multimodal-server-2-gpu - nightly-test-perf-4-gpu-b200 - nightly-test-perf-8-gpu-b200 runs-on: ubuntu-latest diff --git a/python/sglang/multimodal_gen/test/server/test_server_common.py b/python/sglang/multimodal_gen/test/server/test_server_common.py index c7bf6fbadba3..0c3590e54484 100644 --- a/python/sglang/multimodal_gen/test/server/test_server_common.py +++ b/python/sglang/multimodal_gen/test/server/test_server_common.py @@ -7,6 +7,7 @@ from __future__ import annotations +import base64 import os import time from pathlib import Path @@ -32,6 +33,7 @@ PerformanceSummary, ScenarioConfig, ) +from sglang.multimodal_gen.test.slack_utils import upload_file_to_slack from sglang.multimodal_gen.test.test_utils import ( get_dynamic_server_port, read_perf_logs, @@ -225,6 +227,19 @@ def _create_and_download_video( resp = client.videos.download_content(video_id=video_id) # type: ignore[attr-defined] content = resp.read() validate_openai_video(content) + + tmp_path = f"{video_id}.mp4" + with open(tmp_path, "wb") as f: + f.write(content) + upload_file_to_slack( + case_id=case.id, + model=case.model_path, + prompt=case.prompt, + file_path=tmp_path, + origin_file_path=case.image_path, + ) + os.remove(tmp_path) + return video_id # for all tests, seconds = case.seconds or fallback 4 seconds @@ -248,6 +263,19 @@ def generate_image() -> str: ) result = response.parse() validate_image(result.data[0].b64_json) + + img_data = base64.b64decode(result.data[0].b64_json) + tmp_path = f"{result.created}.png" + with open(tmp_path, "wb") as f: + f.write(img_data) + upload_file_to_slack( + case_id=case.id, + model=case.model_path, + prompt=case.prompt, + file_path=tmp_path, + ) + os.remove(tmp_path) + return str(result.created) def generate_image_edit() -> str: @@ -276,6 +304,20 @@ def generate_image_edit() -> str: result = response.parse() 
"""
Upload the media generated by the diffusion nightly tests to an SGLang Slack channel.

Every GitHub run gets one parent message (a status table listing all known
test cases); each generated artifact is posted as a reply in that thread and
the parent message is then refreshed to mark the finished cases.
"""

import inspect
import logging
import os
import re
import tempfile
from datetime import datetime
from urllib.parse import urlparse
from urllib.request import urlopen

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Slack channel that receives the nightly diffusion artifacts.
_SLACK_CHANNEL_ID = "C0A02NDF7UY"
# Socket timeout for fetching a remote reference image, so a stalled
# download cannot hang the CI job.
_DOWNLOAD_TIMEOUT_SECONDS = 60
_PENDING_MARK = "⏳"
_DONE_MARK = "✅"
_VIDEO_SUFFIXES = (".mp4", ".mov", ".webm", ".mkv", ".avi")


def _discover_test_cases():
    """Collect every ``DiffusionTestCase`` declared in ``testcase_configs``.

    Module attributes whose name ends with ``_CASES`` or contains ``_CASES_``
    and that hold a non-empty list starting with a ``DiffusionTestCase`` are
    merged, keeping only the first case seen for each ``id``.

    Returns an empty list (and logs a warning) when the test configuration
    cannot be imported, so this module stays usable outside the test tree.
    """
    try:
        import sglang.multimodal_gen.test.server.testcase_configs as configs
        from sglang.multimodal_gen.test.server.testcase_configs import (
            DiffusionTestCase,
        )

        unique_cases = []
        seen_ids = set()
        for name, value in inspect.getmembers(configs):
            if not (name.endswith("_CASES") or "_CASES_" in name):
                continue
            if not (
                isinstance(value, list)
                and value
                and isinstance(value[0], DiffusionTestCase)
            ):
                continue
            for case in value:
                if case.id not in seen_ids:
                    seen_ids.add(case.id)
                    unique_cases.append(case)
        return unique_cases
    except Exception as e:
        logger.warning("Failed to import test cases: %s", e)
        return []


ALL_CASES = _discover_test_cases()


def _get_status_message(run_id, current_case_id, thread_messages=None):
    """Build the text of the parent (status) message for a nightly run.

    Args:
        run_id: GitHub run identifier shown in the header.
        current_case_id: Case that has just finished; always marked done.
        thread_messages: Optional list of Slack message dicts from the run's
            thread. A case is marked done when any message text contains
            ``*Case ID:* `<id>```.

    Returns:
        The header alone when no test cases are known, otherwise the header
        followed by a code-block table with one row per case.
    """
    date_str = datetime.now().strftime("%d/%m")
    base_header = (
        f"*🧵 for nightly test of {date_str}*\n"
        f"*GitHub Run ID:* {run_id}\n"
        f"*Total Tasks:* {len(ALL_CASES)}"
    )
    if not ALL_CASES:
        return base_header

    done_ids = set()
    if current_case_id:
        done_ids.add(current_case_id)
    for msg in thread_messages or []:
        # Slack may return ``"text": None``; treat it as empty text.
        text = msg.get("text") or ""
        done_ids.update(c.id for c in ALL_CASES if f"*Case ID:* `{c.id}`" in text)

    # Pad the first column to the widest case id (or the column title).
    max_len = max(len("Case ID"), max(len(c.id) for c in ALL_CASES))

    lines = [
        base_header,
        "",
        "*Tasks Status:*",
        "```",
        f"| {'Case ID'.ljust(max_len)} | Status |",
        f"| {'-' * max_len} | :----: |",
    ]
    for c in ALL_CASES:
        mark = _DONE_MARK if c.id in done_ids else _PENDING_MARK
        lines.append(f"| {c.id.ljust(max_len)} | {mark} |")
    lines.append("```")

    return "\n".join(lines)


def _generated_title(file_path):
    """Return the Slack title for the generated artifact, based on its extension."""
    suffix = os.path.splitext(file_path)[1].lower()
    return "Generated Video" if suffix in _VIDEO_SUFFIXES else "Generated Image"


def _mentions_run(text, run_id):
    """Return True when ``text`` carries the header line of exactly this run.

    The look-ahead rejects longer identifiers that merely start with
    ``run_id`` (e.g. run ``123`` must not adopt the thread of run ``1234``).
    """
    marker = re.escape(f"*GitHub Run ID:* {run_id}")
    return re.search(marker + r"(?![\w-])", text or "") is not None


def _find_run_thread(client, channel_id, run_id):
    """Look for this run's parent message in the recent channel history.

    Returns ``(thread_ts, parent_text)``, or ``(None, None)`` when nothing
    matches or the history lookup fails (the failure is logged).
    """
    try:
        history = client.conversations_history(channel=channel_id, limit=100)
        for msg in history.get("messages", []):
            text = msg.get("text") or ""
            if _mentions_run(text, run_id):
                # Use thread_ts if it exists (msg is a reply), otherwise ts
                # (msg is a parent).
                thread_ts = msg.get("thread_ts") or msg.get("ts")
                logger.info("Found thread_ts: %s", thread_ts)
                return thread_ts, text
    except Exception as e:
        logger.warning("Failed to search slack history: %s", e)
    return None, None


def _create_run_thread(client, channel_id, run_id, case_id):
    """Post a new parent status message; return its ``ts`` or None on failure."""
    try:
        text = _get_status_message(run_id, case_id)
        response = client.chat_postMessage(channel=channel_id, text=text)
        return response["ts"]
    except Exception as e:
        logger.warning("Failed to create parent thread: %s", e)
        return None


def _refresh_run_status(client, channel_id, thread_ts, run_id, case_id, old_text):
    """Rebuild the status table from the thread replies and update the parent."""
    try:
        replies = client.conversations_replies(
            channel=channel_id, ts=thread_ts, limit=200
        )
        new_text = _get_status_message(run_id, case_id, replies.get("messages", []))
        # Skip the API call when the rendered table did not change.
        if new_text != old_text:
            client.chat_update(channel=channel_id, ts=thread_ts, text=new_text)
    except Exception as e:
        logger.warning("Failed to update parent message: %s", e)


def upload_file_to_slack(
    case_id: str = None,
    model: str = None,
    prompt: str = None,
    file_path: str = None,
    origin_file_path: str = None,
) -> bool:
    """Upload a generated media file (and optionally its source image) to Slack.

    The upload is best-effort: every failure is logged and reported through
    the return value, never raised, so a Slack outage cannot fail a test.

    Args:
        case_id: Test case identifier shown in the message.
        model: Model path shown in the message.
        prompt: Prompt shown in the message.
        file_path: Local path of the generated file; must exist.
        origin_file_path: Optional local path or ``http(s)://`` URL of the
            original input image. URLs are downloaded to a temporary file
            that is removed before returning.

    Returns:
        True when the file was uploaded, False otherwise (``slack_sdk``
        missing, no ``SGLANG_DIFFUSION_SLACK_TOKEN``, missing file, or any
        error while talking to Slack or downloading the original image).
    """
    temp_path = None
    try:
        from slack_sdk import WebClient

        run_id = os.getenv("GITHUB_RUN_ID", "local")

        token = os.environ.get("SGLANG_DIFFUSION_SLACK_TOKEN")
        if not token:
            logger.info("Slack upload failed: no token")
            return False

        if not file_path or not os.path.exists(file_path):
            logger.info("Slack upload failed: no file path")
            return False

        # Require a full scheme prefix so a local path that merely starts
        # with "http" is not mistaken for a URL.
        if origin_file_path and origin_file_path.startswith(("http://", "https://")):
            suffix = os.path.splitext(urlparse(origin_file_path).path)[1] or ".tmp"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tf:
                # Record the path before downloading so the ``finally``
                # clause removes the file even when the download fails.
                temp_path = tf.name
                with urlopen(
                    origin_file_path, timeout=_DOWNLOAD_TIMEOUT_SECONDS
                ) as response:
                    tf.write(response.read())
            origin_file_path = temp_path

        uploads = [{"file": file_path, "title": _generated_title(file_path)}]
        if origin_file_path and os.path.exists(origin_file_path):
            uploads.insert(0, {"file": origin_file_path, "title": "Original Image"})

        message = (
            f"*Case ID:* `{case_id}`\n" f"*Model:* `{model}`\n" f"*Prompt:* {prompt}"
        )

        client = WebClient(token=token)
        channel_id = _SLACK_CHANNEL_ID

        thread_ts, parent_msg_text = _find_run_thread(client, channel_id, run_id)
        if not thread_ts:
            thread_ts = _create_run_thread(client, channel_id, run_id, case_id)

        # Upload first so the reply is part of the thread history that the
        # status refresh below reads. ``thread_ts`` may still be None here, in
        # which case the files go to the channel without a thread.
        client.files_upload_v2(
            channel=channel_id,
            file_uploads=uploads,
            initial_comment=message,
            thread_ts=thread_ts,
        )

        if thread_ts:
            _refresh_run_status(
                client, channel_id, thread_ts, run_id, case_id, parent_msg_text
            )

        logger.info("File uploaded successfully: %s", os.path.basename(file_path))
        return True

    except Exception as e:
        logger.info("Slack upload failed: %s", e)
        return False
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)