diff --git a/python/sglang/multimodal_gen/test/run_suite.py b/python/sglang/multimodal_gen/test/run_suite.py
index 9f47799cdfa4..26fd0213d068 100644
--- a/python/sglang/multimodal_gen/test/run_suite.py
+++ b/python/sglang/multimodal_gen/test/run_suite.py
@@ -22,12 +22,14 @@
     "1-gpu": [
         "test_server_a.py",
         "test_server_b.py",
+        "test_server_correctness.py",
         "test_lora_format_adapter.py",
         # add new 1-gpu test files here
     ],
     "2-gpu": [
         "test_server_2_gpu_a.py",
         "test_server_2_gpu_b.py",
+        "test_server_2_gpu_correctness.py",
         # add new 2-gpu test files here
     ],
 }
diff --git a/python/sglang/multimodal_gen/test/server/test_server_2_gpu_correctness.py b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_correctness.py
new file mode 100644
index 000000000000..a2891a07c6ed
--- /dev/null
+++ b/python/sglang/multimodal_gen/test/server/test_server_2_gpu_correctness.py
@@ -0,0 +1,33 @@
+"""
+End-to-end functional correctness tests for 2-GPU diffusion server.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger
+from sglang.multimodal_gen.test.server.test_server_common import (  # noqa: F401
+    diffusion_server,
+)
+from sglang.multimodal_gen.test.server.test_server_correctness import (
+    CorrectnessTestMixin,
+)
+from sglang.multimodal_gen.test.server.testcase_configs import (
+    CORRECTNESS_2_GPU_CASES,
+    DiffusionTestCase,
+)
+
+logger = init_logger(__name__)
+
+
+class TestDiffusionServerTwoGpuCorrectness(CorrectnessTestMixin):
+    """
+    Functional correctness tests for 2-GPU diffusion cases.
+    Inherits shared functional logic from CorrectnessTestMixin.
+    """
+
+    @pytest.fixture(params=CORRECTNESS_2_GPU_CASES, ids=lambda c: c.id)
+    def case(self, request) -> DiffusionTestCase:
+        """Provide a DiffusionTestCase for each 2-GPU correctness test."""
+        return request.param
diff --git a/python/sglang/multimodal_gen/test/server/test_server_correctness.py b/python/sglang/multimodal_gen/test/server/test_server_correctness.py
new file mode 100644
index 000000000000..5120c83feb60
--- /dev/null
+++ b/python/sglang/multimodal_gen/test/server/test_server_correctness.py
@@ -0,0 +1,105 @@
+"""
+End-to-end functional correctness tests for diffusion server.
+Verifies functional logic, parameter propagation, and error robustness.
+"""
+
+from __future__ import annotations
+
+import pytest
+import requests
+from openai import OpenAI
+
+from sglang.multimodal_gen.runtime.utils.logging_utils import init_logger
+from sglang.multimodal_gen.test.server.test_server_common import (  # noqa: F401
+    diffusion_server,
+)
+from sglang.multimodal_gen.test.server.test_server_utils import get_generate_fn
+from sglang.multimodal_gen.test.server.testcase_configs import (
+    CORRECTNESS_1_GPU_CASES,
+    DiffusionTestCase,
+)
+
+logger = init_logger(__name__)
+
+
+class CorrectnessTestMixin:
+    """
+    Mixin containing functional verification logic shared across GPU suites.
+    """
+
+    def _client(self, ctx):
+        """Get OpenAI client for the server."""
+        return OpenAI(
+            api_key="sglang-anything",
+            base_url=f"http://localhost:{ctx.port}/v1",
+        )
+
+    def test_functional_success(self, case: DiffusionTestCase, diffusion_server):
+        """
+        Verify that the model generates output successfully for its modality.
+        Reuses project-standard polling and validation logic from get_generate_fn.
+        """
+        client = self._client(diffusion_server)
+        generate_fn = get_generate_fn(
+            model_path=case.server_args.model_path,
+            modality=case.server_args.modality,
+            sampling_params=case.sampling_params,
+        )
+
+        # functional success check
+        generate_fn(case.id, client)
+        logger.info(f"Functional success verified for {case.id}")
+
+    def test_seed_determinism(self, case: DiffusionTestCase, diffusion_server):
+        """
+        Verify bit-identical results for identical seeds (Image only).
+        """
+        if case.server_args.modality != "image":
+            pytest.skip("Seed determinism check restricted to image modality")
+
+        client = self._client(diffusion_server)
+        payload = {
+            "model": case.server_args.model_path,
+            "prompt": case.sampling_params.prompt or "A dog with sunglasses",
+            "size": case.sampling_params.output_size,
+            "response_format": "b64_json",
+            "seed": 42,
+        }
+
+        resp1 = client.images.generate(**payload)
+        resp2 = client.images.generate(**payload)
+
+        assert (
+            resp1.data[0].b64_json == resp2.data[0].b64_json
+        ), "Seed determinism failed: outputs are not bit-identical"
+        logger.info(f"Seed determinism verified for {case.id}")
+
+    def test_api_error_codes(self, case: DiffusionTestCase, diffusion_server):
+        """
+        Verify server returns correct HTTP error codes for malformed requests.
+        """
+        if case.server_args.modality != "image":
+            pytest.skip(
+                "Error code boundary check restricted to image generations endpoint"
+            )
+
+        client = self._client(diffusion_server)
+        # client.base_url is normalized to end with "/"; strip it to avoid "//"
+        endpoint = f"{str(client.base_url).rstrip('/')}/images/generations"
+
+        # Verify 422 for missing mandatory prompt
+        payload = {"model": case.server_args.model_path}
+        resp = requests.post(endpoint, json=payload, timeout=30)
+        assert resp.status_code == 422, f"Expected 422, got {resp.status_code}"
+        logger.info(f"Error handling verified for {case.id}")
+
+
+class TestDiffusionCorrectness(CorrectnessTestMixin):
+    """
+    Functional correctness tests for 1-GPU diffusion cases.
+    """
+
+    @pytest.fixture(params=CORRECTNESS_1_GPU_CASES, ids=lambda c: c.id)
+    def case(self, request) -> DiffusionTestCase:
+        """Provide a DiffusionTestCase for each 1-GPU correctness test."""
+        return request.param
diff --git a/python/sglang/multimodal_gen/test/server/testcase_configs.py b/python/sglang/multimodal_gen/test/server/testcase_configs.py
index 0bb2b5a5a273..bbef43052c47 100644
--- a/python/sglang/multimodal_gen/test/server/testcase_configs.py
+++ b/python/sglang/multimodal_gen/test/server/testcase_configs.py
@@ -569,5 +569,49 @@ def from_req_perf_record(
     ),
 ]
 
+# Correctness Cases
+CORRECTNESS_1_GPU_CASES = [
+    # Standard image correctness (seed & functional check)
+    DiffusionTestCase(
+        "qwen_image_correctness_seed",
+        DiffusionServerArgs(
+            model_path="Qwen/Qwen-Image", modality="image", warmup_text=1
+        ),
+        T2I_sampling_params,
+    ),
+    # Multimodal Image Edit Correctness (Exercises /edits and Vision encoders)
+    DiffusionTestCase(
+        "qwen_image_edit_correctness",
+        DiffusionServerArgs(
+            model_path="Qwen/Qwen-Image-Edit", modality="image", warmup_edit=1
+        ),
+        TI2I_sampling_params,
+    ),
+    # Video generation correctness
+    DiffusionTestCase(
+        "wan_video_correctness",
+        DiffusionServerArgs(
+            model_path="Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
+            modality="video",
+            warmup_text=0,
+        ),
+        DiffusionSamplingParams(prompt="A dog with sunglasses", num_frames=8),
+    ),
+]
+
+CORRECTNESS_2_GPU_CASES = [
+    # Multimodal Image to Video
+    DiffusionTestCase(
+        "wan_video_i2v_correctness_2gpu",
+        DiffusionServerArgs(
+            model_path="Wan-AI/Wan2.1-I2V-14B-480P-Diffusers",
+            modality="video",
+            num_gpus=2,
+            warmup_edit=0,
+        ),
+        TI2V_sampling_params,
+    ),
+]
+
 # Load global configuration
 BASELINE_CONFIG = BaselineConfig.load(Path(__file__).with_name("perf_baselines.json"))