From bb0287be9bba0a967d170059268faa46c10e8880 Mon Sep 17 00:00:00 2001 From: Didan Deng <33117903+wtomin@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:26:38 +0800 Subject: [PATCH 1/3] update flux-2-dev test Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com> --- tests/e2e/online_serving/test_flux_2_dev_expansion.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/e2e/online_serving/test_flux_2_dev_expansion.py b/tests/e2e/online_serving/test_flux_2_dev_expansion.py index 9d96a48c0c8..f7477ed803e 100644 --- a/tests/e2e/online_serving/test_flux_2_dev_expansion.py +++ b/tests/e2e/online_serving/test_flux_2_dev_expansion.py @@ -27,7 +27,7 @@ NEGATIVE_PROMPT = "low quality, blurry, distorted, deformed, watermark" SINGLE_CARD_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"}) -PARALLEL_FEATURE_MARKS = hardware_marks(res={"cuda": "L4"}, num_cards=2) +PARALLEL_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"}, num_cards=2) def _get_flux_2_dev_feature_cases(model: str): @@ -48,8 +48,6 @@ def _get_flux_2_dev_feature_cases(model: str): OmniServerParams( model=model, server_args=[ - "--cache-backend", - "cache_dit", "--enable-cpu-offload", "--cfg-parallel-size", "2", From 0d4df4bc2df97790f4ac0490a08ccce1b9fc46b2 Mon Sep 17 00:00:00 2001 From: Didan Deng <33117903+wtomin@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:39:33 +0800 Subject: [PATCH 2/3] dynin_omni l4 e2e test fix Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com> --- tests/e2e/online_serving/test_dynin_omni_expansion.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/online_serving/test_dynin_omni_expansion.py b/tests/e2e/online_serving/test_dynin_omni_expansion.py index 39b6dc8e212..710c480f08d 100644 --- a/tests/e2e/online_serving/test_dynin_omni_expansion.py +++ b/tests/e2e/online_serving/test_dynin_omni_expansion.py @@ -120,7 +120,7 @@ def _build_i2i_messages(prompt: str) -> list[dict]: @pytest.mark.advanced_model @pytest.mark.omni -@hardware_test(res={"cuda": "L4", "rocm": "MI325"}) +@hardware_test(res={"cuda": "H100", "rocm": "MI325"}) @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True) def test_send_i2i_request_001(omni_server, openai_client) -> None: request_config = { @@ -136,7 +136,7 @@ def test_send_i2i_request_001(omni_server, openai_client) -> None: @pytest.mark.advanced_model @pytest.mark.omni -@hardware_test(res={"cuda": "L4", "rocm": "MI325"}) +@hardware_test(res={"cuda": "H100", "rocm": "MI325"}) @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True) def test_send_t2i_request_001(omni_server, openai_client) -> None: request_config = { @@ -149,7 +149,7 @@ def test_send_t2i_request_001(omni_server, openai_client) -> None: @pytest.mark.core_model @pytest.mark.omni -@hardware_test(res={"cuda": "L4", "rocm": "MI325"}) +@hardware_test(res={"cuda": "H100", "rocm": "MI325"}) @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True) def test_send_t2s_request_001(omni_server, dynin_t2s_openai_client) -> None: request_config = { From e0ef6301363304ffdb755ce4d6acaeb2c9bf6c83 Mon Sep 17 00:00:00 2001 From: Didan Deng <33117903+wtomin@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:52:43 +0800 Subject: [PATCH 3/3] enlarge diffusion perf test threshold Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com> --- tests/dfx/perf/tests/test_qwen_image_vllm_omni.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json b/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json index 387e874ad5f..97c1bbfb3c7 100644 --- a/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json +++ b/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json @@ -72,7 +72,7 @@ "enable-negative-prompt": true, "baseline": { "throughput_qps": 0.1, - "latency_mean": 2.34, + "latency_mean": 2.7, "peak_memory_mb_mean": 61000 } },