From bb0287be9bba0a967d170059268faa46c10e8880 Mon Sep 17 00:00:00 2001
From: Didan Deng <33117903+wtomin@users.noreply.github.com>
Date: Mon, 13 Apr 2026 14:26:38 +0800
Subject: [PATCH 1/3] Run flux-2-dev parallel e2e cases on H100 and drop cache_dit from the CFG-parallel case

Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com>
---
 tests/e2e/online_serving/test_flux_2_dev_expansion.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/e2e/online_serving/test_flux_2_dev_expansion.py b/tests/e2e/online_serving/test_flux_2_dev_expansion.py
index 9d96a48c0c8..f7477ed803e 100644
--- a/tests/e2e/online_serving/test_flux_2_dev_expansion.py
+++ b/tests/e2e/online_serving/test_flux_2_dev_expansion.py
@@ -27,7 +27,7 @@
 NEGATIVE_PROMPT = "low quality, blurry, distorted, deformed, watermark"
 
 SINGLE_CARD_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"})
-PARALLEL_FEATURE_MARKS = hardware_marks(res={"cuda": "L4"}, num_cards=2)
+PARALLEL_FEATURE_MARKS = hardware_marks(res={"cuda": "H100"}, num_cards=2)
 
 
 def _get_flux_2_dev_feature_cases(model: str):
@@ -48,8 +48,6 @@ def _get_flux_2_dev_feature_cases(model: str):
             OmniServerParams(
                 model=model,
                 server_args=[
-                    "--cache-backend",
-                    "cache_dit",
                     "--enable-cpu-offload",
                     "--cfg-parallel-size",
                     "2",

From 0d4df4bc2df97790f4ac0490a08ccce1b9fc46b2 Mon Sep 17 00:00:00 2001
From: Didan Deng <33117903+wtomin@users.noreply.github.com>
Date: Mon, 13 Apr 2026 14:39:33 +0800
Subject: [PATCH 2/3] Run dynin_omni e2e tests on H100 instead of L4

Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com>
---
 tests/e2e/online_serving/test_dynin_omni_expansion.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/e2e/online_serving/test_dynin_omni_expansion.py b/tests/e2e/online_serving/test_dynin_omni_expansion.py
index 39b6dc8e212..710c480f08d 100644
--- a/tests/e2e/online_serving/test_dynin_omni_expansion.py
+++ b/tests/e2e/online_serving/test_dynin_omni_expansion.py
@@ -120,7 +120,7 @@ def _build_i2i_messages(prompt: str) -> list[dict]:
 
 @pytest.mark.advanced_model
 @pytest.mark.omni
-@hardware_test(res={"cuda": "L4", "rocm": "MI325"})
+@hardware_test(res={"cuda": "H100", "rocm": "MI325"})
 @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True)
 def test_send_i2i_request_001(omni_server, openai_client) -> None:
     request_config = {
@@ -136,7 +136,7 @@ def test_send_i2i_request_001(omni_server, openai_client) -> None:
 
 @pytest.mark.advanced_model
 @pytest.mark.omni
-@hardware_test(res={"cuda": "L4", "rocm": "MI325"})
+@hardware_test(res={"cuda": "H100", "rocm": "MI325"})
 @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True)
 def test_send_t2i_request_001(omni_server, openai_client) -> None:
     request_config = {
@@ -149,7 +149,7 @@ def test_send_t2i_request_001(omni_server, openai_client) -> None:
 
 @pytest.mark.core_model
 @pytest.mark.omni
-@hardware_test(res={"cuda": "L4", "rocm": "MI325"})
+@hardware_test(res={"cuda": "H100", "rocm": "MI325"})
 @pytest.mark.parametrize("omni_server", TEST_PARAMS, indirect=True)
 def test_send_t2s_request_001(omni_server, dynin_t2s_openai_client) -> None:
     request_config = {

From e0ef6301363304ffdb755ce4d6acaeb2c9bf6c83 Mon Sep 17 00:00:00 2001
From: Didan Deng <33117903+wtomin@users.noreply.github.com>
Date: Mon, 13 Apr 2026 14:52:43 +0800
Subject: [PATCH 3/3] Raise qwen-image perf test latency_mean baseline from 2.34 to 2.7

Signed-off-by: Didan Deng <33117903+wtomin@users.noreply.github.com>
---
 tests/dfx/perf/tests/test_qwen_image_vllm_omni.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json b/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
index 387e874ad5f..97c1bbfb3c7 100644
--- a/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
+++ b/tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
@@ -72,7 +72,7 @@
                 "enable-negative-prompt": true,
                 "baseline": {
                     "throughput_qps": 0.1,
-                    "latency_mean": 2.34,
+                    "latency_mean": 2.7,
                     "peak_memory_mb_mean": 61000
                 }
             },