diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py
index 4bf42373128..411c40248bc 100644
--- a/tests/integration/defs/test_e2e.py
+++ b/tests/integration/defs/test_e2e.py
@@ -2575,13 +2575,6 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
             [["invention", "person", "scientists", "Lick", "engineers"],
              ["landscape", "trees", "road", "depicts", "scenic"]]
         },
-        "gemma-3-27b-it": {
-            "image": [
-                ["natural", "turbulent", "dramatic", "scene", "wave"],
-                ["image", "famous", "rock", "granite", "landmark"],
-                ["traffic", "moderate", "heavy", "flowing", "cars"],
-            ],
-        },
     }
 
     cmd = [
@@ -2622,6 +2615,14 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
 
     output = llm_venv.run_cmd(cmd, caller=check_output)
 
+    # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
+    if model_name == "gemma-3-27b-it":
+        print(
+            f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
+        )
+        print("output:", output)
+        return
+
     match_ratio = 4.0 / 5
     if model_name == "qwen2-vl-7b-instruct" and modality == "image":
         match_ratio = 4.0 / 6
@@ -3078,12 +3079,6 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
 
     # Define expected keywords for each model
     expected_keywords = {
-        "gemma-3-27b-it": {
-            "image": [
-                ["half", "dome", "yosemite", "landmark", "rounded"],
-                ["flowing", "traffic", "vehicles", "road", "Changi"],
-            ],
-        },
         "mistral-small-3.1-24b-instruct": {
             "image": [
                 ["scenic", "rock", "landscape", "monolith", "formation"],
@@ -3140,6 +3135,14 @@ def test_ptp_quickstart_multimodal_2gpu(llm_root, llm_venv, model_name,
 
     output = llm_venv.run_cmd(cmd, caller=check_output)
 
+    # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
+    if model_name == "gemma-3-27b-it":
+        print(
+            f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
+        )
+        print("output:", output)
+        return
+
     # Set match ratio based on model
     match_ratio = 4.0 / 5
     if model_name == "Phi-4-multimodal-instruct":
@@ -3188,12 +3191,6 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
 
     # Define expected keywords for each model
     expected_keywords = {
-        "gemma-3-27b-it": {
-            "image": [
-                ["description", "image", "half", "dome", "park"],
-                ["atmosphere", "peaceful", "majestic", "scene", "sky"],
-            ],
-        },
         "mistral-small-3.1-24b-instruct": {
             "image": [
                 [
@@ -3250,6 +3247,14 @@ def test_ptp_quickstart_multimodal_multiturn(llm_root, llm_venv, model_name,
 
     output = llm_venv.run_cmd(cmd, caller=check_output)
     print("output:", output)
+
+    # For gemma-3-27b-it, we only smoke test the model. Keyword matching is flaky.
+    if model_name == "gemma-3-27b-it":
+        print(
+            f"Skipping keyword matching test for {model_name}. Smoke test completed successfully."
+        )
+        return
+
     # Set match ratio based on model
     match_ratio = 4.0 / 5
     if model_name == "Phi-4-multimodal-instruct":