diff --git a/README.md b/README.md
index 5aedc683..83d1caee 100644
--- a/README.md
+++ b/README.md
@@ -86,6 +86,7 @@ lightspeed-eval --system-config config/system_api_disabled.yaml --eval-data conf
   - Response Evaluation
     - [`answer_correctness`](src/lightspeed_evaluation/core/metrics/custom.py)
     - [`intent_eval`](src/lightspeed_evaluation/core/metrics/custom.py) - Evaluates whether the response demonstrates the expected intent or purpose
+    - [`keywords_eval`](src/lightspeed_evaluation/core/metrics/custom/keywords_eval.py) - Keyword matching against alternative keyword lists (passes when ALL keywords of any one list appear in the response; case-insensitive substring match)
   - Tool Evaluation
     - [`tool_eval`](src/lightspeed_evaluation/core/metrics/custom.py) - Validates tool calls and arguments with regex pattern matching
 - **Script-based**
@@ -149,6 +150,9 @@ metrics_metadata:
       
     "custom:tool_eval":
       description: "Tool call evaluation comparing expected vs actual tool calls (regex for arguments)"
+      
+    "custom:keywords_eval":  # Binary evaluation (0 or 1)
+      description: "Keywords evaluation (ALL match) with sequential checking of alternatives (case-insensitive)"
   
   conversation_level:
     "deepeval:conversation_completeness":
@@ -226,12 +230,14 @@ embedding:
       contexts:
         - OpenShift Virtualization is an extension of the OpenShift ...
       attachments: []                   # Attachments (Optional)
+      expected_keywords: [["virtualization"], ["openshift"]]  # For keywords_eval: alternative keyword lists (any one list fully matching passes)
       expected_response: OpenShift Virtualization is an extension of the OpenShift Container Platform that allows running virtual machines alongside containers
       expected_intent: "explain a concept"  # Expected intent for intent evaluation
       
       # Per-turn metrics (overrides system defaults)
       turn_metrics:
         - "ragas:faithfulness"
+        - "custom:keywords_eval"
         - "custom:answer_correctness"
         - "custom:intent_eval"
       
@@ -289,6 +295,7 @@ embedding:
 | `response`            | string           | 📋       | Actual response from system          | ✅ (if API enabled)   |
 | `contexts`            | list[string]     | 📋       | Context information for evaluation   | ✅ (if API enabled)   |
 | `attachments`         | list[string]     | ❌       | Attachments                          | ❌                    |
+| `expected_keywords`   | list[list[string]] | 📋     | Expected keywords for keyword evaluation (alternative lists; all keywords in one list must match) | ❌ |
 | `expected_response`   | string           | 📋       | Expected response for comparison     | ❌                    |
 | `expected_intent`     | string           | 📋       | Expected intent for intent evaluation| ❌                    |
 | `expected_tool_calls` | list[list[list[dict]]] | 📋 | Expected tool call sequences (multiple alternative sets) | ❌ |
@@ -300,6 +307,7 @@ embedding:
 > 📋 **Required based on metrics**: Some fields are required only when using specific metrics
 
 Examples
+> - `expected_keywords`: Required for `custom:keywords_eval` (case-insensitive substring matching)
 > - `expected_response`: Required for `custom:answer_correctness`
 > - `expected_intent`: Required for `custom:intent_eval`
 > - `expected_tool_calls`: Required for `custom:tool_eval` (multiple alternative sets format)
diff --git a/config/system.yaml b/config/system.yaml
index 24cab1f2..435ec687 100644
--- a/config/system.yaml
+++ b/config/system.yaml
@@ -76,6 +76,9 @@ metrics_metadata:
       description: "Is what we retrieved actually relevant to user query?"
 
     # Custom metrics
+    "custom:keywords_eval":  # boolean eval (either 0 or 1)
+      description: "Keywords (ALL) matching evaluation with alternative sets"
+
     "custom:answer_correctness":
       threshold: 0.75
       description: "Correctness vs expected answer using custom LLM evaluation"
diff --git a/src/lightspeed_evaluation/core/metrics/custom/__init__.py b/src/lightspeed_evaluation/core/metrics/custom/__init__.py
index a9985573..112dafa5 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/__init__.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/__init__.py
@@ -1,6 +1,7 @@
 """Custom metrics components package."""
 
 from lightspeed_evaluation.core.metrics.custom.custom import CustomMetrics
+from lightspeed_evaluation.core.metrics.custom.keywords_eval import evaluate_keywords
 from lightspeed_evaluation.core.metrics.custom.prompts import (
     ANSWER_CORRECTNESS_PROMPT,
     INTENT_EVALUATION_PROMPT,
@@ -9,6 +10,7 @@
 
 __all__ = [
     "CustomMetrics",
+    "evaluate_keywords",
     "evaluate_tool_calls",
     # Prompts
     "ANSWER_CORRECTNESS_PROMPT",
diff --git a/src/lightspeed_evaluation/core/metrics/custom/custom.py b/src/lightspeed_evaluation/core/metrics/custom/custom.py
index 079329db..e2639638 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/custom.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/custom.py
@@ -9,6 +9,7 @@
     ANSWER_CORRECTNESS_PROMPT,
     INTENT_EVALUATION_PROMPT,
 )
+from lightspeed_evaluation.core.metrics.custom.keywords_eval import evaluate_keywords
 from lightspeed_evaluation.core.metrics.custom.tool_eval import evaluate_tool_calls
 from lightspeed_evaluation.core.models import EvaluationScope, TurnData
 from lightspeed_evaluation.core.system.exceptions import LLMError
@@ -28,6 +29,7 @@ def __init__(self, llm_manager: LLMManager):
         )
 
         self.supported_metrics = {
+            "keywords_eval": evaluate_keywords,
             "answer_correctness": self._evaluate_answer_correctness,
             "intent_eval": self._evaluate_intent,
             "tool_eval": self._evaluate_tool_calls,
diff --git a/src/lightspeed_evaluation/core/metrics/custom/keywords_eval.py b/src/lightspeed_evaluation/core/metrics/custom/keywords_eval.py
new file mode 100644
index 00000000..b8876547
--- /dev/null
+++ b/src/lightspeed_evaluation/core/metrics/custom/keywords_eval.py
@@ -0,0 +1,129 @@
+"""Keywords evaluation utilities."""
+
+from typing import Any, Optional
+
+from lightspeed_evaluation.core.models import TurnData
+
+
+def _validate_inputs(
+    is_conversation: bool, turn_data: Optional[TurnData]
+) -> Optional[tuple[Optional[float], str]]:
+    """Return an early (score, reason) result if inputs are unusable, else None."""
+    if is_conversation:
+        return None, "Keywords eval is a turn-level metric"
+
+    if turn_data is None:
+        return None, "TurnData is required for keywords eval evaluation"
+
+    if not turn_data.expected_keywords:
+        return None, "No expected keywords provided for keywords eval evaluation"
+
+    if not turn_data.response:
+        return 0.0, "No response provided for keywords eval evaluation"
+
+    return None
+
+
+def _check_keyword_list(
+    keyword_list: list[str], response_lower: str
+) -> tuple[list[str], bool]:
+    """Return keywords found in the lowercased response and whether all matched."""
+    matched_keywords = []
+    all_matched = True
+
+    for keyword in keyword_list:
+        if keyword.lower() in response_lower:
+            matched_keywords.append(keyword)
+        else:
+            all_matched = False
+
+    return matched_keywords, all_matched
+
+
+def _create_success_result(
+    list_index: int, matched_keywords: list[str]
+) -> tuple[float, str]:
+    """Build the 1.0-score result naming the 1-based option and matched keywords."""
+    matched_str = ", ".join(f"'{kw}'" for kw in matched_keywords)
+    reason = (
+        f"Keywords eval successful: Option {list_index + 1} - "
+        f"all keywords matched: {matched_str}"
+    )
+    return 1.0, reason
+
+
+def _create_failure_result(
+    expected_keywords: list[list[str]], response_lower: str
+) -> tuple[float, str]:
+    """Build the 0.0-score result listing unmatched/matched keywords per option."""
+    failed_details = []
+
+    for list_index, keyword_list in enumerate(expected_keywords):
+        matched_keywords, _ = _check_keyword_list(keyword_list, response_lower)
+        unmatched_keywords = [
+            kw for kw in keyword_list if kw.lower() not in response_lower
+        ]
+
+        if unmatched_keywords:
+            unmatched_str = ", ".join(f"'{kw}'" for kw in unmatched_keywords)
+            matched_str = (
+                ", ".join(f"'{kw}'" for kw in matched_keywords)
+                if matched_keywords
+                else "none"
+            )
+            failed_details.append(
+                f"Option {list_index + 1}: unmatched [{unmatched_str}], matched [{matched_str}]"
+            )
+
+    reason = f"Keywords eval failed: All options failed - {'; '.join(failed_details)}"
+    return 0.0, reason
+
+
+def evaluate_keywords(
+    _conv_data: Any,
+    _turn_idx: Optional[int],
+    turn_data: Optional[TurnData],
+    is_conversation: bool,
+) -> tuple[Optional[float], str]:
+    """Evaluate keywords using substring matching with sequential list checking.
+
+    Logic: Check first option - if all keywords match, evaluation succeeds.
+    If first option fails, try next alternative, and so on.
+    If all alternatives fail, evaluation fails.
+
+    Args:
+        _conv_data: Conversation data (unused)
+        _turn_idx: Turn index (unused)
+        turn_data: Turn data containing response and expected keywords
+        is_conversation: Whether this is conversation-level evaluation
+
+    Returns:
+        tuple: (score: Optional[float], reason: str)
+            - score: 1.0 if an option fully matches, else 0.0; None if not evaluable
+            - reason: Detailed explanation of evaluation results
+    """
+    # Validate inputs; any non-None result is returned as-is (early exit)
+    validation_result = _validate_inputs(is_conversation, turn_data)
+    if validation_result:
+        return validation_result
+
+    if (
+        turn_data is None
+        or turn_data.response is None
+        or turn_data.expected_keywords is None
+    ):
+        return None, "Invalid turn data after validation"
+
+    response_lower = turn_data.response.lower()
+
+    # Check each expected keywords list
+    for list_index, keyword_list in enumerate(turn_data.expected_keywords):
+        matched_keywords, all_matched = _check_keyword_list(
+            keyword_list, response_lower
+        )
+
+        if all_matched:
+            return _create_success_result(list_index, matched_keywords)
+
+    # If we reach here, all alternatives failed
+    return _create_failure_result(turn_data.expected_keywords, response_lower)
diff --git a/src/lightspeed_evaluation/core/models/data.py b/src/lightspeed_evaluation/core/models/data.py
index 3041b025..d7a692a4 100644
--- a/src/lightspeed_evaluation/core/models/data.py
+++ b/src/lightspeed_evaluation/core/models/data.py
@@ -53,6 +53,10 @@ class TurnData(BaseModel):
     contexts: Optional[list[str]] = Field(
         default=None, min_length=1, description="Contexts"
     )
+    expected_keywords: Optional[list[list[str]]] = Field(
+        default=None,
+        description="Expected keywords for keyword evaluation (list of alternatives)",
+    )
     expected_response: Optional[str] = Field(
         default=None, min_length=1, description="Expected response for comparison"
     )
@@ -89,6 +93,36 @@ def validate_turn_metrics(cls, v: Optional[list[str]]) -> Optional[list[str]]:
             v = _validate_and_deduplicate_metrics(v, "Turn metric")
         return v
 
+    @field_validator("expected_keywords")
+    @classmethod
+    def validate_expected_keywords(
+        cls, v: Optional[list[list[str]]]
+    ) -> Optional[list[list[str]]]:
+        """Ensure each alternative group is a non-empty list of non-blank strings."""
+        if v is None:
+            return None
+
+        if not isinstance(v, list):
+            raise ValueError("expected_keywords must be a list of lists")
+
+        # Validate each alternative group
+        for i, keyword_group in enumerate(v):
+            if not isinstance(keyword_group, list):
+                raise ValueError(f"expected_keywords[{i}] must be a list of strings")
+
+            if not keyword_group:
+                raise ValueError(f"expected_keywords[{i}] cannot be empty")
+
+            for j, keyword in enumerate(keyword_group):
+                if not isinstance(keyword, str):
+                    raise ValueError(f"expected_keywords[{i}][{j}] must be a string")
+                if not keyword.strip():
+                    raise ValueError(
+                        f"expected_keywords[{i}][{j}] cannot be empty or whitespace"
+                    )
+
+        return v
+
     @field_validator("expected_tool_calls", mode="before")
     @classmethod
     def validate_expected_tool_calls(
diff --git a/src/lightspeed_evaluation/core/system/validator.py b/src/lightspeed_evaluation/core/system/validator.py
index bc5b3aba..98705d1f 100644
--- a/src/lightspeed_evaluation/core/system/validator.py
+++ b/src/lightspeed_evaluation/core/system/validator.py
@@ -40,6 +40,10 @@
         "required_fields": ["response", "contexts"],
         "description": "requires 'response' and 'contexts' fields",
     },
+    "custom:keywords_eval": {
+        "required_fields": ["response", "expected_keywords"],
+        "description": "requires 'response' and 'expected_keywords' fields",
+    },
     "custom:answer_correctness": {
         "required_fields": ["response", "expected_response"],
         "description": "requires 'response' and 'expected_response' fields",
diff --git a/tests/unit/core/metrics/test_keywords_eval.py b/tests/unit/core/metrics/test_keywords_eval.py
new file mode 100644
index 00000000..df140918
--- /dev/null
+++ b/tests/unit/core/metrics/test_keywords_eval.py
@@ -0,0 +1,189 @@
+"""Tests for keywords eval metric."""
+
+from lightspeed_evaluation.core.metrics.custom.keywords_eval import evaluate_keywords
+from lightspeed_evaluation.core.models import TurnData
+
+
+class TestKeywordsEval:
+    """Unit tests for evaluate_keywords: option matching and input validation."""
+
+    def test_keywords_eval_first_list_all_matched(self):
+        """Test successful keywords evaluation when first list has all keywords matched."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="This response contains openshift-monitoring and yes it exists",
+            expected_keywords=[
+                ["yes", "openshift-monitoring"],  # Option 1: Both keywords should match
+                ["confirmed", "monitoring"],  # Option 2: Should not be checked
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 1.0
+        assert "Keywords eval successful: Option 1" in reason
+        assert "all keywords matched: 'yes', 'openshift-monitoring'" in reason
+
+    def test_keywords_eval_first_list_fails_second_succeeds(self):
+        """Test keywords evaluation when first list fails but second list succeeds."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="This response contains monitoring and confirmed status",
+            expected_keywords=[
+                [
+                    "yes",
+                    "openshift-monitoring",
+                ],  # Option 1: "yes" missing, "openshift-monitoring" missing
+                ["monitoring", "confirmed"],  # Option 2: Both should match
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 1.0
+        assert "Keywords eval successful: Option 2" in reason
+        assert "all keywords matched: 'monitoring', 'confirmed'" in reason
+
+    def test_keywords_eval_all_lists_fail(self):
+        """Test keywords evaluation when all lists fail."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="This response contains nothing relevant",
+            expected_keywords=[
+                ["yes", "openshift-monitoring"],  # Option 1: Both missing
+                ["confirmed", "monitoring"],  # Option 2: Both missing
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 0.0
+        assert "Keywords eval failed: All options failed" in reason
+        assert (
+            "Option 1: unmatched ['yes', 'openshift-monitoring'], matched [none]"
+            in reason
+        )
+        assert (
+            "Option 2: unmatched ['confirmed', 'monitoring'], matched [none]" in reason
+        )
+
+    def test_keywords_eval_partial_match_in_failed_list(self):
+        """Test keywords evaluation with partial matches in failed lists."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="This response contains monitoring but no confirmation",
+            expected_keywords=[
+                ["yes", "confirmed"],  # Option 1: Both missing
+                [
+                    "monitoring",
+                    "openshift",
+                ],  # Option 2: "monitoring" matches, "openshift" missing
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 0.0
+        assert "Keywords eval failed: All options failed" in reason
+        assert "Option 1: unmatched ['yes', 'confirmed'], matched [none]" in reason
+        assert "Option 2: unmatched ['openshift'], matched ['monitoring']" in reason
+
+    def test_keywords_eval_case_insensitive(self):
+        """Test that keywords evaluation is case insensitive."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="This response contains YES and OPENSHIFT-MONITORING",
+            expected_keywords=[
+                ["yes", "openshift-monitoring"]  # Should match despite case differences
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 1.0
+        assert "Keywords eval successful: Option 1" in reason
+        assert "all keywords matched: 'yes', 'openshift-monitoring'" in reason
+
+    def test_keywords_eval_substring_matching(self):
+        """Test that keywords evaluation works with substring matching."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="The openshift-monitoring-operator is running successfully",
+            expected_keywords=[
+                [
+                    "monitoring",
+                    "success",
+                ]  # Substrings of "openshift-monitoring-operator" and "successfully"
+            ],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 1.0
+        assert "Keywords eval successful: Option 1" in reason
+        assert "all keywords matched: 'monitoring', 'success'" in reason
+
+    def test_keywords_eval_no_expected_keywords(self):
+        """Test keywords evaluation when no expected keywords provided."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="Some response",
+            expected_keywords=None,
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score is None
+        assert "No expected keywords provided" in reason
+
+    def test_keywords_eval_no_response(self):
+        """Test keywords evaluation when no response provided."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response=None,
+            expected_keywords=[["yes"], ["monitoring"]],
+        )
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 0.0
+        assert "No response provided" in reason
+
+    def test_keywords_eval_empty_response(self):
+        """Test keywords evaluation with empty response."""
+        # Create turn data with valid response first, then modify it
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            response="valid response",
+            expected_keywords=[["yes"], ["monitoring"]],
+        )
+        # Manually set response to empty to bypass validation
+        turn_data.response = ""
+
+        score, reason = evaluate_keywords(None, 0, turn_data, False)
+
+        assert score == 0.0
+        assert "No response provided" in reason
+
+    def test_keywords_eval_conversation_level_error(self):
+        """Test that keywords_eval returns error for conversation-level evaluation."""
+        score, reason = evaluate_keywords(None, None, None, True)
+
+        assert score is None
+        assert "Keywords eval is a turn-level metric" in reason
+
+    def test_keywords_eval_no_turn_data(self):
+        """Test keywords evaluation when no turn data provided."""
+        score, reason = evaluate_keywords(None, 0, None, False)
+
+        assert score is None
+        assert "TurnData is required" in reason
diff --git a/tests/unit/core/models/test_data.py b/tests/unit/core/models/test_data.py
index b6d86ff9..4c306dac 100644
--- a/tests/unit/core/models/test_data.py
+++ b/tests/unit/core/models/test_data.py
@@ -332,3 +332,136 @@ def test_is_single_set_format_detection(self):
         assert expected is not None
         assert len(expected) == 1  # One alternative set
         assert len(expected[0]) == 2  # Two sequences in that set
+
+
+class TestTurnDataKeywordsValidation:
+    """Unit tests for TurnData.expected_keywords accepted and rejected shapes."""
+
+    def test_valid_expected_keywords_single_group(self):
+        """Test valid expected_keywords with single group."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            expected_keywords=[["keyword1", "keyword2"]],
+        )
+
+        assert turn_data.expected_keywords == [["keyword1", "keyword2"]]
+
+    def test_valid_expected_keywords_multiple_groups(self):
+        """Test valid expected_keywords with multiple groups."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            expected_keywords=[
+                ["yes", "confirmed"],
+                ["monitoring", "namespace"],
+                ["success", "complete"],
+            ],
+        )
+
+        assert len(turn_data.expected_keywords) == 3
+        assert turn_data.expected_keywords[0] == ["yes", "confirmed"]
+        assert turn_data.expected_keywords[1] == ["monitoring", "namespace"]
+        assert turn_data.expected_keywords[2] == ["success", "complete"]
+
+    def test_valid_expected_keywords_none(self):
+        """Test that None is valid for expected_keywords."""
+        turn_data = TurnData(
+            turn_id="test_turn", query="Test query", expected_keywords=None
+        )
+
+        assert turn_data.expected_keywords is None
+
+    def test_invalid_expected_keywords_not_list(self):
+        """Test that non-list expected_keywords raises ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn", query="Test query", expected_keywords="not_a_list"
+            )
+
+        assert "Input should be a valid list" in str(exc_info.value)
+
+    def test_invalid_expected_keywords_inner_not_list(self):
+        """Test that non-list inner elements raise ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn",
+                query="Test query",
+                expected_keywords=["not_a_list", ["valid_list"]],
+            )
+
+        assert "Input should be a valid list" in str(exc_info.value)
+
+    def test_invalid_expected_keywords_empty_inner_list(self):
+        """Test that empty inner lists raise ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn",
+                query="Test query",
+                expected_keywords=[[], ["valid_list"]],
+            )
+
+        assert "expected_keywords[0] cannot be empty" in str(exc_info.value)
+
+    def test_invalid_expected_keywords_non_string_element(self):
+        """Test that non-string elements in inner lists raise ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn",
+                query="Test query",
+                expected_keywords=[["valid_string", 123]],
+            )
+
+        assert "Input should be a valid string" in str(exc_info.value)
+
+    def test_invalid_expected_keywords_empty_string_element(self):
+        """Test that empty string elements raise ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn",
+                query="Test query",
+                expected_keywords=[["valid_string", ""]],
+            )
+
+        assert "expected_keywords[0][1] cannot be empty or whitespace" in str(
+            exc_info.value
+        )
+
+    def test_invalid_expected_keywords_whitespace_only_element(self):
+        """Test that whitespace-only string elements raise ValidationError."""
+        with pytest.raises(ValidationError) as exc_info:
+            TurnData(
+                turn_id="test_turn",
+                query="Test query",
+                expected_keywords=[["valid_string", "   "]],
+            )
+
+        assert "expected_keywords[0][1] cannot be empty or whitespace" in str(
+            exc_info.value
+        )
+
+    def test_complex_valid_expected_keywords(self):
+        """Test complex but valid expected_keywords structure."""
+        turn_data = TurnData(
+            turn_id="test_turn",
+            query="Test query",
+            expected_keywords=[
+                ["yes", "confirmed", "affirmative"],
+                [
+                    "openshift-monitoring",
+                    "monitoring namespace",
+                ],
+                [
+                    "created successfully",
+                    "creation complete",
+                    "successfully created",
+                ],
+                ["pod", "container", "workload"],
+            ],
+        )
+
+        assert len(turn_data.expected_keywords) == 4
+        assert len(turn_data.expected_keywords[0]) == 3
+        assert len(turn_data.expected_keywords[1]) == 2
+        assert len(turn_data.expected_keywords[2]) == 3
+        assert len(turn_data.expected_keywords[3]) == 3