RasaHQ · dakshvar22 · Jul 29, 2021 · Jul 12, 2021 · Jul 12, 2021 · Jul 21, 2021
diff --git a/changelog/9203.bugfix.md b/changelog/9203.bugfix.md
@@ -0,0 +1,3 @@
+Fixes a bug which caused training of `UnexpecTEDIntentPolicy` to crash when end-to-end training stories were included in the training data.
+
+Stories with end-to-end training data will now be skipped for the training of `UnexpecTEDIntentPolicy`.
diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
@@ -548,24 +548,40 @@ def _create_model_data(
 
         return model_data
 
+    @staticmethod
+    def _get_trackers_for_training(
+        trackers: List[TrackerWithCachedStates],
+    ) -> List[TrackerWithCachedStates]:
+        """Selects the trackers which should be used for training.
+
+        Args:
+            trackers: All trackers available for training.
+
+        Returns:
+            The given `trackers` list itself; nothing is filtered out here.
+        """
+        # By default, we train on all available trackers.
+        return trackers
+
     def _prepare_for_training(
         self,
-        training_trackers: List[TrackerWithCachedStates],
+        trackers: List[TrackerWithCachedStates],
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
         **kwargs: Any,
     ) -> Tuple[RasaModelData, np.ndarray]:
         """Prepares data to be fed into the model.
 
         Args:
-            training_trackers: List of training trackers to be featurized.
+            trackers: List of training trackers to be featurized.
             domain: Domain of the assistant.
             interpreter: NLU interpreter to be used for featurizing states.
             **kwargs: Any other arguments.
 
         Returns:
             Featurized data to be fed to the model and corresponding label ids.
         """
+        training_trackers = self._get_trackers_for_training(trackers)
         # dealing with training data
         tracker_state_features, label_ids, entity_tags = self._featurize_for_training(
             training_trackers,

diff --git a/rasa/core/policies/unexpected_intent_policy.py b/rasa/core/policies/unexpected_intent_policy.py
@@ -23,6 +23,7 @@
 from rasa.core.featurizers.single_state_featurizer import (
     IntentTokenizerSingleStateFeaturizer,
 )
+from rasa.shared.core.generator import TrackerWithCachedStates
 from rasa.core.constants import UNLIKELY_INTENT_POLICY_PRIORITY, DIALOGUE
 from rasa.core.policies.policy import PolicyPrediction
 from rasa.core.policies.ted_policy import (
@@ -397,6 +398,37 @@ def compute_label_quantiles_post_training(
         # value specified in the configuration.
         self.label_quantiles = self._compute_label_quantiles(label_id_scores)
 
+    @staticmethod
+    def _get_trackers_for_training(
+        trackers: List[TrackerWithCachedStates],
+    ) -> List[TrackerWithCachedStates]:
+        """Selects the trackers that `UnexpecTEDIntentPolicy` can be trained on.
+
+        A tracker is left out if at least one of its events is:
+        1. a `UserUttered` event whose `intent_name` is `None`, or
+        2. an `ActionExecuted` event whose `action_name` is `None`.
+
+        Trackers without such events are kept in their original order.
+
+        Args:
+            trackers: All trackers available for training.
+
+        Returns:
+            A new list with the trackers which should be used for training;
+            it is empty if every tracker contains such an event.
+        """
+        # `any` stops at the first offending event, so the rest of an
+        # incompatible tracker is never inspected.
+        return [
+            candidate
+            for candidate in trackers
+            if not any(
+                (isinstance(evt, UserUttered) and evt.intent_name is None)
+                or (isinstance(evt, ActionExecuted) and evt.action_name is None)
+                for evt in candidate.events
+            )
+        ]
+
     def run_training(
         self, model_data: RasaModelData, label_ids: Optional[np.ndarray] = None
     ) -> None:

diff --git a/tests/core/policies/test_unexpected_intent_policy.py b/tests/core/policies/test_unexpected_intent_policy.py
@@ -14,6 +14,7 @@
     TrackerFeaturizer,
     IntentMaxHistoryTrackerFeaturizer,
 )
+from rasa.shared.core.generator import TrackerWithCachedStates
 from rasa.core.policies.ted_policy import PREDICTION_FEATURES, TEDPolicy
 from rasa.core.policies.unexpected_intent_policy import UnexpecTEDIntentPolicy
 from rasa.shared.core.constants import ACTION_UNLIKELY_INTENT_NAME, ACTION_LISTEN_NAME
@@ -820,3 +821,288 @@ def test_individual_label_metadata(
             test_individual_label_metadata(
                 label_metadata, label_thresholds, similarities, label_index
             )
+
+    @pytest.mark.parametrize(
+        "tracker_events_for_training, expected_trackers_with_events",
+        [
+            # Second tracker is filtered: missing intents and a missing action name
+            (
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello"),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="happy to make it work"),
+                        ActionExecuted(action_text="Great!"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+            ),
+            # Second tracker is filtered: only an action name is missing
+            (
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted(action_text="Great!"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+            ),
+            # Second tracker is filtered: only intents are missing
+            (
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello"),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="happy to make it work"),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+            ),
+            # No filter needed
+            (
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted("utter_goodbye"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+            ),
+            # Only tracker is filtered (missing action name): empty result
+            (
+                [
+                    [
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(text="hello", intent={"name": "greet"}),
+                        ActionExecuted("utter_greet"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                        UserUttered(
+                            text="happy to make it work", intent={"name": "goodbye"}
+                        ),
+                        ActionExecuted(action_text="Great!"),
+                        ActionExecuted(ACTION_LISTEN_NAME),
+                    ],
+                ],
+                [],
+            ),
+        ],
+    )
+    def test_filter_training_trackers(
+        self,
+        tracker_events_for_training: List[List[Event]],
+        expected_trackers_with_events: List[List[Event]],
+        domain: Domain,
+    ):
+        """Trackers with intent-less or name-less events must be filtered out."""
+        trackers_for_training = [
+            TrackerWithCachedStates.from_events(
+                sender_id=f"{tracker_index}", evts=events, domain=domain
+            )
+            for tracker_index, events in enumerate(tracker_events_for_training)
+        ]
+
+        filtered_trackers = UnexpecTEDIntentPolicy._get_trackers_for_training(
+            trackers_for_training
+        )
+        assert len(filtered_trackers) == len(expected_trackers_with_events)
+        for collected_tracker, expected_tracker_events in zip(
+            filtered_trackers, expected_trackers_with_events
+        ):
+            assert list(collected_tracker.events) == expected_tracker_events
+
+
+@pytest.mark.parametrize(
+    "tracker_events, skip_training",
+    [
+        (
+            [
+                [
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="hello", intent={"name": "greet"}),
+                    ActionExecuted("utter_greet"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(
+                        text="happy to make it work", intent={"name": "goodbye"}
+                    ),
+                    ActionExecuted("utter_goodbye"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                ],
+                [
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="hello"),
+                    ActionExecuted("utter_greet"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="happy to make it work"),
+                    ActionExecuted(action_text="Great!"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                ],
+            ],
+            False,  # first tracker: every user event has an intent, every action a name
+        ),
+        (
+            [
+                [
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="hello"),
+                    ActionExecuted("utter_greet"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="happy to make it work"),
+                    ActionExecuted(action_text="Great!"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                ],
+            ],
+            True,  # sole tracker: no intents and one action without a name
+        ),
+        (
+            [
+                [
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="hello"),
+                    ActionExecuted("utter_greet"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="happy to make it work"),
+                    ActionExecuted("utter_goodbye"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                ],
+            ],
+            True,  # sole tracker: no intents, but every action has a name
+        ),
+        (
+            [
+                [
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(text="hello"),
+                    ActionExecuted("utter_greet"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                    UserUttered(
+                        text="happy to make it work", intent={"name": "goodbye"}
+                    ),
+                    ActionExecuted(action_text="Great!"),
+                    ActionExecuted(ACTION_LISTEN_NAME),
+                ],
+            ],
+            True,  # sole tracker: one missing intent and one action without a name
+        ),
+    ],
+)
+def test_train_with_e2e_data(
+    tracker_events: List[List[Event]], skip_training: bool, domain: Domain,
+):
+    policy = UnexpecTEDIntentPolicy(
+        featurizer=IntentMaxHistoryTrackerFeaturizer(
+            IntentTokenizerSingleStateFeaturizer()
+        )
+    )
+    trackers_for_training = [
+        TrackerWithCachedStates.from_events(
+            sender_id=f"{tracker_index}", evts=events, domain=domain
+        )
+        for tracker_index, events in enumerate(tracker_events)
+    ]
+    if skip_training:  # train() is then expected to emit a UserWarning
+        with pytest.warns(UserWarning):
+            policy.train(trackers_for_training, domain, interpreter=RegexInterpreter())
+    else:
+        policy.train(trackers_for_training, domain, interpreter=RegexInterpreter())