RasaHQ · Ghostvv · Dec 11, 2020 · Dec 9, 2020 · Dec 10, 2020 · Dec 10, 2020
diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py
@@ -206,9 +206,8 @@ def featurize_trackers(
 
         return tracker_state_features, label_ids, entity_tags
 
-    @staticmethod
     def _choose_last_user_input(
-        trackers_as_states: List[List[State]], use_text_for_last_user_input: bool
+        self, trackers_as_states: List[List[State]], use_text_for_last_user_input: bool
     ) -> None:
         for states in trackers_as_states:
             last_state = states[-1]
@@ -228,6 +227,9 @@ def _choose_last_user_input(
                 if last_state.get(USER, {}).get(TEXT):
                     del last_state[USER][TEXT]
 
+        # make sure that all dialogue steps are either intent or text based
+        self._remove_user_text_if_intent(trackers_as_states)
+
     def prediction_states(
         self,
         trackers: List[DialogueStateTracker],

diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py
@@ -512,8 +512,9 @@ def _featurize_tracker_for_e2e(
         domain: Domain,
         interpreter: NaturalLanguageInterpreter,
     ) -> List[List[Dict[Text, List["Features"]]]]:
-        # Construct two examples in the batch to be fed to the model - 
-        # One by featurizing last user text and second an optional one(see conditions below).
+        # construct two examples in the batch to be fed to the model -
+        # one by featurizing last user text
+        # and second - an optional one (see conditions below),
         # the first example in the constructed batch either does not contain user input
         # or uses intent or text based on whether TED is e2e only.
         tracker_state_features = self.featurizer.create_state_features(
@@ -590,7 +591,7 @@ def predict_action_probabilities(
             confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])
 
         optional_events = self._create_optional_event_for_entities(
-            output, interpreter, tracker
+            output, is_e2e_prediction, interpreter, tracker
         )
 
         return self._prediction(
@@ -602,28 +603,35 @@ def predict_action_probabilities(
     def _create_optional_event_for_entities(
         self,
         prediction_output: Dict[Text, tf.Tensor],
+        is_e2e_prediction: bool,
         interpreter: NaturalLanguageInterpreter,
         tracker: DialogueStateTracker,
     ) -> Optional[List[Event]]:
-        if tracker.latest_action_name != ACTION_LISTEN_NAME:
-            # entities belong to the last user message
+        if tracker.latest_action_name != ACTION_LISTEN_NAME or not is_e2e_prediction:
+            # entities belong only to the last user message
+            # and only if user text was used for prediction,
             # a user message always comes after action listen
-            return None
+            return
 
         if not self.config[ENTITY_RECOGNITION]:
             # entity recognition is not turned on, no entities can be predicted
-            return None
+            return
 
+        # The batch dimension of entity prediction is not the same as batch size,
+        # rather it is the number of last (if max history featurizer else all)
+        # text inputs in the batch
+        # therefore, in order to pick entities from the latest user message
+        # we need to pick entities from the last batch dimension of entity prediction
         (
             predicted_tags,
             confidence_values,
         ) = rasa.utils.train_utils.entity_label_to_tags(
-            prediction_output, self._entity_tag_specs
+            prediction_output, self._entity_tag_specs, prediction_index=-1
         )
 
         if ENTITY_ATTRIBUTE_TYPE not in predicted_tags:
             # no entities detected
-            return None
+            return
 
         # entities belong to the last message of the tracker
         # convert the predicted tags to actual entities

diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py
@@ -930,10 +930,13 @@ def _get_user_sub_state(
 
         # filter entities based on intent config
         # sub_state will be transformed to frozenset therefore we need to
-        # convert the list to the tuple
+        # convert the set to the tuple
         # sub_state is transformed to frozenset because we will later hash it
         # for deduplication
-        entities = tuple(self._get_featurized_entities(latest_message))
+        entities = tuple(
+            self._get_featurized_entities(latest_message)
+            & set(sub_state.get(rasa.shared.nlu.constants.ENTITIES, ()))
+        )
         if entities:
             sub_state[rasa.shared.nlu.constants.ENTITIES] = entities
         else:

diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
@@ -22,6 +22,7 @@
     LOOP_INTERRUPTED,
     ENTITY_LABEL_SEPARATOR,
     ACTION_SESSION_START_NAME,
+    ACTION_LISTEN_NAME,
 )
 from rasa.shared.exceptions import UnsupportedFeatureException
 from rasa.shared.nlu.constants import (
@@ -440,13 +441,15 @@ def __str__(self) -> Text:
                 f"{entity[ENTITY_ATTRIBUTE_VALUE]} "
                 f"(Type: {entity[ENTITY_ATTRIBUTE_TYPE]}, "
                 f"Role: {entity.get(ENTITY_ATTRIBUTE_ROLE)}, "
-                f"Group: {entity.get(ENTITY_ATTRIBUTE_GROUP)}"
+                f"Group: {entity.get(ENTITY_ATTRIBUTE_GROUP)})"
                 for entity in self.entities
             ]
             entities = f", entities: {', '.join(entities)}"
 
         return (
-            f"UserUttered(text: {self.text}, intent: {self.intent_name}" f"{entities}))"
+            f"UserUttered(text: {self.text}, intent: {self.intent_name}"
+            f"{entities}"
+            f", use_text_for_featurization: {self.use_text_for_featurization})"
         )
 
     @staticmethod
@@ -583,7 +586,20 @@ def create_external(
 
 
 # noinspection PyProtectedMember
-class DefinePrevUserUtteredFeaturization(Event):
+class DefinePrevUserUttered(Event):
+    """Defines the family of events that are used to update previous user utterance."""
+
+    def as_story_string(self) -> None:
+        """Returns the event as story string.
+
+        Returns:
+            None, as this event should not appear inside the story.
+        """
+        return
+
+
+# noinspection PyProtectedMember
+class DefinePrevUserUtteredFeaturization(DefinePrevUserUttered):
     """Stores information whether action was predicted based on text or intent."""
 
     type_name = "user_featurization"
@@ -613,10 +629,6 @@ def __hash__(self) -> int:
         """Returns unique hash for event."""
         return hash(self.use_text_for_featurization)
 
-    def as_story_string(self) -> None:
-        """Skips representing the event in stories."""
-        return None
-
     @classmethod
     def _from_parameters(cls, parameters) -> "DefinePrevUserUtteredFeaturization":
         return DefinePrevUserUtteredFeaturization(
@@ -631,9 +643,25 @@ def as_dict(self) -> Dict[Text, Any]:
         d.update({USE_TEXT_FOR_FEATURIZATION: self.use_text_for_featurization})
         return d
 
+    def apply_to(self, tracker: "DialogueStateTracker") -> None:
+        """Applies event to current conversation state.
+
+        Args:
+            tracker: The current conversation state.
+        """
+        if tracker.latest_action_name != ACTION_LISTEN_NAME:
+            # featurization belong only to the last user message
+            # a user message is always followed by action listen
+            return
+
+        # update previous user message's featurization based on this event
+        tracker.latest_message.use_text_for_featurization = (
+            self.use_text_for_featurization
+        )
+
 
 # noinspection PyProtectedMember
-class DefinePrevUserUtteredEntities(Event):
+class DefinePrevUserUtteredEntities(DefinePrevUserUttered):
     """Event that is used to set entities on a previous user uttered event."""
 
     type_name = "user_entities"
@@ -667,14 +695,6 @@ def __eq__(self, other) -> bool:
         """Compares this event with another event."""
         return isinstance(other, DefinePrevUserUtteredEntities)
 
-    def as_story_string(self) -> None:
-        """Returns the event as story string.
-
-        Returns:
-            None, as this event should not appear inside the story.
-        """
-        return None
-
     @classmethod
     def _from_parameters(cls, parameters) -> "DefinePrevUserUtteredEntities":
         return DefinePrevUserUtteredEntities(
@@ -693,6 +713,21 @@ def as_dict(self) -> Dict[Text, Any]:
         d.update({ENTITIES: self.entities})
         return d
 
+    def apply_to(self, tracker: "DialogueStateTracker") -> None:
+        """Applies event to current conversation state.
+
+        Args:
+            tracker: The current conversation state.
+        """
+        if tracker.latest_action_name != ACTION_LISTEN_NAME:
+            # entities belong only to the last user message
+            # a user message always comes after action listen
+            return
+
+        for entity in self.entities:
+            if entity not in tracker.latest_message.entities:
+                tracker.latest_message.entities.append(entity)
+
 
 # noinspection PyProtectedMember
 class BotUttered(Event):

diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py
@@ -57,8 +57,7 @@
     ActiveLoop,
     SessionStarted,
     ActionExecutionRejected,
-    DefinePrevUserUtteredFeaturization,
-    DefinePrevUserUtteredEntities,
+    DefinePrevUserUttered,
 )
 from rasa.shared.core.domain import Domain, State
 from rasa.shared.core.slots import Slot
@@ -434,8 +433,6 @@ def applied_events(self) -> List[Event]:
             for event in self.events
             if isinstance(event, ActiveLoop) and event.name
         ]
-        # we'll need to slice events, so convert the deque into a list
-        events_as_list = list(self.events)
 
         applied_events = []
 
@@ -461,46 +458,11 @@ def applied_events(self) -> List[Event]:
                 self._undo_till_previous_loop_execution(
                     event.action_name, applied_events
                 )
-            elif isinstance(event, UserUttered):
-                # update event's featurization based on the future event
-                use_text_for_featurization = self._define_user_featurization(
-                    events_as_list[i + 1 :]
-                )
-                if event.use_text_for_featurization is None:
-                    event.use_text_for_featurization = use_text_for_featurization
-                # update event's entities based on the future event
-                entities = self._define_user_entities(events_as_list[i + 1 :])
-                if entities is not None:
-                    for entity in entities:
-                        if entity not in event.entities:
-                            event.entities.append(entity)
-
-                applied_events.append(event)
             else:
                 applied_events.append(event)
 
         return applied_events
 
-    @staticmethod
-    def _define_user_featurization(future_events: List[Event]) -> bool:
-        for future_event in future_events:
-            if isinstance(future_event, ActionExecuted):
-                # use intent by default
-                return False
-            elif isinstance(future_event, DefinePrevUserUtteredFeaturization):
-                return future_event.use_text_for_featurization
-
-    @staticmethod
-    def _define_user_entities(
-        future_events: List[Event],
-    ) -> Optional[List[Dict[Text, Any]]]:
-        for future_event in future_events:
-            if isinstance(future_event, ActionExecuted):
-                # the search should happen only within one dialogue turn
-                return None
-            if isinstance(future_event, DefinePrevUserUtteredEntities):
-                return future_event.entities
-
     @staticmethod
     def _undo_till_previous(event_type: Type[Event], done_events: List[Event]) -> None:
         """Removes events from `done_events`.

diff --git a/rasa/utils/train_utils.py b/rasa/utils/train_utils.py
@@ -196,13 +196,16 @@ def entity_label_to_tags(
     model_predictions: Dict[Text, Any],
     entity_tag_specs: List["EntityTagSpec"],
     bilou_flag: bool = False,
+    prediction_index: int = 0,
 ) -> Tuple[Dict[Text, List[Text]], Dict[Text, List[float]]]:
     """Convert the output predictions for entities to the actual entity tags.
 
     Args:
         model_predictions: the output predictions using the entity tag indices
         entity_tag_specs: the entity tag specifications
         bilou_flag: if 'True', the BILOU tagging schema was used
+        prediction_index: the index in the batch of predictions
+            to use for entity extraction
 
     Returns:
         A map of entity tag type, e.g. entity, role, group, to actual entity tags and
@@ -218,8 +221,8 @@ def entity_label_to_tags(
         if not np.any(predictions):
             continue
 
-        confidences = [float(c) for c in confidences[0]]
-        tags = [tag_spec.ids_to_tags[p] for p in predictions[0]]
+        confidences = [float(c) for c in confidences[prediction_index]]
+        tags = [tag_spec.ids_to_tags[p] for p in predictions[prediction_index]]
 
         if bilou_flag:
             (