Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update user messages in apply_to of define events #7503

Merged
merged 13 commits into from
Dec 11, 2020
6 changes: 4 additions & 2 deletions rasa/core/featurizers/tracker_featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,8 @@ def featurize_trackers(

return tracker_state_features, label_ids, entity_tags

@staticmethod
def _choose_last_user_input(
trackers_as_states: List[List[State]], use_text_for_last_user_input: bool
self, trackers_as_states: List[List[State]], use_text_for_last_user_input: bool
) -> None:
for states in trackers_as_states:
last_state = states[-1]
Expand All @@ -228,6 +227,9 @@ def _choose_last_user_input(
if last_state.get(USER, {}).get(TEXT):
del last_state[USER][TEXT]

# make sure that all dialogue steps are either intent or text based
self._remove_user_text_if_intent(trackers_as_states)

def prediction_states(
self,
trackers: List[DialogueStateTracker],
Expand Down
26 changes: 17 additions & 9 deletions rasa/core/policies/ted_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,9 @@ def _featurize_tracker_for_e2e(
domain: Domain,
interpreter: NaturalLanguageInterpreter,
) -> List[List[Dict[Text, List["Features"]]]]:
# Construct two examples in the batch to be fed to the model -
# One by featurizing last user text and second an optional one(see conditions below).
# construct two examples in the batch to be fed to the model -
# one by featurizing last user text
# and second - an optional one (see conditions below),
# the first example in the constructed batch either does not contain user input
# or uses intent or text based on whether TED is e2e only.
tracker_state_features = self.featurizer.create_state_features(
Expand Down Expand Up @@ -590,7 +591,7 @@ def predict_action_probabilities(
confidence = train_utils.normalize(confidence, self.config[RANKING_LENGTH])

optional_events = self._create_optional_event_for_entities(
output, interpreter, tracker
output, is_e2e_prediction, interpreter, tracker
)

return self._prediction(
Expand All @@ -602,28 +603,35 @@ def predict_action_probabilities(
def _create_optional_event_for_entities(
self,
prediction_output: Dict[Text, tf.Tensor],
is_e2e_prediction: bool,
interpreter: NaturalLanguageInterpreter,
tracker: DialogueStateTracker,
) -> Optional[List[Event]]:
if tracker.latest_action_name != ACTION_LISTEN_NAME:
# entities belong to the last user message
if tracker.latest_action_name != ACTION_LISTEN_NAME or not is_e2e_prediction:
# entities belong only to the last user message
# and only if user text was used for prediction,
# a user message always comes after action listen
return None
return
tabergma marked this conversation as resolved.
Show resolved Hide resolved

if not self.config[ENTITY_RECOGNITION]:
# entity recognition is not turned on, no entities can be predicted
return None
return

# The batch dimension of entity prediction is not the same as batch size,
# rather it is the number of last (if max history featurizer else all)
# text inputs in the batch
# therefore, in order to pick entities from the latest user message
# we need to pick entities from the last batch dimension of entity prediction
(
predicted_tags,
confidence_values,
) = rasa.utils.train_utils.entity_label_to_tags(
prediction_output, self._entity_tag_specs
prediction_output, self._entity_tag_specs, prediction_index=-1
)

if ENTITY_ATTRIBUTE_TYPE not in predicted_tags:
# no entities detected
return None
return

# entities belong to the last message of the tracker
# convert the predicted tags to actual entities
Expand Down
7 changes: 5 additions & 2 deletions rasa/shared/core/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,10 +930,13 @@ def _get_user_sub_state(

# filter entities based on intent config
# sub_state will be transformed to frozenset therefore we need to
# convert the list to the tuple
# convert the set to the tuple
# sub_state is transformed to frozenset because we will later hash it
# for deduplication
entities = tuple(self._get_featurized_entities(latest_message))
entities = tuple(
self._get_featurized_entities(latest_message)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nearly there but not entirely.

This still fails because self.latest_message is just a pointer to the event.

def test_tobi():
    tracker = DialogueStateTracker.from_events(
        "Vova",
        evts=[
            ActionExecuted(ACTION_LISTEN_NAME),
            UserUttered("hi", intent={"name": "greet"}),
            DefinePrevUserUtteredFeaturization(True),
            DefinePrevUserUtteredEntities(
                entities=[{"entity": "entity1", "value": "value1"}]
            ),
        ],
    )

    user = list(tracker.events)[1]
    assert isinstance(user, UserUttered)

    # Fails
    assert not user.entities

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should fail

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we want user uttered to have entities

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but this means that when we store the tracker, we store the updated version of UserUttered which would change history.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that's fine

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is no need to rely on a fragile thing such as the time of persistence.

We should only persist UserUttered event after policy prediction happened and all the policy events are applied

There is no need for that as we have the two additional / new events. We will just replay events and come back to the current state. When we discussed the timing of persistence, it was when we considered if we need the additional events after all.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, but we use them differently

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It means that the TrackerStore is no longer a 1:1 presentation of what happened.

which is good, since we effectively made UserUttered a dynamic event

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is good for the ML models, but it's not good for the system itself. Breaking the immutability property is opening the box of the pandora in my opinion.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I saw the other comment, I see your point

& set(sub_state.get(rasa.shared.nlu.constants.ENTITIES, ()))
)
if entities:
sub_state[rasa.shared.nlu.constants.ENTITIES] = entities
else:
Expand Down
67 changes: 51 additions & 16 deletions rasa/shared/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
LOOP_INTERRUPTED,
ENTITY_LABEL_SEPARATOR,
ACTION_SESSION_START_NAME,
ACTION_LISTEN_NAME,
)
from rasa.shared.exceptions import UnsupportedFeatureException
from rasa.shared.nlu.constants import (
Expand Down Expand Up @@ -440,13 +441,15 @@ def __str__(self) -> Text:
f"{entity[ENTITY_ATTRIBUTE_VALUE]} "
f"(Type: {entity[ENTITY_ATTRIBUTE_TYPE]}, "
f"Role: {entity.get(ENTITY_ATTRIBUTE_ROLE)}, "
f"Group: {entity.get(ENTITY_ATTRIBUTE_GROUP)}"
f"Group: {entity.get(ENTITY_ATTRIBUTE_GROUP)})"
for entity in self.entities
]
entities = f", entities: {', '.join(entities)}"

return (
f"UserUttered(text: {self.text}, intent: {self.intent_name}" f"{entities}))"
f"UserUttered(text: {self.text}, intent: {self.intent_name}"
f"{entities}"
f", use_text_for_featurization: {self.use_text_for_featurization})"
)

@staticmethod
Expand Down Expand Up @@ -583,7 +586,20 @@ def create_external(


# noinspection PyProtectedMember
class DefinePrevUserUtteredFeaturization(Event):
class DefinePrevUserUttered(Event):
"""Defines the family of events that are used to update previous user utterance."""

def as_story_string(self) -> None:
"""Returns the event as story string.

Returns:
None, as this event should not appear inside the story.
"""
return


# noinspection PyProtectedMember
class DefinePrevUserUtteredFeaturization(DefinePrevUserUttered):
"""Stores information whether action was predicted based on text or intent."""

type_name = "user_featurization"
Expand Down Expand Up @@ -613,10 +629,6 @@ def __hash__(self) -> int:
"""Returns unique hash for event."""
return hash(self.use_text_for_featurization)

def as_story_string(self) -> None:
"""Skips representing the event in stories."""
return None

@classmethod
def _from_parameters(cls, parameters) -> "DefinePrevUserUtteredFeaturization":
return DefinePrevUserUtteredFeaturization(
Expand All @@ -631,9 +643,25 @@ def as_dict(self) -> Dict[Text, Any]:
d.update({USE_TEXT_FOR_FEATURIZATION: self.use_text_for_featurization})
return d

def apply_to(self, tracker: "DialogueStateTracker") -> None:
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
"""Applies event to current conversation state.

Args:
tracker: The current conversation state.
"""
if tracker.latest_action_name != ACTION_LISTEN_NAME:
# featurization belong only to the last user message
# a user message is always followed by action listen
return

# update previous user message's featurization based on this event
tracker.latest_message.use_text_for_featurization = (
self.use_text_for_featurization
)


# noinspection PyProtectedMember
class DefinePrevUserUtteredEntities(Event):
class DefinePrevUserUtteredEntities(DefinePrevUserUttered):
"""Event that is used to set entities on a previous user uttered event."""

type_name = "user_entities"
Expand Down Expand Up @@ -667,14 +695,6 @@ def __eq__(self, other) -> bool:
"""Compares this event with another event."""
return isinstance(other, DefinePrevUserUtteredEntities)

def as_story_string(self) -> None:
"""Returns the event as story string.

Returns:
None, as this event should not appear inside the story.
"""
return None

@classmethod
def _from_parameters(cls, parameters) -> "DefinePrevUserUtteredEntities":
return DefinePrevUserUtteredEntities(
Expand All @@ -693,6 +713,21 @@ def as_dict(self) -> Dict[Text, Any]:
d.update({ENTITIES: self.entities})
return d

def apply_to(self, tracker: "DialogueStateTracker") -> None:
"""Applies event to current conversation state.

Args:
tracker: The current conversation state.
"""
if tracker.latest_action_name != ACTION_LISTEN_NAME:
# entities belong only to the last user message
# a user message always comes after action listen
return

for entity in self.entities:
if entity not in tracker.latest_message.entities:
tracker.latest_message.entities.append(entity)


# noinspection PyProtectedMember
class BotUttered(Event):
Expand Down
40 changes: 1 addition & 39 deletions rasa/shared/core/trackers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@
ActiveLoop,
SessionStarted,
ActionExecutionRejected,
DefinePrevUserUtteredFeaturization,
DefinePrevUserUtteredEntities,
DefinePrevUserUttered,
)
from rasa.shared.core.domain import Domain, State
from rasa.shared.core.slots import Slot
Expand Down Expand Up @@ -434,8 +433,6 @@ def applied_events(self) -> List[Event]:
for event in self.events
if isinstance(event, ActiveLoop) and event.name
]
# we'll need to slice events, so convert the deque into a list
events_as_list = list(self.events)

applied_events = []

Expand All @@ -461,46 +458,11 @@ def applied_events(self) -> List[Event]:
self._undo_till_previous_loop_execution(
event.action_name, applied_events
)
elif isinstance(event, UserUttered):
# update event's featurization based on the future event
use_text_for_featurization = self._define_user_featurization(
events_as_list[i + 1 :]
)
if event.use_text_for_featurization is None:
event.use_text_for_featurization = use_text_for_featurization
# update event's entities based on the future event
entities = self._define_user_entities(events_as_list[i + 1 :])
if entities is not None:
for entity in entities:
if entity not in event.entities:
event.entities.append(entity)

applied_events.append(event)
else:
applied_events.append(event)

return applied_events

@staticmethod
def _define_user_featurization(future_events: List[Event]) -> bool:
for future_event in future_events:
if isinstance(future_event, ActionExecuted):
# use intent by default
return False
elif isinstance(future_event, DefinePrevUserUtteredFeaturization):
return future_event.use_text_for_featurization

@staticmethod
def _define_user_entities(
future_events: List[Event],
) -> Optional[List[Dict[Text, Any]]]:
for future_event in future_events:
if isinstance(future_event, ActionExecuted):
# the search should happen only within one dialogue turn
return None
if isinstance(future_event, DefinePrevUserUtteredEntities):
return future_event.entities

@staticmethod
def _undo_till_previous(event_type: Type[Event], done_events: List[Event]) -> None:
"""Removes events from `done_events`.
Expand Down
7 changes: 5 additions & 2 deletions rasa/utils/train_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,13 +196,16 @@ def entity_label_to_tags(
model_predictions: Dict[Text, Any],
entity_tag_specs: List["EntityTagSpec"],
bilou_flag: bool = False,
prediction_index: int = 0,
) -> Tuple[Dict[Text, List[Text]], Dict[Text, List[float]]]:
"""Convert the output predictions for entities to the actual entity tags.

Args:
model_predictions: the output predictions using the entity tag indices
entity_tag_specs: the entity tag specifications
bilou_flag: if 'True', the BILOU tagging schema was used
prediction_index: the index in the batch of predictions
to use for entity extraction

Returns:
A map of entity tag type, e.g. entity, role, group, to actual entity tags and
Expand All @@ -218,8 +221,8 @@ def entity_label_to_tags(
if not np.any(predictions):
continue

confidences = [float(c) for c in confidences[0]]
tags = [tag_spec.ids_to_tags[p] for p in predictions[0]]
confidences = [float(c) for c in confidences[prediction_index]]
tags = [tag_spec.ids_to_tags[p] for p in predictions[prediction_index]]

if bilou_flag:
(
Expand Down