Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

E2e story printing #7388

Merged
merged 35 commits into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
321e937
remove unused imports
wochinge Nov 26, 2020
a8d8e04
test and fix writing YAML stories
wochinge Nov 26, 2020
4d1ba87
move `MarkdownStoryWriter` tests to separate file
wochinge Dec 1, 2020
dae731d
use `tmp_path`
wochinge Dec 1, 2020
810bc3e
consider end-to-end stories correctly
wochinge Dec 1, 2020
4eb2a43
fix story reading for retrieval intents
wochinge Dec 1, 2020
32537d7
fix missing renames for `prepare_from_domain`
wochinge Dec 1, 2020
ecb3355
fixup for last merge in from `master`
wochinge Dec 2, 2020
1493ad7
dump story not as test story
wochinge Dec 2, 2020
12912fb
fix docstring errors
wochinge Dec 2, 2020
cbce93d
remove unused method (not used in Rasa X either)
wochinge Dec 2, 2020
1109185
raise if printing end-to-end things in Markdown
wochinge Dec 2, 2020
60b2dca
add todos
wochinge Dec 2, 2020
330286f
Merge branch 'e2e' into e2e-story-printing
wochinge Dec 2, 2020
d7f2a89
fix error with entity formatting
wochinge Dec 2, 2020
1bfaafd
move to `rasa.shared`
wochinge Dec 2, 2020
76424f5
remove CoreDataImporter
wochinge Dec 2, 2020
4f967b3
change fingerprinting to use yaml writer
wochinge Dec 2, 2020
040dad5
fix tests failing due to new default story file
wochinge Dec 2, 2020
8fe7306
adapt remaining parts to `as_story_string` failing if end-to-end event
wochinge Dec 2, 2020
cdc5ff6
remove `as_story_string` from story validator
wochinge Dec 2, 2020
a382eb6
only train NLU model if data or end to end
wochinge Dec 2, 2020
bec8926
fix import
wochinge Dec 3, 2020
d415218
read and write in test
wochinge Dec 3, 2020
c6a245c
fix displaying of end-to-end actions in rasa interactive
wochinge Dec 3, 2020
c8971c9
skip warning for end-to-end user messages in training data
wochinge Dec 3, 2020
311684b
add docs link
wochinge Dec 3, 2020
d01f4ab
remove trailing whitespace
wochinge Dec 3, 2020
6d239d8
return `NotImplemented` if other class
wochinge Dec 4, 2020
7a34057
remove `md_` as it's not related to md
wochinge Dec 4, 2020
dcbdac1
add docstrings to entire module
wochinge Dec 4, 2020
59c7cc0
add more docstrings
wochinge Dec 7, 2020
97e1860
increase timeout due to failing windows tests
wochinge Dec 7, 2020
8ba989c
improve string representation of `UserUttered`
wochinge Dec 7, 2020
a1e2bee
fix hashing of `UserUttered`
wochinge Dec 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion rasa/core/training/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ def add_user_cell(data, cell):

for idx, event in enumerate(applied_events):
if isinstance(event, ActionExecuted):
bot_column.append(colored(event.action_name, "autocyan"))
bot_column.append(colored(str(event), "autocyan"))
if event.confidence is not None:
bot_column[-1] += colored(f" {event.confidence:03.2f}", "autowhite")

Expand Down
8 changes: 4 additions & 4 deletions rasa/core/training/story_conflict.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,9 @@ def _find_conflicting_states(
state_action_mapping = defaultdict(list)
for element in _sliced_states_iterator(trackers, domain, max_history):
hashed_state = element.sliced_states_hash
if element.event.as_story_string() not in state_action_mapping[hashed_state]:
state_action_mapping[hashed_state] += [element.event.as_story_string()]
current_hash = hash(element.event)
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
if current_hash not in state_action_mapping[hashed_state]:
state_action_mapping[hashed_state] += [current_hash]

# Keep only conflicting `state_action_mapping`s
return {
Expand Down Expand Up @@ -238,8 +239,7 @@ def _build_conflicts_from_states(
conflicts[hashed_state] = StoryConflict(element.sliced_states)

conflicts[hashed_state].add_conflicting_action(
action=element.event.as_story_string(),
story_name=element.tracker.sender_id,
action=str(element.event), story_name=element.tracker.sender_id,
)

# Return list of conflicts that arise from unpredictable actions
Expand Down
1 change: 1 addition & 0 deletions rasa/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ def __init__(self, timestamp: float) -> None:
super(PublishingError, self).__init__()

def __str__(self) -> Text:
"""Returns string representation of exception."""
return str(self.timestamp)
67 changes: 47 additions & 20 deletions rasa/shared/core/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import rasa.shared.utils.common
from typing import Union

from rasa.shared.constants import DOCS_URL_TRAINING_DATA
from rasa.shared.core.constants import (
LOOP_NAME,
EXTERNAL_MESSAGE_PREFIX,
Expand All @@ -22,6 +23,7 @@
ENTITY_LABEL_SEPARATOR,
ACTION_SESSION_START_NAME,
)
from rasa.shared.exceptions import UnsupportedFeatureException
from rasa.shared.nlu.constants import (
ENTITY_ATTRIBUTE_TYPE,
INTENT,
Expand Down Expand Up @@ -323,6 +325,7 @@ def __init__(
input_channel: Optional[Text] = None,
message_id: Optional[Text] = None,
metadata: Optional[Dict] = None,
use_text_for_featurization: Optional[bool] = None,
) -> None:
self.text = text
self.intent = intent if intent else {}
Expand All @@ -332,18 +335,16 @@ def __init__(

super().__init__(timestamp, metadata)

# The featurization is set by the policies during prediction time using a
# `DefinePrevUserUtteredFeaturization` event.
self.use_text_for_featurization = use_text_for_featurization
# define how this user utterance should be featurized
if self.text and not self.intent_name:
# happens during training
self.use_text_for_featurization = True
elif self.intent_name and not self.text:
# happens during training
self.use_text_for_featurization = False
else:
# happens during prediction
# featurization should be defined by the policy
# and set in the applied events
self.use_text_for_featurization = None

self.parse_data = {
"intent": self.intent,
Expand Down Expand Up @@ -489,23 +490,34 @@ def _entity_string(self):
return ""

def as_story_string(self, e2e: bool = False) -> Text:
Ghostvv marked this conversation as resolved.
Show resolved Hide resolved
text_with_entities = md_format_message(
self.text or "", self.intent_name, self.entities
)
if e2e or self.use_text_for_featurization is None:
intent_prefix = f"{self.intent_name}: " if self.intent_name else ""
return f"{intent_prefix}{text_with_entities}"
"""Return event as string for Markdown training format.

if self.intent_name and not self.use_text_for_featurization:
return f"{self.intent_name or ''}{self._entity_string()}"
Args:
e2e: `True` if the event should be printed in the format for
end-to-end conversation tests.

if self.text and self.use_text_for_featurization:
return text_with_entities
Returns:
Event as string.
"""
if self.use_text_for_featurization and not e2e:
raise UnsupportedFeatureException(
f"Printing end-to-end user utterances is not supported in the "
f"Markdown training format. Please use the YAML training data format "
f"instead. Please see {DOCS_URL_TRAINING_DATA} for more information."
)

# UserUttered is empty
return ""
if e2e:
text_with_entities = md_format_message(
self.text or "", self.intent_name, self.entities
)

intent_prefix = f"{self.intent_name}: " if self.intent_name else ""
return f"{intent_prefix}{text_with_entities}"

return f"{self.intent_name or ''}{self._entity_string()}"

def apply_to(self, tracker: "DialogueStateTracker") -> None:
"""Applies event to tracker. See docstring of `Event`."""
tracker.latest_message = self
tracker.clear_followup_action()

Expand Down Expand Up @@ -1211,15 +1223,22 @@ def __init__(

super().__init__(timestamp, metadata)

def __str__(self) -> Text:
def __repr__(self) -> Text:
"""Returns event as string for debugging."""
return "ActionExecuted(action: {}, policy: {}, confidence: {})".format(
self.action_name, self.policy, self.confidence
)

def __str__(self) -> Text:
"""Returns event as human readable string."""
return self.action_name or self.action_text
wochinge marked this conversation as resolved.
Show resolved Hide resolved

def __hash__(self) -> int:
return hash(self.action_name)
"""Returns unique hash for action event."""
return hash(self.action_name or self.action_text)

def __eq__(self, other) -> bool:
def __eq__(self, other: Any) -> bool:
"""Checks if object is equal to another."""
if not isinstance(other, ActionExecuted):
return False
wochinge marked this conversation as resolved.
Show resolved Hide resolved
else:
Expand All @@ -1230,6 +1249,14 @@ def __eq__(self, other) -> bool:
return equal

def as_story_string(self) -> Text:
"""Returns event in Markdown format."""
if self.action_text:
raise UnsupportedFeatureException(
f"Printing end-to-end bot utterances is not supported in the "
f"Markdown training format. Please use the YAML training data format "
f"instead. Please see {DOCS_URL_TRAINING_DATA} for more information."
)

return self.action_name

@classmethod
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,13 +203,14 @@ def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> None:
parsed_messages.append(parsed)
self.current_step_builder.add_user_messages(parsed_messages)

@staticmethod
def parse_e2e_message(line: Text, is_used_for_training: bool = True) -> Message:
def parse_e2e_message(
self, line: Text, is_used_for_training: bool = True
) -> Message:
"""Parses an md list item line based on the current section type.

Matches expressions of the form `<intent>:<example>`. For the
syntax of `<example>` see the Rasa docs on NLU training data."""

syntax of `<example>` see the Rasa docs on NLU training data.
"""
# Match three groups:
# 1) Potential "form" annotation
# 2) The correct intent
Expand All @@ -231,7 +232,7 @@ def parse_e2e_message(line: Text, is_used_for_training: bool = True) -> Message:
intent = match.group(2)
message = match.group(4)
example = entities_parser.parse_training_example(message, intent)
if not is_used_for_training:
if not is_used_for_training and not self.use_e2e:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When is `parse_e2e_message` used while `self.use_e2e` is False?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When converting stories from Markdown to YAML: `core.training.converters.test_story_markdown_to_yaml_converter.test_test_stories` was failing without this check.

# In case this is a simple conversion from Markdown we should copy over
# the original text and not parse the entities
example.data[rasa.shared.nlu.constants.TEXT] = message
Expand Down
39 changes: 11 additions & 28 deletions rasa/shared/core/training_data/story_reader/yaml_story_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,10 +275,8 @@ def _parse_step(self, step: Union[Text, Dict[Text, Any]]) -> None:
f"'{RULE_SNIPPET_ACTION_NAME}'. It will be skipped.",
docs=self._get_docs_link(),
)
elif KEY_USER_MESSAGE in step.keys():
self._parse_user_message(step)
elif KEY_USER_INTENT in step.keys():
self._parse_labeled_user_utterance(step)
elif KEY_USER_INTENT in step.keys() or KEY_USER_MESSAGE in step.keys():
self._parse_user_utterance(step)
elif KEY_OR in step.keys():
self._parse_or_statement(step)
elif KEY_ACTION in step.keys():
Expand Down Expand Up @@ -312,36 +310,21 @@ def _get_plural_item_title(self) -> Text:
def _get_docs_link(self) -> Text:
raise NotImplementedError()

def _parse_labeled_user_utterance(self, step: Dict[Text, Any]) -> None:
def _parse_user_utterance(self, step: Dict[Text, Any]) -> None:
utterance = self._parse_raw_user_utterance(step)
if utterance:
self._validate_that_utterance_is_in_domain(utterance)
self.current_step_builder.add_user_messages([utterance])

def _parse_user_message(self, step: Dict[Text, Any]) -> None:
also_has_intent = KEY_USER_INTENT in step
if not utterance:
return

if not also_has_intent:
intent = {"name": None}
is_end_to_end_utterance = KEY_USER_INTENT not in step
if is_end_to_end_utterance:
utterance.intent = {INTENT_NAME_KEY: None}
else:
intent_name = self._user_intent_from_step(step)
intent = {"name": intent_name, "confidence": 1.0}

user_message = step[KEY_USER_MESSAGE].strip()
entities = entities_parser.find_entities_in_training_example(user_message)
plain_text = entities_parser.replace_entities(user_message)

if plain_text.startswith(INTENT_MESSAGE_PREFIX):
entities = (
RegexInterpreter().synchronous_parse(plain_text).get(ENTITIES, [])
)
self._validate_that_utterance_is_in_domain(utterance)

self.current_step_builder.add_user_messages(
[UserUttered(plain_text, intent, entities=entities)]
)
self.current_step_builder.add_user_messages([utterance])

def _validate_that_utterance_is_in_domain(self, utterance: UserUttered) -> None:

intent_name = utterance.intent.get(INTENT_NAME_KEY)

# check if this is a retrieval intent
Expand Down Expand Up @@ -389,7 +372,7 @@ def _user_intent_from_step(
) -> Tuple[Text, Optional[Text]]:
user_intent = step.get(KEY_USER_INTENT, "").strip()

if not user_intent:
if not user_intent and KEY_USER_MESSAGE not in step:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the rest of this method gracefully handle there being no intent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have tests for it and they pass 🤷‍♂️

rasa.shared.utils.io.raise_warning(
f"Issue found in '{self.source_name}':\n"
f"User utterance cannot be empty. "
Expand Down
23 changes: 13 additions & 10 deletions rasa/shared/core/training_data/story_writer/yaml_story_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import rasa.shared.utils.io
import rasa.shared.core.constants
from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
import rasa.shared.core.events
from rasa.shared.core.events import (
UserUttered,
ActionExecuted,
Expand Down Expand Up @@ -103,6 +104,7 @@ def stories_to_yaml(

Args:
story_steps: Original story steps to be converted to the YAML.
is_test_story: `True` if the story is an end-to-end conversation test story.
"""
from rasa.shared.utils.validation import KEY_TRAINING_DATA_FORMAT_VERSION

Expand Down Expand Up @@ -185,13 +187,6 @@ def stories_contain_loops(stories: List[StoryStep]) -> bool:
]
)

@staticmethod
def _text_is_real_message(user_utterance: UserUttered) -> bool:
return (
not user_utterance.intent
or user_utterance.text != user_utterance.as_story_string()
)

@staticmethod
def process_user_utterance(
user_utterance: UserUttered, is_test_story: bool = False
Expand All @@ -216,12 +211,20 @@ def process_user_utterance(
)

if user_utterance.text and (
# We only print the utterance text if it was an end-to-end prediction
user_utterance.use_text_for_featurization
or user_utterance.use_text_for_featurization is None
# or if we want to print a conversation test story.
or is_test_story
):
result[KEY_USER_MESSAGE] = user_utterance.as_story_string()
result[KEY_USER_MESSAGE] = LiteralScalarString(
rasa.shared.core.events.md_format_message(
wochinge marked this conversation as resolved.
Show resolved Hide resolved
user_utterance.text,
user_utterance.intent_name,
user_utterance.entities,
)
)

if len(user_utterance.entities):
if len(user_utterance.entities) and not is_test_story:
entities = []
for entity in user_utterance.entities:
if entity["value"]:
Expand Down
6 changes: 5 additions & 1 deletion rasa/shared/core/training_data/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,11 @@ def fingerprint(self) -> Text:
Returns:
fingerprint of the stories
"""
self_as_string = self.as_story_string()
from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
YAMLStoryWriter,
)

self_as_string = YAMLStoryWriter().dumps(self.story_steps)
wochinge marked this conversation as resolved.
Show resolved Hide resolved
return rasa.shared.utils.io.get_text_hash(self_as_string)

def ordered_steps(self) -> List[StoryStep]:
Expand Down
4 changes: 4 additions & 0 deletions rasa/shared/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,7 @@ class FileIOException(RasaException):

class InvalidConfigException(ValueError, RasaException):
"""Raised if an invalid configuration is encountered."""


class UnsupportedFeatureException(RasaCoreException):
"""Raised if a requested feature is not supported."""
27 changes: 1 addition & 26 deletions rasa/shared/importers/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,34 +214,9 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
return await self._importer.get_nlu_data(language)


class CoreDataImporter(TrainingDataImporter):
"""Importer that skips any NLU related file reading."""

def __init__(self, actual_importer: TrainingDataImporter):
self._importer = actual_importer

async def get_domain(self) -> Domain:
return await self._importer.get_domain()

async def get_stories(
self,
template_variables: Optional[Dict] = None,
use_e2e: bool = False,
exclusion_percentage: Optional[int] = None,
) -> StoryGraph:
return await self._importer.get_stories(
template_variables, use_e2e, exclusion_percentage
)

async def get_config(self) -> Dict:
return await self._importer.get_config()

async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
return TrainingData()


class CombinedDataImporter(TrainingDataImporter):
"""A `TrainingDataImporter` that combines multiple importers.

Uses multiple `TrainingDataImporter` instances
to load the data as if they were a single instance.
"""
Expand Down
2 changes: 1 addition & 1 deletion rasa/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ async def _train_async_internal(
)

# We will train NLU if there are any NLU examples, including those from e2e stories.
if nlu_data.is_empty():
if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
print_warning("No NLU data present. Just a Rasa Core model will be trained.")
return await _train_core_with_validated_data(
file_importer,
Expand Down
Loading