RasaHQ · wochinge · Dec 7, 2020 · Nov 26, 2020 · Nov 26, 2020 · Dec 1, 2020
diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py
@@ -534,7 +534,7 @@ def add_user_cell(data, cell):
 
     for idx, event in enumerate(applied_events):
         if isinstance(event, ActionExecuted):
-            bot_column.append(colored(event.action_name, "autocyan"))
+            bot_column.append(colored(str(event), "autocyan"))
             if event.confidence is not None:
                 bot_column[-1] += colored(f" {event.confidence:03.2f}", "autowhite")
 

diff --git a/rasa/core/training/story_conflict.py b/rasa/core/training/story_conflict.py
@@ -197,8 +197,9 @@ def _find_conflicting_states(
     state_action_mapping = defaultdict(list)
     for element in _sliced_states_iterator(trackers, domain, max_history):
         hashed_state = element.sliced_states_hash
-        if element.event.as_story_string() not in state_action_mapping[hashed_state]:
-            state_action_mapping[hashed_state] += [element.event.as_story_string()]
+        current_hash = hash(element.event)
+        if current_hash not in state_action_mapping[hashed_state]:
+            state_action_mapping[hashed_state] += [current_hash]
 
     # Keep only conflicting `state_action_mapping`s
     return {
@@ -238,8 +239,7 @@ def _build_conflicts_from_states(
                 conflicts[hashed_state] = StoryConflict(element.sliced_states)
 
             conflicts[hashed_state].add_conflicting_action(
-                action=element.event.as_story_string(),
-                story_name=element.tracker.sender_id,
+                action=str(element.event), story_name=element.tracker.sender_id,
             )
 
     # Return list of conflicts that arise from unpredictable actions

diff --git a/rasa/exceptions.py b/rasa/exceptions.py
@@ -35,4 +35,5 @@ def __init__(self, timestamp: float) -> None:
         super(PublishingError, self).__init__()
 
     def __str__(self) -> Text:
+        """Returns string representation of exception."""
         return str(self.timestamp)
diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py
@@ -12,6 +12,7 @@
 import rasa.shared.utils.common
 from typing import Union
 
+from rasa.shared.constants import DOCS_URL_TRAINING_DATA
 from rasa.shared.core.constants import (
     LOOP_NAME,
     EXTERNAL_MESSAGE_PREFIX,
@@ -22,6 +23,7 @@
     ENTITY_LABEL_SEPARATOR,
     ACTION_SESSION_START_NAME,
 )
+from rasa.shared.exceptions import UnsupportedFeatureException
 from rasa.shared.nlu.constants import (
     ENTITY_ATTRIBUTE_TYPE,
     INTENT,
@@ -323,6 +325,7 @@ def __init__(
         input_channel: Optional[Text] = None,
         message_id: Optional[Text] = None,
         metadata: Optional[Dict] = None,
+        use_text_for_featurization: Optional[bool] = None,
     ) -> None:
         self.text = text
         self.intent = intent if intent else {}
@@ -332,18 +335,16 @@ def __init__(
 
         super().__init__(timestamp, metadata)
 
+        # The featurization is set by the policies during prediction time using a
+        # `DefinePrevUserUtteredFeaturization` event.
+        self.use_text_for_featurization = use_text_for_featurization
         # define how this user utterance should be featurized
         if self.text and not self.intent_name:
             # happens during training
             self.use_text_for_featurization = True
         elif self.intent_name and not self.text:
             # happens during training
             self.use_text_for_featurization = False
-        else:
-            # happens during prediction
-            # featurization should be defined by the policy
-            # and set in the applied events
-            self.use_text_for_featurization = None
 
         self.parse_data = {
             "intent": self.intent,
@@ -489,23 +490,34 @@ def _entity_string(self):
         return ""
 
     def as_story_string(self, e2e: bool = False) -> Text:
-        text_with_entities = md_format_message(
-            self.text or "", self.intent_name, self.entities
-        )
-        if e2e or self.use_text_for_featurization is None:
-            intent_prefix = f"{self.intent_name}: " if self.intent_name else ""
-            return f"{intent_prefix}{text_with_entities}"
+        """Return event as string for Markdown training format.
 
-        if self.intent_name and not self.use_text_for_featurization:
-            return f"{self.intent_name or ''}{self._entity_string()}"
+        Args:
+            e2e: `True` if the event should be printed in the format for
+                end-to-end conversation tests.
 
-        if self.text and self.use_text_for_featurization:
-            return text_with_entities
+        Returns:
+            Event as string.
+        """
+        if self.use_text_for_featurization and not e2e:
+            raise UnsupportedFeatureException(
+                f"Printing end-to-end user utterances is not supported in the "
+                f"Markdown training format. Please use the YAML training data format "
+                f"instead. Please see {DOCS_URL_TRAINING_DATA} for more information."
+            )
 
-        # UserUttered is empty
-        return ""
+        if e2e:
+            text_with_entities = md_format_message(
+                self.text or "", self.intent_name, self.entities
+            )
+
+            intent_prefix = f"{self.intent_name}: " if self.intent_name else ""
+            return f"{intent_prefix}{text_with_entities}"
+
+        return f"{self.intent_name or ''}{self._entity_string()}"
 
     def apply_to(self, tracker: "DialogueStateTracker") -> None:
+        """Applies event to tracker. See docstring of `Event`."""
         tracker.latest_message = self
         tracker.clear_followup_action()
 
@@ -1211,15 +1223,22 @@ def __init__(
 
         super().__init__(timestamp, metadata)
 
-    def __str__(self) -> Text:
+    def __repr__(self) -> Text:
+        """Returns event as string for debugging."""
         return "ActionExecuted(action: {}, policy: {}, confidence: {})".format(
             self.action_name, self.policy, self.confidence
         )
 
+    def __str__(self) -> Text:
+        """Returns event as human readable string."""
+        return self.action_name or self.action_text
+
     def __hash__(self) -> int:
-        return hash(self.action_name)
+        """Returns hash based on the action name or the action text."""
+        return hash(self.action_name or self.action_text)
 
-    def __eq__(self, other) -> bool:
+    def __eq__(self, other: Any) -> bool:
+        """Checks if object is equal to another."""
         if not isinstance(other, ActionExecuted):
             return False
         else:
@@ -1230,6 +1249,14 @@ def __eq__(self, other) -> bool:
             return equal
 
     def as_story_string(self) -> Text:
+        """Returns event in Markdown format."""
+        if self.action_text:
+            raise UnsupportedFeatureException(
+                f"Printing end-to-end bot utterances is not supported in the "
+                f"Markdown training format. Please use the YAML training data format "
+                f"instead. Please see {DOCS_URL_TRAINING_DATA} for more information."
+            )
+
         return self.action_name
 
     @classmethod

diff --git a/rasa/shared/core/training_data/story_reader/markdown_story_reader.py b/rasa/shared/core/training_data/story_reader/markdown_story_reader.py
@@ -203,13 +203,14 @@ def _add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> None:
             parsed_messages.append(parsed)
         self.current_step_builder.add_user_messages(parsed_messages)
 
-    @staticmethod
-    def parse_e2e_message(line: Text, is_used_for_training: bool = True) -> Message:
+    def parse_e2e_message(
+        self, line: Text, is_used_for_training: bool = True
+    ) -> Message:
         """Parses an md list item line based on the current section type.
 
         Matches expressions of the form `<intent>:<example>`. For the
-        syntax of `<example>` see the Rasa docs on NLU training data."""
-
+        syntax of `<example>` see the Rasa docs on NLU training data.
+        """
         # Match three groups:
         # 1) Potential "form" annotation
         # 2) The correct intent
@@ -231,7 +232,7 @@ def parse_e2e_message(line: Text, is_used_for_training: bool = True) -> Message:
         intent = match.group(2)
         message = match.group(4)
         example = entities_parser.parse_training_example(message, intent)
-        if not is_used_for_training:
+        if not is_used_for_training and not self.use_e2e:
             # In case this is a simple conversion from Markdown we should copy over
             # the original text and not parse the entities
             example.data[rasa.shared.nlu.constants.TEXT] = message

diff --git a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py
@@ -275,10 +275,8 @@ def _parse_step(self, step: Union[Text, Dict[Text, Any]]) -> None:
                 f"'{RULE_SNIPPET_ACTION_NAME}'. It will be skipped.",
                 docs=self._get_docs_link(),
             )
-        elif KEY_USER_MESSAGE in step.keys():
-            self._parse_user_message(step)
-        elif KEY_USER_INTENT in step.keys():
-            self._parse_labeled_user_utterance(step)
+        elif KEY_USER_INTENT in step.keys() or KEY_USER_MESSAGE in step.keys():
+            self._parse_user_utterance(step)
         elif KEY_OR in step.keys():
             self._parse_or_statement(step)
         elif KEY_ACTION in step.keys():
@@ -312,36 +310,21 @@ def _get_plural_item_title(self) -> Text:
     def _get_docs_link(self) -> Text:
         raise NotImplementedError()
 
-    def _parse_labeled_user_utterance(self, step: Dict[Text, Any]) -> None:
+    def _parse_user_utterance(self, step: Dict[Text, Any]) -> None:
         utterance = self._parse_raw_user_utterance(step)
-        if utterance:
-            self._validate_that_utterance_is_in_domain(utterance)
-            self.current_step_builder.add_user_messages([utterance])
 
-    def _parse_user_message(self, step: Dict[Text, Any]) -> None:
-        also_has_intent = KEY_USER_INTENT in step
+        if not utterance:
+            return
 
-        if not also_has_intent:
-            intent = {"name": None}
+        is_end_to_end_utterance = KEY_USER_INTENT not in step
+        if is_end_to_end_utterance:
+            utterance.intent = {INTENT_NAME_KEY: None}
         else:
-            intent_name = self._user_intent_from_step(step)
-            intent = {"name": intent_name, "confidence": 1.0}
-
-        user_message = step[KEY_USER_MESSAGE].strip()
-        entities = entities_parser.find_entities_in_training_example(user_message)
-        plain_text = entities_parser.replace_entities(user_message)
-
-        if plain_text.startswith(INTENT_MESSAGE_PREFIX):
-            entities = (
-                RegexInterpreter().synchronous_parse(plain_text).get(ENTITIES, [])
-            )
+            self._validate_that_utterance_is_in_domain(utterance)
 
-        self.current_step_builder.add_user_messages(
-            [UserUttered(plain_text, intent, entities=entities)]
-        )
+        self.current_step_builder.add_user_messages([utterance])
 
     def _validate_that_utterance_is_in_domain(self, utterance: UserUttered) -> None:
-
         intent_name = utterance.intent.get(INTENT_NAME_KEY)
 
         # check if this is a retrieval intent
@@ -389,7 +372,7 @@ def _user_intent_from_step(
     ) -> Tuple[Text, Optional[Text]]:
         user_intent = step.get(KEY_USER_INTENT, "").strip()
 
-        if not user_intent:
+        if not user_intent and KEY_USER_MESSAGE not in step:
             rasa.shared.utils.io.raise_warning(
                 f"Issue found in '{self.source_name}':\n"
                 f"User utterance cannot be empty. "

diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py
@@ -9,6 +9,7 @@
 import rasa.shared.utils.io
 import rasa.shared.core.constants
 from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
+import rasa.shared.core.events
 from rasa.shared.core.events import (
     UserUttered,
     ActionExecuted,
@@ -103,6 +104,7 @@ def stories_to_yaml(
 
         Args:
             story_steps: Original story steps to be converted to the YAML.
+            is_test_story: `True` if the story is an end-to-end conversation test story.
         """
         from rasa.shared.utils.validation import KEY_TRAINING_DATA_FORMAT_VERSION
 
@@ -185,13 +187,6 @@ def stories_contain_loops(stories: List[StoryStep]) -> bool:
             ]
         )
 
-    @staticmethod
-    def _text_is_real_message(user_utterance: UserUttered) -> bool:
-        return (
-            not user_utterance.intent
-            or user_utterance.text != user_utterance.as_story_string()
-        )
-
     @staticmethod
     def process_user_utterance(
         user_utterance: UserUttered, is_test_story: bool = False
@@ -216,12 +211,20 @@ def process_user_utterance(
             )
 
         if user_utterance.text and (
+            # We only print the utterance text if it was an end-to-end prediction
             user_utterance.use_text_for_featurization
-            or user_utterance.use_text_for_featurization is None
+            # or if we want to print a conversation test story.
+            or is_test_story
         ):
-            result[KEY_USER_MESSAGE] = user_utterance.as_story_string()
+            result[KEY_USER_MESSAGE] = LiteralScalarString(
+                rasa.shared.core.events.md_format_message(
+                    user_utterance.text,
+                    user_utterance.intent_name,
+                    user_utterance.entities,
+                )
+            )
 
-        if len(user_utterance.entities):
+        if len(user_utterance.entities) and not is_test_story:
             entities = []
             for entity in user_utterance.entities:
                 if entity["value"]:

diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py
@@ -407,7 +407,11 @@ def fingerprint(self) -> Text:
         Returns:
             fingerprint of the stories
         """
-        self_as_string = self.as_story_string()
+        from rasa.shared.core.training_data.story_writer.yaml_story_writer import (
+            YAMLStoryWriter,
+        )
+
+        self_as_string = YAMLStoryWriter().dumps(self.story_steps)
         return rasa.shared.utils.io.get_text_hash(self_as_string)
 
     def ordered_steps(self) -> List[StoryStep]:

diff --git a/rasa/shared/exceptions.py b/rasa/shared/exceptions.py
@@ -73,3 +73,7 @@ class FileIOException(RasaException):
 
 class InvalidConfigException(ValueError, RasaException):
     """Raised if an invalid configuration is encountered."""
+
+
+class UnsupportedFeatureException(RasaCoreException):
+    """Raised if a requested feature is not supported."""
diff --git a/rasa/shared/importers/importer.py b/rasa/shared/importers/importer.py
@@ -214,34 +214,9 @@ async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
         return await self._importer.get_nlu_data(language)
 
 
-class CoreDataImporter(TrainingDataImporter):
-    """Importer that skips any NLU related file reading."""
-
-    def __init__(self, actual_importer: TrainingDataImporter):
-        self._importer = actual_importer
-
-    async def get_domain(self) -> Domain:
-        return await self._importer.get_domain()
-
-    async def get_stories(
-        self,
-        template_variables: Optional[Dict] = None,
-        use_e2e: bool = False,
-        exclusion_percentage: Optional[int] = None,
-    ) -> StoryGraph:
-        return await self._importer.get_stories(
-            template_variables, use_e2e, exclusion_percentage
-        )
-
-    async def get_config(self) -> Dict:
-        return await self._importer.get_config()
-
-    async def get_nlu_data(self, language: Optional[Text] = "en") -> TrainingData:
-        return TrainingData()
-
-
 class CombinedDataImporter(TrainingDataImporter):
     """A `TrainingDataImporter` that combines multiple importers.
+
     Uses multiple `TrainingDataImporter` instances
     to load the data as if they were a single instance.
     """

diff --git a/rasa/train.py b/rasa/train.py
@@ -175,7 +175,7 @@ async def _train_async_internal(
         )
 
     # We will train nlu if there are any nlu example, including from e2e stories.
-    if nlu_data.is_empty():
+    if nlu_data.contains_no_pure_nlu_data() and not nlu_data.has_e2e_examples():
         print_warning("No NLU data present. Just a Rasa Core model will be trained.")
         return await _train_core_with_validated_data(
             file_importer,