Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix data validator ignore_warnings logic #11844

Merged
merged 17 commits into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions rasa/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def verify_intents(self, ignore_warnings: bool = True) -> bool:
f"The intent '{intent}' is listed in the domain file, but "
f"is not found in the NLU training data."
)
everything_is_alright = ignore_warnings and everything_is_alright
everything_is_alright = ignore_warnings or everything_is_alright

for intent in nlu_data_intents:
if intent not in self.domain.intents:
Expand All @@ -87,7 +87,7 @@ def verify_intents(self, ignore_warnings: bool = True) -> bool:
f"should need to add that intent to your domain file!",
docs=DOCS_URL_DOMAINS,
)
everything_is_alright = False
everything_is_alright = ignore_warnings

return everything_is_alright

Expand All @@ -106,7 +106,7 @@ def verify_example_repetition_in_intents(
for text, intents in duplication_hash.items():

if len(duplication_hash[text]) > 1:
everything_is_alright = ignore_warnings and everything_is_alright
everything_is_alright = ignore_warnings
intents_string = ", ".join(sorted(intents))
rasa.shared.utils.io.raise_warning(
f"The example '{text}' was found labeled with multiple "
Expand All @@ -122,7 +122,7 @@ def verify_intents_in_stories(self, ignore_warnings: bool = True) -> bool:
Verifies if the intents used in the stories are valid, and whether
all valid intents are used in the stories."""

everything_is_alright = self.verify_intents(ignore_warnings)
everything_is_alright = self.verify_intents(ignore_warnings=ignore_warnings)

stories_intents = {
event.intent["name"]
Expand All @@ -139,14 +139,14 @@ def verify_intents_in_stories(self, ignore_warnings: bool = True) -> bool:
f"domain file!",
docs=DOCS_URL_DOMAINS,
)
everything_is_alright = False
everything_is_alright = ignore_warnings

for intent in self._non_default_intents():
if intent not in stories_intents:
rasa.shared.utils.io.raise_warning(
f"The intent '{intent}' is not used in any story or rule."
)
everything_is_alright = ignore_warnings and everything_is_alright
everything_is_alright = ignore_warnings or everything_is_alright

return everything_is_alright

Expand Down Expand Up @@ -203,15 +203,15 @@ def verify_utterances_in_stories(self, ignore_warnings: bool = True) -> bool:
f"template defined with its name.",
docs=DOCS_URL_ACTIONS + "#utterance-actions",
)
everything_is_alright = False
everything_is_alright = ignore_warnings
stories_utterances.add(event.action_name)

for utterance in utterance_actions:
if utterance not in stories_utterances:
rasa.shared.utils.io.raise_warning(
f"The utterance '{utterance}' is not used in any story or rule."
)
everything_is_alright = ignore_warnings and everything_is_alright
everything_is_alright = ignore_warnings or everything_is_alright

return everything_is_alright

Expand Down
103 changes: 89 additions & 14 deletions tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,22 @@ def test_verify_nlu_with_e2e_story(tmp_path: Path, nlu_data_path: Path):
steps:
- user: |
hello assistant! Can you help me today?
- action: utter_greet
- story: path 2
steps:
- intent: greet
- action: utter_greet
- intent: affirm
- action: utter_greet
- intent: bot_challenge
- action: utter_greet
- intent: deny
- action: goodbye
- intent: goodbye
- action: utter_goodbye
- intent: mood_great
- action: utter_happy
- intent: mood_unhappy
- action: utter_cheer_up
- action: utter_did_that_help
- action: utter_iamabot
"""
)
importer = RasaFileImporter(
Expand All @@ -49,15 +60,25 @@ def test_verify_nlu_with_e2e_story(tmp_path: Path, nlu_data_path: Path):
)

validator = Validator.from_importer(importer)
assert validator.verify_nlu()
# Since the nlu file actually fails validation,
# record warnings to make sure that the only raised warning
# is about the duplicate example 'good afternoon'
with pytest.warns(UserWarning) as record:
validator.verify_nlu(ignore_warnings=False)
assert len(record) == 1
assert (
"The example 'good afternoon' was found labeled with multiple different"
in record[0].message.args[0]
)


def test_verify_intents_does_not_fail_on_valid_data(nlu_data_path: Text):
importer = RasaFileImporter(
domain_path="data/test_moodbot/domain.yml", training_data_paths=[nlu_data_path]
)
validator = Validator.from_importer(importer)
assert validator.verify_intents()
# force validator to not ignore warnings (default is True)
assert validator.verify_intents(ignore_warnings=False)


def test_verify_intents_does_fail_on_invalid_data(nlu_data_path: Text):
Expand All @@ -66,7 +87,8 @@ def test_verify_intents_does_fail_on_invalid_data(nlu_data_path: Text):
domain_path="data/test_domains/default.yml", training_data_paths=[nlu_data_path]
)
validator = Validator.from_importer(importer)
assert not validator.verify_intents()
# force validator to not ignore warnings (default is True)
assert not validator.verify_intents(ignore_warnings=False)


def test_verify_valid_responses():
Expand All @@ -90,14 +112,16 @@ def test_verify_valid_responses_in_rules(nlu_data_path: Text):
],
)
validator = Validator.from_importer(importer)
assert not validator.verify_utterances_in_stories()
# force validator to not ignore warnings (default is True)
assert not validator.verify_utterances_in_stories(ignore_warnings=False)


def test_verify_story_structure(stories_path: Text):
importer = RasaFileImporter(
domain_path="data/test_domains/default.yml", training_data_paths=[stories_path]
)
validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert validator.verify_story_structure(ignore_warnings=False)


Expand All @@ -107,6 +131,7 @@ def test_verify_bad_story_structure():
training_data_paths=["data/test_yaml_stories/stories_conflicting_2.yml"],
)
validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert not validator.verify_story_structure(ignore_warnings=False)


Expand Down Expand Up @@ -135,6 +160,7 @@ def test_verify_bad_e2e_story_structure_when_text_identical(tmp_path: Path):
training_data_paths=[story_file_name],
)
validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert not validator.verify_story_structure(ignore_warnings=False)


Expand Down Expand Up @@ -167,6 +193,7 @@ def test_verify_correct_e2e_story_structure(tmp_path: Path):
training_data_paths=[story_file_name],
)
validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert validator.verify_story_structure(ignore_warnings=False)


Expand All @@ -192,6 +219,7 @@ def test_verify_correct_e2e_story_structure_with_intents(tmp_path: Path):
training_data_paths=[story_file_name],
)
validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert validator.verify_story_structure(ignore_warnings=False)


Expand All @@ -218,19 +246,22 @@ def test_verify_bad_story_structure_ignore_warnings():
def test_verify_there_is_example_repetition_in_intents(nlu_data_path: Text):
# moodbot nlu data already has duplicated example 'good afternoon'
# for intents greet and goodbye

importer = RasaFileImporter(
domain_path="data/test_moodbot/domain.yml", training_data_paths=[nlu_data_path]
)
validator = Validator.from_importer(importer)
assert not validator.verify_example_repetition_in_intents(False)
# force validator to not ignore warnings (default is True)
assert not validator.verify_example_repetition_in_intents(ignore_warnings=False)


def test_verify_logging_message_for_intent_not_used_in_nlu(
caplog: LogCaptureFixture, validator_under_test: Validator
):
caplog.clear()
with pytest.warns(UserWarning) as record:
validator_under_test.verify_intents(False)
# force validator to not ignore warnings (default is True)
validator_under_test.verify_intents(ignore_warnings=False)

assert (
"The intent 'goodbye' is listed in the domain file, "
Expand All @@ -244,7 +275,8 @@ def test_verify_logging_message_for_intent_not_used_in_story(
):
caplog.clear()
with pytest.warns(UserWarning) as record:
validator_under_test.verify_intents_in_stories(False)
# force validator to not ignore warnings (default is True)
validator_under_test.verify_intents_in_stories(ignore_warnings=False)

assert "The intent 'goodbye' is not used in any story or rule." in (
m.message.args[0] for m in record
Expand All @@ -256,7 +288,8 @@ def test_verify_logging_message_for_unused_utterance(
):
caplog.clear()
with pytest.warns(UserWarning) as record:
validator_under_test.verify_utterances_in_stories(False)
# force validator to not ignore warnings (default is True)
validator_under_test.verify_utterances_in_stories(ignore_warnings=False)

assert "The utterance 'utter_chatter' is not used in any story or rule." in (
m.message.args[0] for m in record
Expand All @@ -272,7 +305,8 @@ def test_verify_logging_message_for_repetition_in_intents(caplog, nlu_data_path:
validator = Validator.from_importer(importer)
caplog.clear() # clear caplog to avoid counting earlier debug messages
with pytest.warns(UserWarning) as record:
validator.verify_example_repetition_in_intents(False)
# force validator to not ignore warnings (default is True)
validator.verify_example_repetition_in_intents(ignore_warnings=False)
assert len(record) == 1
assert "You should fix that conflict " in record[0].message.args[0]

Expand Down Expand Up @@ -316,7 +350,8 @@ def test_verify_there_is_not_example_repetition_in_intents():
training_data_paths=["examples/knowledgebasebot/data/nlu.yml"],
)
validator = Validator.from_importer(importer)
assert validator.verify_example_repetition_in_intents(False)
# force validator to not ignore warnings (default is True)
assert validator.verify_example_repetition_in_intents(ignore_warnings=False)


def test_verify_actions_in_stories_not_in_domain(tmp_path: Path, domain_path: Text):
Expand Down Expand Up @@ -415,7 +450,8 @@ def test_response_selector_responses_in_domain_no_errors():
],
)
validator = Validator.from_importer(importer)
assert validator.verify_utterances_in_stories(ignore_warnings=True)
# force validator to not ignore warnings (default is True)
assert validator.verify_utterances_in_stories(ignore_warnings=False)


def test_invalid_domain_mapping_policy():
Expand Down Expand Up @@ -757,3 +793,42 @@ def test_verify_from_trigger_intent_slot_mapping_not_in_forms_does_not_warn(
with warnings.catch_warnings():
warnings.simplefilter("error")
assert validator.verify_slot_mappings()


def test_verify_utterances_does_not_error_when_no_utterance_template_provided(
tmp_path: Path, nlu_data_path: Path
):
story_file_name = tmp_path / "stories.yml"
with open(story_file_name, "w") as file:
file.write(
f"""
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
stories:
- story: path 1
steps:
- intent: greet
- action: utter_greet
"""
)
domain_file_name = tmp_path / "domain.yml"
with open(domain_file_name, "w") as file:
file.write(
f"""
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
intents:
- greet
actions:
- utter_greet
"""
)
importer = RasaFileImporter(
config_file="data/test_moodbot/config.yml",
domain_path=domain_file_name,
training_data_paths=[story_file_name, nlu_data_path],
)

validator = Validator.from_importer(importer)
# force validator to not ignore warnings (default is True)
assert not validator.verify_utterances_in_stories(ignore_warnings=False)
# test whether ignoring warnings actually works
assert validator.verify_utterances_in_stories(ignore_warnings=True)