Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add tests for state and tracker featurizers #7086

Merged
merged 7 commits into from
Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion rasa/core/featurizers/tracker_featurizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def _featurize_states(
def _convert_labels_to_ids(
trackers_as_actions: List[List[Text]], domain: Domain
) -> np.ndarray:
# store labels in numpy arrays so that it corresponds to np arrays of input features
# store labels in numpy arrays so that it corresponds to np arrays of input
# features
return np.array(
[
np.array(
Expand Down
2 changes: 1 addition & 1 deletion rasa/shared/core/trackers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def past_states(self, domain: Domain) -> List[State]:
"""
return domain.states_for_tracker_history(self)

def change_loop_to(self, loop_name: Text) -> None:
def change_loop_to(self, loop_name: Optional[Text]) -> None:
"""Set the currently active loop.

Args:
Expand Down
1 change: 0 additions & 1 deletion rasa/utils/tensorflow/model_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Union,
Generator,
NamedTuple,
ValuesView,
ItemsView,
)
from collections import defaultdict, OrderedDict
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,21 +1,30 @@
from typing import Text
from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer
from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
from rasa.shared.core.domain import Domain
import numpy as np
from rasa.shared.nlu.constants import ACTION_TEXT, ACTION_NAME, ENTITIES, TEXT, INTENT
from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
from rasa.shared.nlu.interpreter import RegexInterpreter
import scipy.sparse

import pytest

def test_fail_to_load_non_existent_featurizer():
assert TrackerFeaturizer.load("non_existent_class") is None
from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
from rasa.shared.core.domain import Domain
from rasa.shared.nlu.constants import (
ACTION_TEXT,
ACTION_NAME,
ENTITIES,
TEXT,
INTENT,
FEATURE_TYPE_SEQUENCE,
FEATURE_TYPE_SENTENCE,
)
from rasa.shared.core.constants import ACTIVE_LOOP, SLOTS
from rasa.shared.nlu.interpreter import RegexInterpreter
from rasa.shared.core.slots import Slot
from rasa.shared.nlu.training_data.features import Features


def test_single_state_featurizer_without_interpreter_state_not_with_action_listen():
"""This test are for encoding state without a trained interpreter.
action_name is not action_listen, so, INTENT, TEXT and ENTITIES should not be featurized
action_name is not action_listen, so, INTENT, TEXT and ENTITIES should not be
featurized.
"""
f = SingleStateFeaturizer()
f._default_feature_states[INTENT] = {"a": 0, "b": 1}
Expand All @@ -32,6 +41,7 @@ def test_single_state_featurizer_without_interpreter_state_not_with_action_liste
},
interpreter=RegexInterpreter(),
)

# user input is ignored as prev action is not action_listen
assert list(encoded.keys()) == [ACTION_NAME, ACTIVE_LOOP, SLOTS]
assert (
Expand All @@ -44,8 +54,7 @@ def test_single_state_featurizer_without_interpreter_state_not_with_action_liste


def test_single_state_featurizer_without_interpreter_state_with_action_listen():
"""
This test are for encoding state without a trained interpreter.
"""This test are for encoding state without a trained interpreter.
action_name is action_listen, so, INTENT and ENTITIES should be featurized
while text shouldn't because we don't have an interpreter.
"""
Expand All @@ -64,6 +73,7 @@ def test_single_state_featurizer_without_interpreter_state_with_action_listen():
},
interpreter=RegexInterpreter(),
)

# we featurize all the features except for *_text ones because NLU wasn't trained
assert list(encoded.keys()) == [INTENT, ACTION_NAME, ACTIVE_LOOP, SLOTS]
assert (encoded[INTENT][0].features != scipy.sparse.coo_matrix([[1, 0]])).nnz == 0
Expand All @@ -82,7 +92,9 @@ def test_single_state_featurizer_without_interpreter_state_no_intent_no_action_n
f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1, "action_listen": 2}
f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}
# check that no intent / action_name features are added when the interpreter isn't there and

# check that no intent / action_name features are added when the interpreter
# isn't there and
# intent / action_name not in input
encoded = f.encode_state(
{
Expand All @@ -93,6 +105,7 @@ def test_single_state_featurizer_without_interpreter_state_no_intent_no_action_n
},
interpreter=RegexInterpreter(),
)

assert list(encoded.keys()) == [ACTIVE_LOOP, SLOTS]
assert (
encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]])
Expand All @@ -104,14 +117,41 @@ def test_single_state_featurizer_correctly_encodes_non_existing_value():
f = SingleStateFeaturizer()
f._default_feature_states[INTENT] = {"a": 0, "b": 1}
f._default_feature_states[ACTION_NAME] = {"c": 0, "d": 1}

encoded = f.encode_state(
{"user": {"intent": "e"}, "prev_action": {"action_name": "action_listen"}},
interpreter=RegexInterpreter(),
)

assert list(encoded.keys()) == [INTENT, ACTION_NAME]
assert (encoded[INTENT][0].features != scipy.sparse.coo_matrix([[0, 0]])).nnz == 0


def test_single_state_featurizer_prepare_from_domain():
    """Check the feature-state mappings that ``prepare_from_domain`` builds."""
    domain = Domain(
        intents=["greet"],
        entities=["name"],
        slots=[Slot("name")],
        templates={},
        forms=[],
        action_names=["utter_greet", "action_check_weather"],
    )

    featurizer = SingleStateFeaturizer()
    featurizer.prepare_from_domain(domain)
    feature_states = featurizer._default_feature_states

    # Only a lower bound is checked for intents; presumably the domain adds
    # default intents on top of the ones passed in -- TODO confirm.
    assert len(feature_states[INTENT]) > 1
    assert "greet" in feature_states[INTENT]

    # Exactly one entity and one slot state are expected, each at index 0.
    assert len(feature_states[ENTITIES]) == 1
    assert feature_states[ENTITIES]["name"] == 0
    assert len(feature_states[SLOTS]) == 1
    assert feature_states[SLOTS]["name_0"] == 0

    # Only a lower bound is checked for actions as well (see note on intents).
    assert len(feature_states[ACTION_NAME]) > 2
    assert "utter_greet" in feature_states[ACTION_NAME]
    assert "action_check_weather" in feature_states[ACTION_NAME]

    # The domain was created without forms, so no active-loop states exist.
    assert len(feature_states[ACTIVE_LOOP]) == 0


def test_single_state_featurizer_creates_encoded_all_actions():
domain = Domain(
intents=[],
Expand All @@ -121,9 +161,11 @@ def test_single_state_featurizer_creates_encoded_all_actions():
forms=[],
action_names=["a", "b", "c", "d"],
)

f = SingleStateFeaturizer()
f.prepare_from_domain(domain)
encoded_actions = f.encode_all_actions(domain, RegexInterpreter())

assert len(encoded_actions) == len(domain.action_names)
assert all(
[
Expand All @@ -138,13 +180,15 @@ def test_single_state_featurizer_uses_dtype_float():
f._default_feature_states[INTENT] = {"a": 0, "b": 1}
f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1}
f._default_feature_states[ENTITIES] = {"c": 0}

encoded = f.encode_state(
{
"user": {"intent": "a", "entities": ["c"]},
"prev_action": {"action_name": "d"},
},
interpreter=RegexInterpreter(),
)

assert encoded[ACTION_NAME][0].features.dtype == np.float32


Expand All @@ -161,6 +205,7 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen(
f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2}
f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

encoded = f.encode_state(
{
"user": {"text": "a ball", "intent": "b", "entities": ["c"]},
Expand All @@ -173,7 +218,9 @@ def test_single_state_featurizer_with_interpreter_state_with_action_listen(
},
interpreter=interpreter,
)
# check all the features are encoded and *_text features are encoded by a densefeaturizer

# check all the features are encoded and *_text features are encoded by a
# dense featurizer
assert sorted(list(encoded.keys())) == sorted(
[TEXT, ENTITIES, ACTION_NAME, SLOTS, ACTIVE_LOOP, INTENT, ACTION_TEXT]
)
Expand Down Expand Up @@ -203,6 +250,7 @@ def test_single_state_featurizer_with_interpreter_state_not_with_action_listen(
f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2}
f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

encoded = f.encode_state(
{
"user": {"text": "a ball", "intent": "b", "entities": ["c"]},
Expand All @@ -212,6 +260,7 @@ def test_single_state_featurizer_with_interpreter_state_not_with_action_listen(
},
interpreter=interpreter,
)

# check user input is ignored when action is not action_listen
assert list(encoded.keys()) == [ACTION_TEXT, ACTION_NAME, ACTIVE_LOOP, SLOTS]
assert encoded[ACTION_TEXT][0].features.shape[-1] == 300
Expand All @@ -234,12 +283,14 @@ def test_single_state_featurizer_with_interpreter_state_with_no_action_name(
from rasa.core.agent import Agent

interpreter = Agent.load(unpacked_trained_moodbot_path).interpreter

f = SingleStateFeaturizer()
f._default_feature_states[INTENT] = {"a": 0, "b": 1}
f._default_feature_states[ENTITIES] = {"c": 0}
f._default_feature_states[ACTION_NAME] = {"e": 0, "d": 1, "action_listen": 2}
f._default_feature_states[SLOTS] = {"e_0": 0, "f_0": 1, "g_0": 2}
f._default_feature_states[ACTIVE_LOOP] = {"h": 0, "i": 1, "j": 2, "k": 3}

encoded = f.encode_state(
{
"user": {"text": "a ball", "intent": "b", "entities": ["c"]},
Expand All @@ -249,9 +300,36 @@ def test_single_state_featurizer_with_interpreter_state_with_no_action_name(
},
interpreter=interpreter,
)

assert list(encoded.keys()) == [ACTION_TEXT, ACTIVE_LOOP, SLOTS]
assert encoded[ACTION_TEXT][0].features.shape[-1] == 300
assert (encoded[SLOTS][0].features != scipy.sparse.coo_matrix([[1, 0, 0]])).nnz == 0
assert (
encoded[ACTIVE_LOOP][0].features != scipy.sparse.coo_matrix([[0, 0, 0, 1]])
).nnz == 0


def test_state_features_for_attribute_raises_on_not_supported_attribute():
    """An unsupported attribute name must lead to a ``ValueError``."""
    featurizer = SingleStateFeaturizer()
    empty_sub_state = {}
    unsupported_attribute = "not-supported-attribute"

    with pytest.raises(ValueError):
        featurizer._state_features_for_attribute(
            empty_sub_state, unsupported_attribute
        )


def test_to_sparse_sentence_features():
    """Check the conversion of a sparse sequence feature to a sentence feature.

    A single sequence-type feature of shape (5, 10) goes in; one sentence-type
    feature of shape (1, 10) with the same origin and attribute must come out.
    """
    sequence_matrix = scipy.sparse.csr_matrix(np.random.randint(5, size=(5, 10)))
    sequence_feature = Features(
        sequence_matrix, FEATURE_TYPE_SEQUENCE, TEXT, "some-featurizer"
    )
    features = [sequence_feature]

    sentence_features = SingleStateFeaturizer._to_sparse_sentence_features(features)

    assert len(sentence_features) == 1

    converted = sentence_features[0]
    assert FEATURE_TYPE_SENTENCE == converted.type
    assert features[0].origin == converted.origin
    assert features[0].attribute == converted.attribute
    # The five sequence rows are reduced to a single sentence-level row.
    assert converted.features.shape == (1, 10)
94 changes: 94 additions & 0 deletions tests/core/featurizers/test_tracker_featurizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
from pathlib import Path
from typing import Text

import numpy as np
import pytest

from rasa.core.featurizers.single_state_featurizer import SingleStateFeaturizer
from rasa.core.featurizers.tracker_featurizers import (
    TrackerFeaturizer,
    FullDialogueTrackerFeaturizer,
    MaxHistoryTrackerFeaturizer,
)
from rasa.shared.core.domain import Domain
from rasa.shared.nlu.interpreter import RegexInterpreter
from tests.core.conftest import DEFAULT_DOMAIN_PATH_WITH_SLOTS
from tests.core.utilities import tracker_from_dialogue_file


def test_fail_to_load_non_existent_featurizer():
    """``TrackerFeaturizer.load`` must return ``None`` for this bogus location."""
    loaded_featurizer = TrackerFeaturizer.load("non_existent_class")

    assert loaded_featurizer is None


def test_persist_and_load_tracker_featurizer(tmp_path: Path, moodbot_domain: Domain):
    """Round-trip a tracker featurizer through ``persist`` and ``load``.

    Args:
        tmp_path: Temporary directory supplied by pytest's ``tmp_path`` fixture,
            which is a ``pathlib.Path`` (not a string).
        moodbot_domain: Domain fixture used to prepare the state featurizer.
    """
    state_featurizer = SingleStateFeaturizer()
    state_featurizer.prepare_from_domain(moodbot_domain)
    tracker_featurizer = MaxHistoryTrackerFeaturizer(state_featurizer)

    tracker_featurizer.persist(tmp_path)

    loaded_tracker_featurizer = TrackerFeaturizer.load(tmp_path)

    # Both the featurizer itself and its nested state featurizer must be restored.
    assert loaded_tracker_featurizer is not None
    assert loaded_tracker_featurizer.state_featurizer is not None


def test_convert_labels_to_ids():
    """Check that action names are mapped to their indices in the domain.

    The two trackers have different numbers of actions, so the expected result
    is a ragged collection: one id array per tracker.
    """
    trackers_as_actions = [
        ["utter_greet", "utter_channel"],
        ["utter_greet", "utter_default", "utter_goodbye"],
    ]

    tracker_featurizer = TrackerFeaturizer()
    domain = Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS)

    actual_output = tracker_featurizer._convert_labels_to_ids(
        trackers_as_actions, domain
    )
    # The rows have different lengths; NumPy requires an explicit
    # ``dtype=object`` to build such a ragged array (implicit creation is
    # deprecated since NumPy 1.19 and an error from 1.24 on).
    expected_output = np.array(
        [np.array([14, 11]), np.array([14, 12, 13])], dtype=object
    )

    assert expected_output.size == actual_output.size
    for expected_array, actual_array in zip(expected_output, actual_output):
        # ``array_equal`` also returns False (instead of broadcasting or
        # raising) when the two rows differ in shape.
        assert np.array_equal(expected_array, actual_array)


def test_featurize_trackers_raises_on_missing_state_featurizer(default_domain: Domain):
    """Featurizing without a state featurizer must raise ``ValueError``."""
    # No state featurizer is handed to the constructor on purpose.
    featurizer_without_state_featurizer = TrackerFeaturizer()
    no_trackers = []

    with pytest.raises(ValueError):
        featurizer_without_state_featurizer.featurize_trackers(
            no_trackers, default_domain, RegexInterpreter()
        )


def test_featurize_trackers_with_full_dialogue_tracker_featurizer(
    moodbot_domain: Domain,
):
    """Smoke-test ``FullDialogueTrackerFeaturizer.featurize_trackers``.

    Only checks that non-empty state features and labels come back for the
    moodbot test dialogue; the contents are not inspected.
    """
    tracker_featurizer = FullDialogueTrackerFeaturizer(SingleStateFeaturizer())

    dialogue_file = "data/test_dialogues/moodbot.json"
    tracker = tracker_from_dialogue_file(dialogue_file, moodbot_domain)

    featurized = tracker_featurizer.featurize_trackers(
        [tracker], moodbot_domain, RegexInterpreter()
    )
    state_features, labels = featurized

    assert state_features is not None
    assert len(state_features) > 0
    assert labels is not None
    assert len(labels) > 0


def test_featurize_trackers_with_max_history_tracker_featurizer(moodbot_domain: Domain):
    """Smoke-test ``MaxHistoryTrackerFeaturizer.featurize_trackers``.

    Only checks that non-empty state features and labels come back for the
    moodbot test dialogue; the contents are not inspected.
    """
    tracker_featurizer = MaxHistoryTrackerFeaturizer(SingleStateFeaturizer())

    dialogue_file = "data/test_dialogues/moodbot.json"
    trackers = [tracker_from_dialogue_file(dialogue_file, moodbot_domain)]

    state_features, labels = tracker_featurizer.featurize_trackers(
        trackers, moodbot_domain, RegexInterpreter()
    )

    assert state_features is not None
    assert len(state_features) > 0
    assert labels is not None
    assert len(labels) > 0