add function to add frequently confused intent to intent report #4899

Merged · 34 commits · Dec 12, 2019
Commits
fc07bcb
add function to add frequently confused intent to intent report
amn41 Dec 4, 2019
25cb1e5
update test, add types, format
amn41 Dec 4, 2019
159ef9d
avoid index out of bounds
amn41 Dec 5, 2019
4e8180e
Update rasa/nlu/test.py
amn41 Dec 5, 2019
ec174eb
Update rasa/nlu/test.py
amn41 Dec 5, 2019
7558b29
Update rasa/nlu/test.py
amn41 Dec 5, 2019
791ef37
review comments
amn41 Dec 5, 2019
21361d3
add test with bigger confusion matrix
amn41 Dec 5, 2019
c3f5ffb
fix file
amn41 Dec 5, 2019
3f54895
fix keys len
amn41 Dec 5, 2019
41596d5
type annotation and fix test
amn41 Dec 5, 2019
f96eb79
fix typo
amn41 Dec 5, 2019
bcc118f
add docstring
amn41 Dec 5, 2019
632b115
fix linter errors
amn41 Dec 5, 2019
29189f3
Merge branch 'master' into add-most-confused-intents-to-report
amn41 Dec 5, 2019
8426628
Update rasa/nlu/test.py
amn41 Dec 10, 2019
275d8c8
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
2d12fa2
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
af686fd
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
50951fd
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
16be17a
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
9b5e23f
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
92cf483
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
5558d53
Update tests/nlu/base/test_evaluation.py
amn41 Dec 10, 2019
cae2313
add another condition to test
amn41 Dec 10, 2019
9396f63
add imports
amn41 Dec 10, 2019
6bb8f49
fix error with negative index starting at -0
amn41 Dec 11, 2019
c08962e
add changelog file
amn41 Dec 11, 2019
355c679
Merge branch 'master' into add-most-confused-intents-to-report
amn41 Dec 11, 2019
517ad02
Merge branch 'pin-multidict' into add-most-confused-intents-to-report
amn41 Dec 11, 2019
019be00
Merge branch 'master' into add-most-confused-intents-to-report
amn41 Dec 12, 2019
b93dbe7
Update rasa/nlu/test.py
amn41 Dec 12, 2019
13d32a2
fix import
amn41 Dec 12, 2019
a566f66
Merge branch 'master' into add-most-confused-intents-to-report
amn41 Dec 12, 2019
4 changes: 4 additions & 0 deletions changelog/4899.improvement.rst
@@ -0,0 +1,4 @@
The ``intent_report.json`` created by ``rasa test`` now contains an extra field
``confused_with`` for each intent. This is a dictionary that maps the names of
the most common false positives (predicted when this intent was the target) to
the number of such false positives.
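
For illustration, an intent entry in ``intent_report.json`` might then look like the following (the intent names and counts here are invented):

    "greet": {
        "precision": 1.0,
        "recall": 0.9,
        "f1-score": 0.95,
        "support": 10,
        "confused_with": {
            "goodbye": 1
        }
    }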
45 changes: 41 additions & 4 deletions rasa/nlu/test.py
@@ -398,6 +398,41 @@ def evaluate_response_selections(
    }


def _add_confused_intents_to_report(
    report: Dict[Text, Dict[Text, float]],
    cnf_matrix: np.ndarray,
    labels: Iterable[Text],
) -> Dict[Text, Dict[Text, Union[Dict, float]]]:
    """Adds a "confused_with" field to each intent in the intent evaluation report.

    The value is a dict of {"false_positive_label": false_positive_count} pairs.
    If there are no false positives in the confusion matrix, the dict is empty.
    Typically the two most common false-positive labels are included; three in
    the rare case that the diagonal element of the confusion matrix is not one
    of the three highest values in its row.
    """

    # sort each row of the confusion matrix by count, ascending
    indices = np.argsort(cnf_matrix, axis=1)
    n_candidates = min(3, len(labels))

    for label in labels:
        # it is possible to predict intent 'None'
        if report.get(label):
            report[label]["confused_with"] = {}

    for i, label in enumerate(labels):
        for j in range(n_candidates):
            # take the j-th largest entry in row i
            label_idx = indices[i, -(1 + j)]
            false_pos_label = labels[label_idx]
            false_positives = int(cnf_matrix[i, label_idx])
            if false_pos_label != label and false_positives > 0:
                report[label]["confused_with"][false_pos_label] = false_positives

    return report


def evaluate_intents(
    intent_results: List[IntentEvaluationResult],
    output_directory: Optional[Text],
@@ -416,6 +451,8 @@ def evaluate_intents(
    Others are filtered out. Returns a dictionary containing the
    evaluation result.
    """
    import sklearn.metrics
    import sklearn.utils.multiclass

    # remove empty intent targets
    num_examples = len(intent_results)
@@ -431,10 +468,14 @@
intent_results, "intent_target", "intent_prediction"
)

cnf_matrix = sklearn.metrics.confusion_matrix(target_intents, predicted_intents)
labels = sklearn.utils.multiclass.unique_labels(target_intents, predicted_intents)

    if output_directory:
        report, precision, f1, accuracy = get_evaluation_metrics(
            target_intents, predicted_intents, output_dict=True
        )
        report = _add_confused_intents_to_report(report, cnf_matrix, labels)

        report_filename = os.path.join(output_directory, "intent_report.json")

@@ -463,16 +504,12 @@
        collect_nlu_errors(intent_results, errors_filename)

    if confmat_filename:
        from sklearn.metrics import confusion_matrix
        from sklearn.utils.multiclass import unique_labels
        import matplotlib.pyplot as plt

        if output_directory:
            confmat_filename = os.path.join(output_directory, confmat_filename)
            intent_hist_filename = os.path.join(output_directory, intent_hist_filename)

        cnf_matrix = confusion_matrix(target_intents, predicted_intents)
        labels = unique_labels(target_intents, predicted_intents)
        plot_confusion_matrix(
            cnf_matrix,
            classes=labels,
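
To make the selection logic concrete, here is a minimal sketch of what ``_add_confused_intents_to_report`` produces for a toy confusion matrix. The labels, the counts, and the stand-in report dict are invented for illustration, and the import assumes the private helper is accessed directly from ``rasa.nlu.test``:

    import numpy as np

    from rasa.nlu.test import _add_confused_intents_to_report

    labels = ["affirm", "goodbye", "greet"]
    # rows are true intents, columns are predicted intents
    cnf_matrix = np.array(
        [
            [5, 0, 0],  # "affirm" is never confused
            [0, 9, 1],  # "goodbye" is predicted as "greet" once
            [2, 0, 8],  # "greet" is predicted as "affirm" twice
        ]
    )
    # stand-in for the sklearn classification report; entries must be non-empty
    # dicts, since the helper skips falsy report entries (e.g. intent 'None')
    report = {label: {"support": int(row.sum())} for label, row in zip(labels, cnf_matrix)}

    report = _add_confused_intents_to_report(report, cnf_matrix, labels)
    # report["affirm"]["confused_with"]  == {}
    # report["goodbye"]["confused_with"] == {"greet": 1}
    # report["greet"]["confused_with"]   == {"affirm": 2}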
70 changes: 69 additions & 1 deletion tests/nlu/base/test_evaluation.py
@@ -1,7 +1,10 @@
from typing import Text

import asyncio
import logging

import pytest
from _pytest.tmpdir import TempdirFactory

import rasa.utils.io
from rasa.test import compare_nlu_models
@@ -306,7 +309,13 @@ def test_intent_evaluation_report(tmpdir_factory):

    report = json.loads(rasa.utils.io.read_file(report_filename))

greet_results = {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1}
greet_results = {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1,
"confused_with": {},
}

prediction = {
"text": "hello",
@@ -320,6 +329,65 @@
assert result["predictions"][0] == prediction


def test_intent_evaluation_report_large(tmpdir_factory: TempdirFactory):
    path = tmpdir_factory.mktemp("evaluation")
    report_folder = path / "reports"
    report_filename = report_folder / "intent_report.json"

    rasa.utils.io.create_directory(str(report_folder))

    def correct(label: Text) -> IntentEvaluationResult:
        return IntentEvaluationResult(label, label, "", 1.0)

    def incorrect(label: Text, _label: Text) -> IntentEvaluationResult:
        return IntentEvaluationResult(label, _label, "", 1.0)

    a_results = [correct("A")] * 10
    b_results = [correct("B")] * 7 + [incorrect("B", "C")] * 3
    c_results = [correct("C")] * 3 + [incorrect("C", "D")] + [incorrect("C", "E")]
    d_results = [correct("D")] * 29 + [incorrect("D", "B")] * 3
    e_results = [incorrect("E", "C")] * 5 + [incorrect("E", "")] * 5

    intent_results = a_results + b_results + c_results + d_results + e_results

    evaluate_intents(
        intent_results,
        report_folder,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
    )

    report = json.loads(rasa.utils.io.read_file(str(report_filename)))

    a_results = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 10,
        "confused_with": {},
    }

    e_results = {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 10,
        "confused_with": {"C": 5, "": 5},
    }

    c_confused_with = {
        "D": 1,
        "E": 1,
    }

    assert len(report.keys()) == 8
    assert report["A"] == a_results
    assert report["E"] == e_results
    assert report["C"]["confused_with"] == c_confused_with


def test_response_evaluation_report(tmpdir_factory):
path = tmpdir_factory.mktemp("evaluation").strpath
report_folder = os.path.join(path, "reports")