Skip to content

Commit

Permalink
Merge pull request #242 from snipsco/release/0.6.0
Browse files Browse the repository at this point in the history
Release/0.6.0
  • Loading branch information
ClemDoum authored May 19, 2017
2 parents dc46543 + a6f372d commit beecd7d
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 504 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from setuptools import setup, find_packages

packages = find_packages()
packages = [p for p in find_packages() if "tests" not in p]

PACKAGE_NAME = "snips_nlu"
ROOT_PATH = os.path.dirname(os.path.abspath(__file__))
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.5.0
0.6.0
34 changes: 23 additions & 11 deletions snips_nlu/intent_classifier/snips_intent_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,8 @@ def get_intent(self, text):
raise AssertionError('SnipsIntentClassifier instance must be '
'fitted before `get_intent` can be called')

if len(text) == 0 or len(self.intent_list) == 0:
return None

if len(text) == 0 or len(self.intent_list) == 0 \
or self.featurizer is None:
or self.featurizer is None or self.classifier is None:
return None

if len(self.intent_list) == 1:
Expand All @@ -80,24 +77,39 @@ def get_intent(self, text):
return IntentClassificationResult(intent_name, prob)

def to_dict(self):
featurizer_dict = None
if self.featurizer is not None:
featurizer_dict = self.featurizer.to_dict()
coeffs = None
intercept = None
if self.classifier is not None:
coeffs = self.classifier.coef_.tolist()
intercept = self.classifier.intercept_.tolist()

return {
"classifier_args": self.classifier_args,
"coeffs": self.classifier.coef_.tolist(),
"intercept": self.classifier.intercept_.tolist(),
"coeffs": coeffs,
"intercept": intercept,
"intent_list": self.intent_list,
"language_code": self.language.iso_code,
"featurizer": self.featurizer.to_dict()
"featurizer": featurizer_dict
}

@classmethod
def from_dict(cls, obj_dict):
language = Language.from_iso_code(obj_dict['language_code'])
classifier_args = obj_dict['classifier_args']
classifier = cls(language=language, classifier_args=classifier_args)
sgd_classifier = SGDClassifier(**classifier_args)
sgd_classifier.coef_ = np.array(obj_dict['coeffs'])
sgd_classifier.intercept_ = np.array(obj_dict['intercept'])
sgd_classifier = None
coeffs = obj_dict['coeffs']
intercept = obj_dict['intercept']
if coeffs is not None and intercept is not None:
sgd_classifier = SGDClassifier(**classifier_args)
sgd_classifier.coef_ = np.array(coeffs)
sgd_classifier.intercept_ = np.array(intercept)
classifier.classifier = sgd_classifier
classifier.intent_list = obj_dict['intent_list']
classifier.featurizer = Featurizer.from_dict(obj_dict['featurizer'])
featurizer = obj_dict['featurizer']
if featurizer is not None:
classifier.featurizer = Featurizer.from_dict(featurizer)
return classifier
46 changes: 3 additions & 43 deletions snips_nlu/nlu_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
from duckling import core

from dataset import validate_and_format_dataset
from snips_nlu.built_in_entities import BuiltInEntity, get_builtin_entities, \
is_builtin_entity
from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity
from snips_nlu.constants import (
INTENTS, ENTITIES, UTTERANCES, LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE,
ENTITY, MATCH_RANGE, DATA, SLOT_NAME,
ENTITY, DATA, SLOT_NAME,
USE_SYNONYMS, SYNONYMS, TOKEN_INDEXES, NGRAM)
from snips_nlu.intent_classifier.snips_intent_classifier import \
SnipsIntentClassifier
Expand Down Expand Up @@ -194,8 +193,7 @@ def enrich_slots(slots, other_slots):
class SnipsNLUEngine(NLUEngine):
def __init__(self, language, rule_based_parser=None,
probabilistic_parser=None, entities=None,
slot_name_mapping=None, tagging_threshold=None,
intents_data_sizes=None):
slot_name_mapping=None, intents_data_sizes=None):
super(SnipsNLUEngine, self).__init__(language)
self.rule_based_parser = rule_based_parser
self.probabilistic_parser = probabilistic_parser
Expand All @@ -205,9 +203,6 @@ def __init__(self, language, rule_based_parser=None,
if slot_name_mapping is None:
slot_name_mapping = dict()
self.slot_name_mapping = slot_name_mapping
if tagging_threshold is None:
tagging_threshold = 5
self.tagging_threshold = tagging_threshold
self.intents_data_sizes = intents_data_sizes
self._pre_trained_taggers = dict()
self.tagging_scope = []
Expand All @@ -228,38 +223,6 @@ def _parse(self, text, intent=None):
return _parse(text, self.entities, self.rule_based_parser,
self.probabilistic_parser, intent)

def tag(self, text, intent):
"""
Parse `text` given that its intent is already known to be `intent`.

The text is first parsed with `self._parse(text, intent=intent)`. When
`self.intents_data_sizes[intent]` is below `self.tagging_threshold`,
the slots of that parse are combined, through `enrich_slots`, with
slots from `_tag_seen_entities` and from `get_builtin_entities`, and
the combined result is returned via `Result(...).as_dict()`.
Otherwise the `_parse` result is returned unchanged.

This method is more aggressive (less conservative) than `parse`.
"""
result = self._parse(text, intent=intent)
# Enrichment only applies when the size recorded for this intent is
# strictly below the tagging threshold.
enrich_results = self.intents_data_sizes[
intent] < self.tagging_threshold
if not enrich_results:
# NOTE(review): this branch returns the `_parse` result as-is, while the
# enriched branch below returns `.as_dict()` -- confirm that callers
# expect both shapes.
return result

# Add slots seen in other queries from other intents
seen_entities_slots = _tag_seen_entities(text, self.entities)

# Add builtins entities
builtin_entities = get_builtin_entities(text, self.language,
self.tagging_scope)
# The builtin entity label is passed twice to ParsedSlot; presumably it
# serves as both the entity name and the slot name -- TODO confirm against
# the ParsedSlot signature.
builtin_slots = [ParsedSlot(ent[MATCH_RANGE], ent[VALUE],
ent[ENTITY].label, ent[ENTITY].label)
for ent in builtin_entities]
# NOTE(review): which argument wins on conflicting slots is decided by
# `enrich_slots`, whose body is not visible here -- verify the precedence.
slots = enrich_slots(seen_entities_slots, builtin_slots)

# Add current model results
slots = enrich_slots(slots, result.parsed_slots)

# Order the combined slots by the first element of their match range.
slots = sorted(slots, key=lambda x: x.match_range[0])

# Build a new intent result from the name and probability of the parsed one.
parsed_intent = IntentClassificationResult(
result.parsed_intent.intent_name, result.parsed_intent.probability)
return Result(text, parsed_intent=parsed_intent,
parsed_slots=slots).as_dict()

def fit(self, dataset, intents=None):

"""
Expand Down Expand Up @@ -335,7 +298,6 @@ def to_dict(self):
return {
LANGUAGE: self.language.iso_code,
"slot_name_mapping": self.slot_name_mapping,
"tagging_threshold": self.tagging_threshold,
ENTITIES: self.entities,
"intents_data_sizes": self.intents_data_sizes,
"model": model_dict
Expand All @@ -348,7 +310,6 @@ def from_dict(cls, obj_dict):
"""
language = Language.from_iso_code(obj_dict[LANGUAGE])
slot_name_mapping = obj_dict["slot_name_mapping"]
tagging_threshold = obj_dict["tagging_threshold"]
entities = obj_dict[ENTITIES]
intents_data_sizes = obj_dict["intents_data_sizes"]

Expand All @@ -367,6 +328,5 @@ def from_dict(cls, obj_dict):
language=language, rule_based_parser=rule_based_parser,
probabilistic_parser=probabilistic_parser, entities=entities,
slot_name_mapping=slot_name_mapping,
tagging_threshold=tagging_threshold,
intents_data_sizes=intents_data_sizes
)
Loading

0 comments on commit beecd7d

Please sign in to comment.