Skip to content

Commit

Permalink
Merge pull request #699 from snipsco/release/0.17.4
Browse files Browse the repository at this point in the history
Release 0.17.4
  • Loading branch information
adrienball authored Nov 20, 2018
2 parents a3e00e8 + ffdb55f commit ddcfcec
Show file tree
Hide file tree
Showing 36 changed files with 176 additions and 103 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
# Changelog
All notable changes to this project will be documented in this file.

## [0.17.4] - 2018-11-20
### Added
- Add a `--config` argument in the metrics CLI

### Changed
- Replace "parser_threshold" with "matching_strictness" in the dataset format
- Optimize loading and inference runtime
- Disable stemming for intent classification in default configs


## [0.17.3] - 2018-10-18
### Fixed
- Crash with num2words and floats
Expand Down Expand Up @@ -165,6 +175,7 @@ several commands.
- Fix compiling issue with `bindgen` dependency when installing from source
- Fix issue in `CRFSlotFiller` when handling builtin entities

[0.17.4]: https://github.com/snipsco/snips-nlu/compare/0.17.3...0.17.4
[0.17.3]: https://github.com/snipsco/snips-nlu/compare/0.17.2...0.17.3
[0.17.2]: https://github.com/snipsco/snips-nlu/compare/0.17.1...0.17.2
[0.17.1]: https://github.com/snipsco/snips-nlu/compare/0.17.0...0.17.1
Expand Down
3 changes: 2 additions & 1 deletion CONTRIBUTORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ Contributors
This is a list of everyone who has made significant contributions to Snips NLU, in alphabetical order. Thanks a lot for the great work!

* `Alice Coucke <https://github.com/choufractal>`_
* `ddorian <https://github.com/ddorian>`_
* `Josh Meyer <https://github.com/JRMeyer>`_
* `Matthieu Brouillard <https://github.com/McFoggy>`_
* `Matthieu Brouillard <https://github.com/McFoggy>`_
4 changes: 2 additions & 2 deletions docs/source/data_model.rst
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ entity in your dataset as follows:
"synonyms": []
}
],
"parser_threshold": 1.0
"matching_strictness": 1.0
}
}
Expand Down Expand Up @@ -363,6 +363,6 @@ not your custom entity is automatically extensible:
"automatically_extensible": true,
"use_synonyms": true,
"data": [],
"parser_threshold": 1.0
"matching_strictness": 1.0
}
}
4 changes: 2 additions & 2 deletions docs/source/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Now, the ``"entities"`` part of the generated json looks like that:
"value": "garden"
}
],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
},
"snips/temperature": {}
Expand Down Expand Up @@ -186,7 +186,7 @@ and that we want our assistant to cover. Additionally, we add some
"value": "garden"
}
],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
},
"snips/temperature": {}
Expand Down
2 changes: 1 addition & 1 deletion sample_datasets/beverage_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
]
}
],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
},
"snips/number": {}
Expand Down
2 changes: 1 addition & 1 deletion sample_datasets/flights_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"synonyms": ["new york", "big apple"]
}
],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
},
"snips/datetime": {}
Expand Down
4 changes: 2 additions & 2 deletions sample_datasets/lights_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"color": {
"automatically_extensible": true,
"data": [],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
},
"room": {
Expand All @@ -18,7 +18,7 @@
"synonyms": []
}
],
"parser_threshold": 1.0,
"matching_strictness": 1.0,
"use_synonyms": true
}
},
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
required = [
"enum34>=1.1,<2.0; python_version<'3.4'",
"future>=0.16,<0.17",
"numpy==1.14.0",
"numpy>=1.15,<1.16",
"scipy>=1.0,<2.0",
"scikit-learn>=0.19,<0.20",
"sklearn-crfsuite>=0.3.6,<0.4",
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
__email__ = "[email protected], [email protected]"
__license__ = "Apache License, Version 2.0"

__version__ = "0.17.3"
__version__ = "0.17.4"
__model_version__ = "0.17.0"

__download_url__ = "https://github.com/snipsco/snips-nlu-language-resources/releases/download"
Expand Down
12 changes: 6 additions & 6 deletions snips_nlu/cli/dataset/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@
import six
from future.utils import with_metaclass

from snips_nlu.constants import (AUTOMATICALLY_EXTENSIBLE, DATA,
PARSER_THRESHOLD, SYNONYMS, USE_SYNONYMS,
VALUE)
from snips_nlu.constants import (
AUTOMATICALLY_EXTENSIBLE, DATA, MATCHING_STRICTNESS, SYNONYMS,
USE_SYNONYMS, VALUE)
from snips_nlu.entity_parser.builtin_entity_parser import is_builtin_entity

AUTO_EXT_REGEX = re.compile(r'^#\sautomatically_extensible=(true|false)\s*$')
Expand All @@ -38,12 +38,12 @@ class CustomEntity(Entity):
"""

def __init__(self, name, utterances, automatically_extensible,
use_synonyms, parser_threshold=1.0):
use_synonyms, matching_strictness=1.0):
super(CustomEntity, self).__init__(name)
self.utterances = utterances
self.automatically_extensible = automatically_extensible
self.use_synonyms = use_synonyms
self.parser_threshold = parser_threshold
self.matching_strictness = matching_strictness

@classmethod
def from_file(cls, filepath):
Expand Down Expand Up @@ -86,7 +86,7 @@ def json(self):
AUTOMATICALLY_EXTENSIBLE: self.automatically_extensible,
USE_SYNONYMS: self.use_synonyms,
DATA: [u.json for u in self.utterances],
PARSER_THRESHOLD: self.parser_threshold
MATCHING_STRICTNESS: self.matching_strictness
}


Expand Down
42 changes: 38 additions & 4 deletions snips_nlu/cli/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,31 +4,56 @@
from pathlib import Path

import plac
from snips_nlu_metrics import Engine

from snips_nlu import SnipsNLUEngine, load_resources
from snips_nlu.utils import json_string


def make_engine_cls(config):
    """Build an engine class pre-bound to a fixed NLU *config*.

    The returned class implements the ``snips_nlu_metrics`` ``Engine``
    interface (``fit`` / ``parse``) so it can be passed wherever a plain
    ``SnipsNLUEngine`` class is expected, while forcing the given config.
    """

    class ConfigEngine(Engine):
        def __init__(self):
            # The underlying engine is created lazily, at fit time.
            self.engine = None
            self.config = config

        def fit(self, dataset):
            engine = SnipsNLUEngine(self.config)
            self.engine = engine.fit(dataset)
            return self

        def parse(self, text):
            # Delegate parsing to the fitted underlying engine.
            return self.engine.parse(text)

    return ConfigEngine


@plac.annotations(
dataset_path=("Path to the dataset file", "positional", None, str),
output_path=("Destination path for the json metrics", "positional", None,
str),
config_path=("Path to a NLU engine config file", "option", "c", str),
nb_folds=("Number of folds to use for the cross-validation", "option", "n",
int),
train_size_ratio=("Fraction of the data that we want to use for training "
"(between 0 and 1)", "option", "t", float),
exclude_slot_metrics=("Exclude slot metrics and slot errors in the output",
"flag", "s", bool),
include_errors=("Include parsing errors in the output", "flag", "i", bool))
def cross_val_metrics(dataset_path, output_path, nb_folds=5,
def cross_val_metrics(dataset_path, output_path, config_path=None, nb_folds=5,
train_size_ratio=1.0, exclude_slot_metrics=False,
include_errors=False):
def progression_handler(progress):
print("%d%%" % int(progress * 100))

if config_path is not None:
with Path(config_path).open("r", encoding="utf-8") as f:
config = json.load(f)
engine_cls = make_engine_cls(config)
else:
engine_cls = SnipsNLUEngine

metrics_args = dict(
dataset=dataset_path,
engine_class=SnipsNLUEngine,
engine_class=engine_cls,
progression_handler=progression_handler,
nb_folds=nb_folds,
train_size_ratio=train_size_ratio,
Expand All @@ -55,15 +80,24 @@ def progression_handler(progress):
None, str),
output_path=("Destination path for the json metrics", "positional", None,
str),
config_path=("Path to a NLU engine config file", "option", "c", str),
exclude_slot_metrics=("Exclude slot metrics and slot errors in the output",
"flag", "s", bool),
include_errors=("Include parsing errors in the output", "flag", "i", bool))
def train_test_metrics(train_dataset_path, test_dataset_path, output_path,
exclude_slot_metrics=False, include_errors=False):
config_path=None, exclude_slot_metrics=False,
include_errors=False):
if config_path is not None:
with Path(config_path).open("r", encoding="utf-8") as f:
config = json.load(f)
engine_cls = make_engine_cls(config)
else:
engine_cls = SnipsNLUEngine

metrics_args = dict(
train_dataset=train_dataset_path,
test_dataset=test_dataset_path,
engine_class=SnipsNLUEngine,
engine_class=engine_cls,
include_slot_metrics=not exclude_slot_metrics
)

Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
END = "end"
BUILTIN_ENTITY_PARSER = "builtin_entity_parser"
CUSTOM_ENTITY_PARSER = "custom_entity_parser"
PARSER_THRESHOLD = "parser_threshold"
MATCHING_STRICTNESS = "matching_strictness"

# resources
STOP_WORDS = "stop_words"
Expand Down
25 changes: 13 additions & 12 deletions snips_nlu/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
from future.utils import iteritems, itervalues
from snips_nlu_ontology import get_all_languages

from snips_nlu.constants import (AUTOMATICALLY_EXTENSIBLE, CAPITALIZE, DATA,
ENTITIES, ENTITY, INTENTS, LANGUAGE,
PARSER_THRESHOLD, SLOT_NAME, SYNONYMS, TEXT,
USE_SYNONYMS, UTTERANCES, VALIDATED, VALUE)
from snips_nlu.entity_parser.builtin_entity_parser import (BuiltinEntityParser,
is_builtin_entity,
is_gazetteer_entity)
from snips_nlu.constants import (
AUTOMATICALLY_EXTENSIBLE, CAPITALIZE, DATA, ENTITIES, ENTITY, INTENTS,
LANGUAGE, MATCHING_STRICTNESS, SLOT_NAME, SYNONYMS, TEXT, USE_SYNONYMS,
UTTERANCES, VALIDATED, VALUE)
from snips_nlu.entity_parser.builtin_entity_parser import (
BuiltinEntityParser, is_builtin_entity, is_gazetteer_entity)
from snips_nlu.preprocessing import tokenize_light
from snips_nlu.string_variations import get_string_variations
from snips_nlu.utils import validate_key, validate_keys, validate_type
Expand Down Expand Up @@ -135,21 +134,23 @@ def validate_and_format_custom_entity(entity, queries_entities, language,
validate_type(entity, dict)

# TODO: this is here temporarily, only to allow backward compatibility
if PARSER_THRESHOLD not in entity:
entity[PARSER_THRESHOLD] = 1.0
if MATCHING_STRICTNESS not in entity:
strictness = entity.get("parser_threshold", 1.0)

entity[MATCHING_STRICTNESS] = strictness

mandatory_keys = [USE_SYNONYMS, AUTOMATICALLY_EXTENSIBLE, DATA,
PARSER_THRESHOLD]
MATCHING_STRICTNESS]
validate_keys(entity, mandatory_keys, object_label="entity")
validate_type(entity[USE_SYNONYMS], bool)
validate_type(entity[AUTOMATICALLY_EXTENSIBLE], bool)
validate_type(entity[DATA], list)
validate_type(entity[PARSER_THRESHOLD], float)
validate_type(entity[MATCHING_STRICTNESS], float)

formatted_entity = dict()
formatted_entity[AUTOMATICALLY_EXTENSIBLE] = entity[
AUTOMATICALLY_EXTENSIBLE]
formatted_entity[PARSER_THRESHOLD] = entity[PARSER_THRESHOLD]
formatted_entity[MATCHING_STRICTNESS] = entity[MATCHING_STRICTNESS]
use_synonyms = entity[USE_SYNONYMS]

# Validate format and filter out unused data
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/default_configs/config_de.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@
"sublinear_tf": False,
"pvalue_threshold": 0.4,
"word_clusters_name": None,
"use_stemming": True
"use_stemming": False
},
"random_seed": None
}
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/default_configs/config_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@
"sublinear_tf": False,
"pvalue_threshold": 0.4,
"word_clusters_name": None,
"use_stemming": True
"use_stemming": False
},
"random_seed": None
}
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/default_configs/config_es.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
"sublinear_tf": False,
"pvalue_threshold": 0.4,
"word_clusters_name": None,
"use_stemming": True
"use_stemming": False
},
"random_seed": None
}
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/default_configs/config_fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
"sublinear_tf": False,
"pvalue_threshold": 0.4,
"word_clusters_name": None,
"use_stemming": True
"use_stemming": False
},
"random_seed": None
}
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/default_configs/config_it.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@
"sublinear_tf": False,
"pvalue_threshold": 0.4,
"word_clusters_name": None,
"use_stemming": True
"use_stemming": False
},
"random_seed": None
}
Expand Down
6 changes: 3 additions & 3 deletions snips_nlu/entity_parser/custom_entity_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from snips_nlu_ontology import GazetteerEntityParser

from snips_nlu.constants import (
END, ENTITIES, LANGUAGE, PARSER_THRESHOLD, RES_MATCH_RANGE, START,
END, ENTITIES, LANGUAGE, MATCHING_STRICTNESS, RES_MATCH_RANGE, START,
UTTERANCES, ENTITY_KIND)
from snips_nlu.entity_parser.builtin_entity_parser import is_builtin_entity
from snips_nlu.entity_parser.custom_entity_parser_usage import (
Expand Down Expand Up @@ -87,7 +87,7 @@ def parse(self, text, scope=None, use_cache=True):
if cache_key not in self._cache:
parser_result = self._parse(text, scope)
self._cache[cache_key] = parser_result
return deepcopy(self._cache[cache_key])
return self._cache[cache_key]

def _parse(self, text, scope):
tokens = tokenize(text, self.language)
Expand Down Expand Up @@ -123,7 +123,7 @@ def _create_custom_entity_parser_configuration(entities):
{
"entity_identifier": entity_name,
"entity_parser": {
"threshold": entity[PARSER_THRESHOLD],
"threshold": entity[MATCHING_STRICTNESS],
"gazetteer": [
{
"raw_value": k,
Expand Down
3 changes: 1 addition & 2 deletions snips_nlu/entity_parser/entity_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from __future__ import unicode_literals

from abc import ABCMeta, abstractmethod
from copy import deepcopy

from future.builtins import object
from future.utils import with_metaclass
Expand Down Expand Up @@ -34,7 +33,7 @@ def parse(self, text, scope=None, use_cache=True):
if cache_key not in self._cache:
parser_result = self._parser.parse(text, scope)
self._cache[cache_key] = parser_result
return deepcopy(self._cache[cache_key])
return self._cache[cache_key]

@abstractmethod
def persist(self, path):
Expand Down
Loading

0 comments on commit ddcfcec

Please sign in to comment.