Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow to fit SnipsNLUEngine with Dataset object #840

Merged
merged 3 commits into from
Aug 9, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ All notable changes to this project will be documented in this file.
- Allow to bypass the model version check [#830](https://github.com/snipsco/snips-nlu/pull/830)
- Persist `CustomEntityParser` license when needed [#832](https://github.com/snipsco/snips-nlu/pull/832)
- Document metrics CLI [#839](https://github.com/snipsco/snips-nlu/pull/839)
- Allow to fit SnipsNLUEngine with a `Dataset` object [#840](https://github.com/snipsco/snips-nlu/pull/840)

### Fixed
- Invalidate importlib caches after dynamically installing module [#838](https://github.com/snipsco/snips-nlu/pull/838)
Expand Down
5 changes: 4 additions & 1 deletion snips_nlu/dataset/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
AUTOMATICALLY_EXTENSIBLE, CAPITALIZE, DATA, ENTITIES, ENTITY, INTENTS,
LANGUAGE, MATCHING_STRICTNESS, SLOT_NAME, SYNONYMS, TEXT, USE_SYNONYMS,
UTTERANCES, VALIDATED, VALUE, LICENSE_INFO)
from snips_nlu.dataset import extract_utterance_entities
from snips_nlu.dataset import extract_utterance_entities, Dataset
from snips_nlu.entity_parser.builtin_entity_parser import (
BuiltinEntityParser, is_builtin_entity)
from snips_nlu.exceptions import DatasetFormatError
Expand All @@ -32,6 +32,9 @@ def validate_and_format_dataset(dataset):
"""
from snips_nlu_parsers import get_all_languages

if isinstance(dataset, Dataset):
dataset = dataset.json

# Make this function idempotent
if dataset.get(VALIDATED, False):
return dataset
Expand Down
4 changes: 2 additions & 2 deletions snips_nlu/tests/test_custom_entity_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,14 +348,12 @@ def test_create_custom_entity_parser_configuration(self):
self.assertDictEqual(expected_dict, config)


# pylint: disable=unused-argument
def _persist_parser(path):
path = Path(path)
with path.open("w", encoding="utf-8") as f:
f.write("nothing interesting here")


# pylint: disable=unused-argument
def _load_parser(path):
path = Path(path)
with path.open("r", encoding="utf-8") as f:
Expand All @@ -365,3 +363,5 @@ def _load_parser(path):
# pylint: disable=unused-argument
def _stem(string, language):
return string[:-1]

# pylint: enable=unused-argument
68 changes: 65 additions & 3 deletions snips_nlu/tests/test_dataset_validation.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# coding=utf-8
from __future__ import unicode_literals

import io
from builtins import range, str

from future.utils import iteritems
from mock import mock, patch

from snips_nlu.constants import ENTITIES, SNIPS_DATETIME
from snips_nlu.dataset import validate_and_format_dataset
from snips_nlu.dataset.validation import _validate_and_format_custom_entity
from snips_nlu.constants import ENTITIES, SNIPS_DATETIME, VALIDATED
from snips_nlu.dataset import Dataset
from snips_nlu.dataset.validation import (
validate_and_format_dataset, _validate_and_format_custom_entity)
from snips_nlu.exceptions import DatasetFormatError
from snips_nlu.tests.utils import SnipsTest, EntityParserMock

Expand Down Expand Up @@ -1174,3 +1176,63 @@ def test_should_keep_license_info(self):
"validated": True
}
self.assertDictEqual(expected_dataset, validated_dataset)

def test_validate_should_be_idempotent(self):
    """Validating an already-validated dataset must be a no-op."""
    # Given: a dataset built from a YAML definition, validated once
    yaml_dataset = """
# getWeather Intent
---
type: intent
name: getWeather
utterances:
- what is the weather in [weatherLocation:location](Paris)?
- is it raining in [weatherLocation] [weatherDate:snips/datetime]

# Location Entity
---
type: entity
name: location
automatically_extensible: true
values:
- [new york, big apple]
- london
"""
    dataset = Dataset.from_yaml_files("en", [io.StringIO(yaml_dataset)])
    validated_dataset = validate_and_format_dataset(dataset)

    # When: the validated output is validated a second time
    validated_dataset_2 = validate_and_format_dataset(validated_dataset)

    # Then: the second pass changes nothing and the validated flag is set
    self.assertDictEqual(validated_dataset, validated_dataset_2)
    self.assertTrue(validated_dataset.get(VALIDATED, False))

def test_validate_should_accept_dataset_object(self):
    """validate_and_format_dataset must accept a Dataset, not only a dict."""
    # Given: a Dataset object loaded from a YAML definition
    yaml_dataset = """
# getWeather Intent
---
type: intent
name: getWeather
utterances:
- what is the weather in [weatherLocation:location](Paris)?
- is it raining in [weatherLocation] [weatherDate:snips/datetime]

# Location Entity
---
type: entity
name: location
automatically_extensible: true
values:
- [new york, big apple]
- london
"""
    stream = io.StringIO(yaml_dataset)
    dataset = Dataset.from_yaml_files("en", [stream])

    # When: the Dataset object is passed directly to validation
    validated_dataset = validate_and_format_dataset(dataset)

    # Then: validation succeeds and marks the result as validated
    self.assertTrue(validated_dataset.get(VALIDATED, False))
2 changes: 1 addition & 1 deletion snips_nlu/tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def persist(self, path):
f.write(json_string(unit_dict))

@classmethod
def from_path(cls, path, **shared): # pylint:disable=unused-argument
def from_path(cls, path, **_):
with (path / "metadata.json").open(encoding="utf8") as f:
metadata = json.load(f)
fitted = metadata["fitted"]
Expand Down