-
Notifications
You must be signed in to change notification settings - Fork 41
/
test_backend_svc.py
96 lines (66 loc) · 3.04 KB
/
test_backend_svc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Unit tests for the SVC backend in Annif"""
import pytest
import annif.backend
import annif.corpus
from annif.exception import NotInitializedException, NotSupportedException
def test_svc_default_params(project):
    """An SVC backend built with an empty config exposes the default params.

    Checks that ``limit`` is 100 and ``min_df`` is 1 in ``backend.params``.
    """
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    params = backend.params
    defaults = {
        "limit": 100,
        "min_df": 1,
    }
    for name, expected in defaults.items():
        # Presence first, then value, so a failure pinpoints which is wrong.
        assert name in params
        assert params[name] == expected
def test_svc_suggest_no_vectorizer(project):
    """Calling suggest on an untrained SVC backend must raise.

    The expected error is ``NotInitializedException``; going by the test
    name, the missing piece at this point is the vectorizer.
    """
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    texts = ["example text"]
    with pytest.raises(NotInitializedException):
        backend.suggest(texts)[0]
def test_svc_train(datadir, document_corpus, project, caplog):
    """Training on the document corpus builds and stores a model.

    After training, the backend holds a model, ``svc-model.gz`` exists in
    ``datadir``, and the log contains the multiple-subjects warning.
    """
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    expected_warning = (
        "training on a document with multiple subjects is not "
        "supported by SVC; selecting one random subject."
    )

    backend.train(document_corpus)

    assert backend._model is not None
    model_file = datadir.join("svc-model.gz")
    assert model_file.exists()
    assert expected_warning in caplog.text
def test_svc_train_ngram(datadir, document_corpus, project):
    """Training with ``ngram`` set to 2 still builds and stores a model."""
    backend_cls = annif.backend.get_backend("svc")
    settings = {"ngram": 2}
    backend = backend_cls(backend_id="svc", config_params=settings, project=project)

    backend.train(document_corpus)

    assert backend._model is not None
    model_file = datadir.join("svc-model.gz")
    assert model_file.exists()
def test_svc_train_cached(datadir, project):
    """Passing ``"cached"`` to train raises ``NotSupportedException``."""
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    corpus = "cached"
    with pytest.raises(NotSupportedException):
        backend.train(corpus)
def test_svc_train_nodocuments(datadir, project, empty_corpus):
    """Training on the empty corpus raises ``NotSupportedException``."""
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    with pytest.raises(NotSupportedException):
        backend.train(empty_corpus)
def test_svc_suggest(project):
    """Suggesting for an archaeology text returns the expected subject.

    With ``limit`` set to 20, the result for the single input document must
    be non-empty, contain at most 20 entries, and include the subject
    looked up by the YSO URI ``p10849``.
    """
    backend_cls = annif.backend.get_backend("svc")
    settings = {"limit": 20}
    backend = backend_cls(backend_id="svc", config_params=settings, project=project)
    document = """Arkeologiaa sanotaan joskus myös..."""

    # suggest takes a batch of texts; only the first result set is relevant.
    results = backend.suggest([document])[0]

    assert 0 < len(results) <= 20
    archaeologists = project.subjects.by_uri("http://www.yso.fi/onto/yso/p10849")
    suggested_ids = [hit.subject_id for hit in results]
    assert archaeologists in suggested_ids
def test_svc_suggest_no_input(project):
    """The single-letter text ``"j"`` produces no suggestions."""
    backend_cls = annif.backend.get_backend("svc")
    settings = {"limit": 8}
    backend = backend_cls(backend_id="svc", config_params=settings, project=project)
    batch = backend.suggest(["j"])
    results = batch[0]
    assert len(results) == 0
def test_svc_suggest_no_model(datadir, project):
    """Suggesting after deleting the model file must raise.

    ``svc-model.gz`` is removed from ``datadir`` first; the suggest call is
    then expected to raise ``NotInitializedException``.
    """
    backend_cls = annif.backend.get_backend("svc")
    backend = backend_cls(backend_id="svc", config_params={}, project=project)
    model_file = datadir.join("svc-model.gz")
    model_file.remove()
    with pytest.raises(NotInitializedException):
        backend.suggest(["example text"])[0]