-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtests.py
63 lines (49 loc) · 2.3 KB
/
tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import unittest
from os import path
from os.path import dirname, abspath
from datatypes import MorToken
from dump_speaker_data import extract_feature_utterances, rewriter
from sample_size_test import generate_ngrams
from stats import dice_stat
# Absolute directory that contains this test module; fixture paths are
# built relative to it so the tests do not depend on the working directory.
here = path.dirname(path.abspath(__file__))
class TestUtteranceExtracting(unittest.TestCase):
    """Tests for utterance extraction, n-gram generation, the Dice statistic
    and MorToken rewriting, driven by the ``fixtures/eve01.xml`` fixture.
    """

    @classmethod
    def setUpClass(cls):
        """Extract the fixture's 'word' utterances for 'MOT' once per class.

        Stores the fixture path in ``cls.filename``, the extracted utterances
        in ``cls.utterances`` and their flattened contents in ``cls.corpus``.
        """
        # A single pre-formatted argument prints the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print("setting up %s" % (cls,))
        cls.filename = path.join(here, "fixtures", "eve01.xml")
        cls.utterances = extract_feature_utterances([cls.filename], 'word', 'MOT')
        # Flatten the utterances into one token list. A comprehension is
        # linear-time, yields [] for no utterances, and does not rely on the
        # Python 2-only ``reduce`` builtin.
        cls.corpus = [token for utterance in cls.utterances for token in utterance]

    def test_extraction(self):
        """Check how many utterances are returned for each feature/filter.

        The third argument ('MOT', 'CHI') is presumably a speaker code, and
        omitting it presumably selects all speakers -- confirm against
        ``extract_feature_utterances``.
        """
        mot_words = extract_feature_utterances([self.filename], 'word', 'MOT')
        self.assertEqual(len(mot_words), 804)

        chi_pos = extract_feature_utterances([self.filename], 'pos', 'CHI')
        self.assertEqual(len(chi_pos), 741)

        all_pos = extract_feature_utterances([self.filename], 'pos')
        self.assertEqual(len(all_pos), 1588)

    def test_statistic_funs(self):
        """dice_stat must be 1 for identical inputs and 0 against an empty list."""
        ngrams = generate_ngrams(3, self.corpus)
        self.assertEqual(dice_stat(ngrams, ngrams), 1)
        self.assertEqual(dice_stat(ngrams, []), 0)

    def test_ngrams(self):
        """Consecutive bigrams must overlap: each one starts with the word the
        previous one ended on.
        """
        ngrams = generate_ngrams(2, self.corpus)
        # Pairing the list with its own tail visits every adjacent pair,
        # including the final one.
        for first, second in zip(ngrams, ngrams[1:]):
            self.assertEqual(first.split()[-1], second.split()[0])

    def _mtf(self, prefix=None, word=None, stem=None, pos=None, subPos=None, sxfx=None, sfx=None):
        """Build a MorToken from keyword arguments.

        ``prefix``, ``subPos``, ``sxfx`` and ``sfx`` are replaced by a fresh
        empty list when omitted (or otherwise falsy); ``word``, ``stem`` and
        ``pos`` are passed through unchanged.
        """
        return MorToken(prefix or [], word, stem, pos, subPos or [], sxfx or [], sfx or [])

    def test_translation(self):
        """rewriter must turn each MorToken below into the paired tag string."""
        cases = (
            (self._mtf(word="nook", pos="fam"), "chi"),
            (self._mtf(word="stool", pos="n", subPos=["prop"], sfx=["POSS"]),
             "n-pr"),
            (self._mtf(word="steve", pos="n", subPos=["prop"], sfx=["PL"]),
             "n-pr-pl"),
            # NOTE(review): identical to the case above -- possibly a
            # different token was intended here; confirm with the author.
            (self._mtf(word="steve", pos="n", subPos=["prop"], sfx=["PL"]),
             "n-pr-pl"),
            (self._mtf(pos="part", sfx=["PERF"]), "prt-pf"),
            (self._mtf(pos="part", sfx=["PROG"]), "prt-pg"),
            (self._mtf(pos=""), "ptl"),
        )
        for token, expected in cases:
            self.assertEqual(rewriter(token), expected)
# Run the whole suite through unittest's command-line runner when this file
# is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()