-
Notifications
You must be signed in to change notification settings - Fork 3
/
parse_dik.py
executable file
·47 lines (35 loc) · 1.57 KB
/
parse_dik.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3
import yaml
from characters import strip_length
from parse import Lexicon, Endings
# Relative paths to the YAML inputs used by this script.
# TEST_FILE is opened and parsed by the __main__ section below.
TEST_FILE = "tests/dik.yaml"
# LEXICON_FILE is handed to Lexicon() to build `lexicon`.
LEXICON_FILE = "lexicons/dik.yaml"
# ENDINGS_FILE is handed to Endings() to build `endings`.
ENDINGS_FILE = "stemming.yaml"
# Both objects are constructed at module level, i.e. whenever this module is
# imported or run, not only under the __main__ guard.
lexicon = Lexicon(LEXICON_FILE)
endings = Endings(ENDINGS_FILE)
def main():
    """Cross-check every form in TEST_FILE against the lexicon and endings.

    Each test entry is a mapping with a "lemma" key, optional "test_length"
    and "location" keys, and otherwise parse -> form pairs.  For every
    parse/form pair the stems offered by ``lexicon.stem_info`` (with length
    marks stripped) are intersected with the stems offered by
    ``endings.ending_info``.  Whenever that intersection does not contain
    exactly one stem, a diagnostic report is printed to stdout.
    """
    # Explicit UTF-8 so reading the test data does not depend on the
    # platform's locale encoding.
    with open(TEST_FILE, encoding="utf-8") as f:
        # yaml.load() without a Loader is deprecated since PyYAML 5.1 and a
        # TypeError from 6.0; safe_load is the supported call for plain data.
        tests = yaml.safe_load(f)

    # An empty YAML document loads as None; treat it as "no tests".
    for test in tests or []:
        lemma = strip_length(test.pop("lemma"))
        # "test_length" is not consulted here; it is popped only so that it
        # is not mistaken for a parse/form pair in the loop below.
        test.pop("test_length", True)
        location = test.pop("location", None)

        for parse, form in test.items():
            stem_info = lexicon.stem_info(lemma, parse, context=location)
            if stem_info is None:
                print("couldn't get stem info for {} {}".format(lemma, parse))
                continue

            ending_info = endings.ending_info(form, parse)

            valid_stems = (
                {strip_length(info.stem) for info in stem_info}
                & {info.stem for info in ending_info}
            )
            if len(valid_stems) == 1:
                # Exactly one stem agrees on both sides: nothing to report.
                continue

            print(form, parse, lemma)
            print(" {}".format(stem_info))
            print(" {}".format(ending_info))
            for valid_stem in valid_stems:
                for info in stem_info:
                    # valid_stems holds length-stripped lexicon stems, so the
                    # same normalisation is needed to find the entry again.
                    if strip_length(info.stem) == valid_stem:
                        print(" {}".format(info))
                for info in ending_info:
                    if info.stem == valid_stem:
                        print(" {}".format(info))


if __name__ == "__main__":
    main()