-
Notifications
You must be signed in to change notification settings - Fork 87
/
Copy patheval.py
executable file
·65 lines (50 loc) · 1.48 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#===============================================================================
#
# Copyright (c) 2017 <> All Rights Reserved
#
#
# Author: Hai Liang Wang
# Date: 2018-07-05:16:28:10
#
#===============================================================================
"""
"""
from __future__ import print_function
from __future__ import division
# Module metadata.
# NOTE(review): __date__ below says 2018-06-05 while the header comment at the
# top of the file says 2018-07-05 -- confirm which one is correct.
__copyright__ = "Copyright (c) 2017 . All Rights Reserved"
__author__ = "Hai Liang Wang"
__date__ = "2018-06-05:16:28:10"
import os
import sys
# Absolute path of the directory that contains this script.
curdir = os.path.dirname(os.path.abspath(__file__))
# Add that directory to the import search path; presumably so that sibling
# modules (such as `hmm`, imported below) resolve regardless of the current
# working directory -- verify against the repository layout.
sys.path.append(curdir)
# Python 2 / Python 3 compatibility shim.
if sys.version_info[0] < 3:
    # On Python 2 the `site` module removes sys.setdefaultencoding during
    # interpreter start-up, so `sys` has to be reloaded first to get the
    # function back; calling it before the reload raises AttributeError.
    reload(sys)
    sys.setdefaultencoding("utf-8")
else:
    # Python 3 has no separate `unicode` type; alias the name to `str` so
    # code written against the Python 2 name keeps working.
    unicode = str
from tqdm import trange
from hmm import Tokenizer as HMMTokenizer
# Module-level HMM tokenizer instance, constructed once at import time and
# passed to evaluate() by main().
HT = HMMTokenizer()
def evaluate(tokenizer, input, output):
    """
    Segment a text file line by line and save the result.

    Every line of the file at `input` is passed to `tokenizer.cut`; the
    returned tokens are stripped of surrounding whitespace, empty tokens are
    dropped, and the remaining tokens are joined with a single space. One
    newline-terminated line is written to `output` per input line.

    Args:
        tokenizer: object exposing `cut(text)` that yields string tokens.
        input: path of the file to read (text mode, default encoding).
        output: path of the file to write (text mode, overwritten).
    """
    with open(input, 'r') as src:
        sentences = src.readlines()

    segmented = []
    # Index-based loop so that trange can display progress over all lines.
    for idx in trange(len(sentences)):
        stripped = [tok.strip() for tok in tokenizer.cut(sentences[idx])]
        # Tokens that were whitespace only (e.g. the trailing newline) are
        # empty after stripping and are left out.
        segmented.append(' '.join(t for t in stripped if t) + '\n')

    print("done.")

    with open(output, 'w') as dst:
        dst.writelines(segmented)
def main():
    """
    Run the module-level HMM tokenizer over the MSR test file and write the
    segmented text to `hmm.msr_test.seg` in the current working directory.
    """
    source_path = '/tools/icwb2-data/testing/msr_test.utf8'
    target_path = 'hmm.msr_test.seg'
    evaluate(HT, source_path, target_path)
# Script entry point: run the evaluation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()