run_best.py
import pickle
import json
import os
from collections import OrderedDict  # ordered dictionary
import numpy as np
import ml_model as ml
import operate_data as od
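# ml_model and operate_data are project-local modules. As used below, ml.naiveBayes
# trains a classifier from (trainX, trainY), and od.randomData splits the feature
# vectors, labels and matching log entries into train/test sets (the 0.1 argument
# is presumably the test fraction).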

def runBest(vector='wordfreq', m_model=ml.naiveBayes):
    ##### Logging mode: only record the model with the best leave-one-out accuracy #####
    xpath = os.path.join('result', 'vector', 'resultX.npz')
    ypath = os.path.join('result', 'vector', 'resultY.npz')
    resultX = np.load(xpath)
    resultY = np.load(ypath)
    logpath = os.path.join('result', 'log', 'logfile.plk')
    logfile = None
    with open(logpath, 'rb') as f:
        logfile = pickle.load(f)
    # Split the data produced by the best-performing vectorization method
    trainX, trainY, testX, testY, logTrain, logTest = od.randomData(resultX[vector], resultY[vector], 0.1, logfile)
    model = m_model(trainX, trainY)  # train with the best-performing model
    # model.predict expects a 2-D array, hence the reshape to (1, n_features)
    predictY = [model.predict(x.reshape(1, -1))[0] for x in testX]
    logDict = OrderedDict()
    logDict['+2+'] = []; logDict['+2.'] = []; logDict['+2-'] = []
    logDict['-2+'] = []; logDict['-2.'] = []; logDict['-2-'] = []
    logDict['.2+'] = []; logDict['.2.'] = []; logDict['.2-'] = []
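    # Bucket naming convention (inferred from the loop below): '<actual>2<predicted>'.
    # For example '+2-' collects log entries whose true label is positive but which were
    # predicted negative; '+2+', '.2.' and '-2-' hold the correctly classified entries.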
    for i in range(len(predictY)):
        if predictY[i] == testY[i]:
            if predictY[i] > 0:
                logDict['+2+'].append(logTest[i])
            elif predictY[i] == 0:
                logDict['.2.'].append(logTest[i])
            else:
                logDict['-2-'].append(logTest[i])
        elif predictY[i] > 0 and testY[i] == 0:
            logDict['.2+'].append(logTest[i])
        elif predictY[i] > 0 and testY[i] < 0:
            logDict['-2+'].append(logTest[i])
        elif predictY[i] < 0 and testY[i] == 0:
            logDict['.2-'].append(logTest[i])
        elif predictY[i] < 0 and testY[i] > 0:
            logDict['+2-'].append(logTest[i])
        elif predictY[i] == 0 and testY[i] > 0:
            logDict['+2.'].append(logTest[i])
        elif predictY[i] == 0 and testY[i] < 0:
            logDict['-2.'].append(logTest[i])
    with open(os.path.join('result', 'log', '3plus3arr.plk'), 'wb') as f:
        pickle.dump(logDict, f)
    print('Over')
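
# logBest() consumes the 3x3 buckets pickled by runBest(): it writes the misclassified
# log entries to error_tag.json and the per-class precision/recall to PR.json.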
def logBest():
    arr = None  # holds the 3x3 "matrix" (in fact an OrderedDict of lists)
    with open(os.path.join('result', 'log', '3plus3arr.plk'), 'rb') as f:
        arr = pickle.load(f)
    ErrorTag = {}
    key_arr = []  # keys whose buckets contain misclassified entries
    for key in arr.keys():
        if key not in ('+2+', '-2-', '.2.') and len(arr[key]):
            key_arr.append(key)
            message = 'At: ' + key + '; Total ' + str(len(arr[key])) + "; \n " + ",".join(arr[key])
            print(message)
            ErrorTag[key] = message
    with open(os.path.join('result', 'log', 'best_model', 'error_tag.json'), 'w', encoding="utf-8") as f:
        json.dump(ErrorTag, f)
    print("=" * 30)
    for key in arr.keys():
        if key not in key_arr:
            print('At: ' + key + '; Total ' + str(len(arr[key])) + "; \n " + ",".join(arr[key]))
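    # Precision/recall, spelled out for the '+' class (the '-' and '.' classes are symmetric):
    #   precision('+') = |+2+| / (|+2+| + |-2+| + |.2+|)  -> share of samples predicted '+' that truly are '+'
    #   recall('+')    = |+2+| / (|+2+| + |+2-| + |+2.|)  -> share of truly '+' samples that were predicted '+'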
    ##### Compute precision/recall and write them to a file #####
    pr = {}  # p: precision, r: recall
    if (len(arr['+2+']) + len(arr['-2+']) + len(arr['.2+'])) and (len(arr['+2+']) + len(arr['+2-']) + len(arr['+2.'])):
        pr['+'] = {
            'p': len(arr['+2+']) / (len(arr['+2+']) + len(arr['-2+']) + len(arr['.2+'])),
            'r': len(arr['+2+']) / (len(arr['+2+']) + len(arr['+2-']) + len(arr['+2.'])),
        }
    else:
        pr['+'] = {'p': None, 'r': None}
    if (len(arr['-2-']) + len(arr['+2-']) + len(arr['.2-'])) and (len(arr['-2-']) + len(arr['-2+']) + len(arr['-2.'])):
        pr['-'] = {
            'p': len(arr['-2-']) / (len(arr['-2-']) + len(arr['+2-']) + len(arr['.2-'])),
            'r': len(arr['-2-']) / (len(arr['-2-']) + len(arr['-2+']) + len(arr['-2.'])),
        }
    else:
        pr['-'] = {'p': None, 'r': None}
    if (len(arr['.2.']) + len(arr['+2.']) + len(arr['-2.'])) and (len(arr['.2.']) + len(arr['.2+']) + len(arr['.2-'])):
        pr['.'] = {
            'p': len(arr['.2.']) / (len(arr['.2.']) + len(arr['+2.']) + len(arr['-2.'])),
            'r': len(arr['.2.']) / (len(arr['.2.']) + len(arr['.2+']) + len(arr['.2-'])),
        }
    else:
        pr['.'] = {'p': None, 'r': None}
    with open(os.path.join('result', 'log', 'best_model', 'PR.json'), 'w', encoding="utf-8") as f:
        json.dump(pr, f)
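
# The helper below is not part of the original script; it is a minimal sketch for reading
# back the PR.json written by logBest() and printing it, assuming the
# {'+': {'p': ..., 'r': ...}, ...} layout produced above. The name printPR and the
# default path are illustrative only.
def printPR(path=os.path.join('result', 'log', 'best_model', 'PR.json')):
    with open(path, 'r', encoding='utf-8') as f:
        pr = json.load(f)
    for label, scores in pr.items():
        p = 'n/a' if scores['p'] is None else '%.3f' % scores['p']
        r = 'n/a' if scores['r'] is None else '%.3f' % scores['r']
        print('class ' + label + ': precision=' + p + ', recall=' + r)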

if __name__ == '__main__':
    best_vec = 'wordfreq'
    best_model = ml.naiveBayes
    runBest(vector=best_vec, m_model=best_model)
    logBest()