forked from NetManAIOps/Bagel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhello.py
134 lines (94 loc) · 4.06 KB
/
hello.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from model import DonutX
import pandas as pd
import numpy as np
from kpi_series import KPISeries
from sklearn.metrics import precision_recall_curve
from evaluation_metric import range_lift_with_delay
df = pd.read_csv('../train.csv', header=0, index_col=None)
kpi_list = df["KPI ID"].unique().tolist()
curr_kpi = kpi_list[2]
df_train = df[df["KPI ID"] == curr_kpi]
df_train = df_train[["timestamp", "value", "label"]]
# df_train = pd.read_csv('./sample_data.csv' , header=0, index_col=None)
print(df_train)
kpi = KPISeries(
value = df_train.value,
timestamp = df_train.timestamp,
label = df_train.label,
name = 'sample_data',
)
train_kpi, valid_kpi, test_kpi = kpi.split((0.49, 0.21, 0.3))
train_kpi, train_kpi_mean, train_kpi_std = train_kpi.normalize(return_statistic=True)
valid_kpi = valid_kpi.normalize(mean=train_kpi_mean, std=train_kpi_std)
test_kpi = test_kpi.normalize(mean=train_kpi_mean, std=train_kpi_std)
print(np.unique(train_kpi.label, return_counts=True))
print(np.unique(valid_kpi.label, return_counts=True))
print(np.unique(test_kpi.label, return_counts=True))
print(np.unique(test_kpi.missing, return_counts=True))
model = DonutX(cuda=False, max_epoch=50, latent_dims=8, network_size=[100, 100])
# model.fit(train_kpi.label_sampling(0.), valid_kpi)
model.fit(train_kpi.label_sampling(1.), valid_kpi)
y_prob = model.predict(test_kpi.label_sampling(0.))
# y_prob, pred, thre = model.detect(test_kpi.label_sampling(0.), return_threshold=True)
print("y prob before: ", y_prob)
print("length before: ", len(y_prob))
y_prob = range_lift_with_delay(y_prob, test_kpi.label)
print("LENGTHS: ", len(y_prob), len(test_kpi.label))
precisions, recalls, thresholds = precision_recall_curve(test_kpi.label, y_prob)
f1_scores = (2 * precisions * recalls) / (precisions + recalls)
print(f'best F1-score: {np.max(f1_scores[np.isfinite(f1_scores)])}')
print('best threshold: ', thresholds[np.argmax(f1_scores)])
print("xxxxxxx")
print(f1_scores)
print(len(f1_scores))
print("****")
print(precisions, recalls, thresholds)
print(len(precisions), len(recalls), len(thresholds))
print("----")
print(y_prob)
print("max value y_prob: ", np.max(y_prob[np.isfinite(y_prob)]))
# print(pred)
# print(thre)
# print(np.unique(pred, return_counts=True)) # predicted anomaly counts
print('DDDDDDD')
thre = thresholds[np.argmax(f1_scores)]
pred = y_prob >= thre
print(np.unique(pred, return_counts=True))
print()
print("NOW DO SOME PREDICTING")
df_2 = pd.read_csv('../test.csv', header=0, index_col=None)
df_predict = df_2[df_2["KPI ID"] == curr_kpi]
df_predict = df_predict[["timestamp", "value"]]
print(df_predict)
predict_kpi = KPISeries(
value = df_predict.value,
timestamp = df_predict.timestamp,
name = 'predict_data',
)
print(np.unique(predict_kpi.label, return_counts=True))
print(np.unique(predict_kpi.missing, return_counts=True))
predict_kpi = predict_kpi.normalize(mean=train_kpi_mean, std=train_kpi_std)
z_prob = model.predict(predict_kpi.label_sampling(0.))
print("y prob before: ", z_prob)
print("length before: ", len(z_prob))
# y_prob = range_lift_with_delay(z_prob, test_kpi.label)
# print("LENGTHS: ", len(z_prob), len(test_kpi.label))
anomalies = z_prob >= thre
print(np.unique(anomalies, return_counts=True))
predict_kpi._label = np.asarray(anomalies, np.int).astype(int)
# format results for outputting
final_df = pd.DataFrame()
final_df["timestamp"] = predict_kpi.timestamp
final_df["KPI ID"] = curr_kpi #this can't be first because dataframe starts empty
final_df["predict"] = predict_kpi.label
final_df["missing"] = predict_kpi.missing
# remove rows that were missing
final_df = final_df[final_df.missing != 1]
# remove missing column
final_df = final_df.drop('missing', axis=1)
# rearrange columns for formatting
final_df = final_df.reindex(columns = ["KPI ID", "timestamp", "predict"])
# append to final results
final_df.to_csv('./xdddddddd.csv', index=False)
print("total anomaly count: ")
print(np.unique(final_df.predict, return_counts=True))