import os
import arff  # liac-arff: provides ArffDecoder
import numpy as np
from matplotlib import pyplot as plt
from keras.layers import Dense, Dropout
from keras.models import Sequential
from keras.models import load_model
from keras.callbacks import TensorBoard
from keras.utils import to_categorical
from keras.utils import plot_model
# open the dataset file
file = open("data/final-dataset.arff", 'r')
# Togglable options
regenerate_model = False
regenerate_data = False
generate_graphs = True
save_model = True
create_model_image = False

def generate_model(shape):
    # define the model: a small fully connected network with dropout
    model = Sequential()
    model.add(Dense(30, input_dim=shape, kernel_initializer='uniform', activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    model.add(Dropout(0.4))
    # model.add(Dense(64, activation='relu'))
    # model.add(Dropout(0.4))
    model.add(Dense(5, activation='softmax'))
    model.summary()
    return model
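
# Quick sanity check (a sketch, not part of the original script): the input
# width below is hypothetical; the real width comes from the dataset later on.
#   m = generate_model(41)
#   m.predict(np.random.rand(1, 41))  # -> array of shape (1, 5), rows sum to ~1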

def scrape_data():
    # decode the .arff data and change text labels into numerical ones
    decoder = arff.ArffDecoder()
    data = decoder.decode(file, encode_nominal=True)
    # split the raw data into features and labels (the label is the last column)
    vals = [val[0: -1] for val in data['data']]
    labels = [label[-1] for label in data['data']]
    # collapse all attack classes into a single "attack" label (0 = normal)
    for i, val in enumerate(labels):
        if val != 0:
            labels[i] = 1
    # split the labels and data into training and validation sets (90/10)
    training_data = vals[0: int(.9 * len(vals))]
    training_labels = labels[0: int(.9 * len(vals))]
    validation_data = vals[int(.9 * len(vals)):]
    validation_labels = labels[int(.9 * len(vals)):]
    # print(training_labels)  # debug: inspect the binarised labels
    # one-hot encode the labels (5 classes kept from the original multi-class
    # labels, though only 0/1 are used after binarisation)
    training_labels = to_categorical(training_labels, 5)
    validation_labels = to_categorical(validation_labels, 5)
    # save all arrays with numpy
    np.save('saved-files/vals', np.asarray(vals))
    np.save('saved-files/labels', np.asarray(labels))
    np.save('saved-files/training_data', np.asarray(training_data))
    np.save('saved-files/validation_data', np.asarray(validation_data))
    np.save('saved-files/training_labels', np.asarray(training_labels))
    np.save('saved-files/validation_labels', np.asarray(validation_labels))
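
# The one-hot encoding above, for illustration (a sketch):
#   to_categorical([0, 1], 5) -> [[1., 0., 0., 0., 0.],
#                                 [0., 1., 0., 0., 0.]]
# five columns are kept even though the binarised labels only use the first two.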

# check whether the saved data exists; if not (or regeneration is requested), create it
if regenerate_data or not os.path.exists('saved-files/training_data.npy') or not os.path.exists(
        'saved-files/training_labels.npy') or not os.path.exists(
        'saved-files/validation_data.npy') or not os.path.exists('saved-files/validation_labels.npy'):
    print('creating')
    if not os.path.exists('saved-files'):
        os.mkdir('saved-files')
    scrape_data()
# load the saved data
data_train = np.load('saved-files/training_data.npy')
label_train = np.load('saved-files/training_labels.npy')
data_eval = np.load('saved-files/validation_data.npy')
label_eval = np.load('saved-files/validation_labels.npy')
# generate and compile the model
model = generate_model(len(data_train[0]))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# initialize tensorboard
tensorboard = TensorBoard(log_dir='logs/', histogram_freq=0, write_graph=True, write_images=True)
# only two epochs; with more, the model overfits the data
history = model.fit(data_train, label_train, validation_data=(data_eval, label_eval), epochs=2, callbacks=[tensorboard])
loss_history = history.history["loss"]
numpy_loss_history = np.array(loss_history)
np.savetxt("saved-files/loss_history.txt", numpy_loss_history, delimiter=",")
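# the saved history can be reloaded later with, e.g.:
#   np.loadtxt('saved-files/loss_history.txt', delimiter=',')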
# reload a previously saved model only when retraining is not wanted;
# loading unconditionally here would discard the freshly trained weights
if not regenerate_model and os.path.exists('saved-files/model.h5'):
    model = load_model('saved-files/model.h5')
# evaluating the model's performance on both splits
print(model.evaluate(data_eval, label_eval))
print(model.evaluate(data_train, label_train))
if create_model_image:
    plot_model(model, to_file='model.png', show_shapes=True)
if generate_graphs:
    plt.figure(1)
    # summarize history for accuracy
    # note: newer Keras versions name these keys 'accuracy'/'val_accuracy'
    plt.subplot(211)
    plt.plot(history.history['acc'])
    plt.plot(history.history['val_acc'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    # summarize history for loss
    plt.subplot(212)
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()
# save the model for later so no retraining is needed
if save_model:
    model.save('saved-files/model.h5')
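
# Example: single-flow inference (a minimal sketch, not part of the original
# script). Uses `model` and `data_eval` from above; the argmax of the softmax
# output is the predicted class index (0 = normal after binarisation).
sample = data_eval[:1]
print('predicted class:', np.argmax(model.predict(sample), axis=1))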
# play a sound (twice) when the script finishes to alert me (macOS only)
os.system('afplay /System/Library/Sounds/Ping.aiff')
os.system('afplay /System/Library/Sounds/Ping.aiff')