-
Notifications
You must be signed in to change notification settings - Fork 13
/
pruned_RNN_SM.py
111 lines (87 loc) · 3.2 KB
/
pruned_RNN_SM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
#-*-coding:utf-8 -*-
'''
Implementation of our pruned RNN-SM algorithm
-------------
Based on paper:
RNN-SM: Fast Steganalysis of VoIP Streams Using Recurrent Neural Network
-------------
Author: Zinan Lin
Email: [email protected]
'''
import numpy as np
import os, pickle, random, datetime
from keras.models import Sequential
from keras.layers import Dense, Activation, LSTM
# Input folders and the class label assigned to every file found in each one.
# NOTE(review): these are absolute, machine-specific paths - adjust before running elsewhere.
FOLDERS = [
{"class" : 1, "folder" : "/data1/linzn/data/ch_g729a_100_10000ms_FEAT"}, # The folder that contains positive data files (label 1).
{"class" : 0, "folder" : "/data1/linzn/data/ch_g729a_0_10000ms_FEAT"} # The folder that contains negative data files (label 0).
]
SAMPLE_LENGTH = 10000 # Sample length in ms; the first LSTM layer's input_length is SAMPLE_LENGTH / 10.
BATCH_SIZE = 32 # Mini-batch size passed to model.fit.
ITER = 30 # Number of training iterations; each one runs a single epoch and saves a model file.
FOLD = 5 # The first 1/FOLD of the shuffled samples is held out for testing; the rest is used for training.
def get_file_list(folder):
    """Return the paths of all entries in ``folder``.

    Parameters
    ----------
    folder : str
        Directory to list.

    Returns
    -------
    list of str
        ``folder`` joined with each entry name reported by ``os.listdir``.
        Entries are not filtered, so sub-directories are included as well.
        The list is sorted by entry name so that the result is reproducible.

    Raises
    ------
    OSError
        If ``folder`` does not exist or cannot be listed.
    """
    # os.listdir yields names in arbitrary order; sort them for determinism.
    return [os.path.join(folder, name) for name in sorted(os.listdir(folder))]
def parse_sample(file_path):
    """Read a codeword file.

    Parameters
    ----------
    file_path : str
        The path to an ASCII file. Each line contains three integers
        ``x1 x2 x3`` separated by tabs, which are the three codewords of
        one frame. There are (number of frames) lines in total.

    Returns
    -------
    list of list of str
        One inner list per line, holding the tab-separated fields of that
        line. The fields are returned as the strings read from the file;
        they are not converted to ``int`` here.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed, even on error.
    with open(file_path, 'r') as sample_file:
        # Drop surrounding whitespace (including the line terminator) before
        # splitting so the last field does not carry a trailing newline.
        return [line.strip().split("\t") for line in sample_file]
def save_variable(file_name, variable):
    """Serialize ``variable`` to ``file_name`` with pickle.

    Parameters
    ----------
    file_name : str
        Path of the output file. It is opened in binary write mode, so an
        existing file is overwritten.
    variable : object
        Any picklable object.

    Raises
    ------
    IOError / OSError
        If the file cannot be opened for writing.
    pickle.PicklingError
        If ``variable`` cannot be pickled.
    """
    # Use a context manager so the file is closed even if pickling fails.
    with open(file_name, "wb") as file_object:
        pickle.dump(variable, file_object)
# Pruned RNN-SM training and testing.
if __name__ == '__main__':
    # Gather (path, label) pairs from every configured folder, then shuffle
    # them so the test/train split below is random.
    all_files = []
    for folder in FOLDERS:
        for path in get_file_list(folder["folder"]):
            all_files.append((path, folder["class"]))
    random.shuffle(all_files)
    save_variable('all_files.pkl', all_files)

    # Parse every sample file and keep the labels in the same order.
    all_samples_x = [parse_sample(path) for path, _ in all_files]
    all_samples_y = [label for _, label in all_files]

    np_all_samples_x = np.asarray(all_samples_x)
    np_all_samples_y = np.asarray(all_samples_y)
    save_variable('np_all_samples_x.pkl', np_all_samples_x)
    save_variable('np_all_samples_y.pkl', np_all_samples_y)

    # The first 1/FOLD of the shuffled samples is used for testing, the
    # remainder for training.
    file_num = len(all_files)
    sub_file_num = int(file_num / FOLD)
    x_test, x_train = np_all_samples_x[:sub_file_num], np_all_samples_x[sub_file_num:]
    y_test, y_train = np_all_samples_y[:sub_file_num], np_all_samples_y[sub_file_num:]

    print("Building model")
    # NOTE(review): `input_length`, `input_dim` and `nb_epoch` look like
    # Keras 1 argument names - confirm against the Keras version in use.
    model = Sequential()
    # First LSTM layer: returns the full sequence for the next LSTM layer.
    model.add(LSTM(50, input_length=int(SAMPLE_LENGTH / 10), input_dim=3, return_sequences=True))
    # Second LSTM layer.
    model.add(LSTM(50))
    # Single-unit output layer followed by a sigmoid activation.
    model.add(Dense(1))
    model.add(Activation('sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=["accuracy"])

    print("Training")
    # Train one epoch per iteration, validating on the held-out samples, and
    # save a model file after each iteration (model_1.h5 ... model_<ITER>.h5).
    for iteration in range(1, ITER + 1):
        model.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epoch=1, validation_data=(x_test, y_test))
        model.save('model_%d.h5' % iteration)