-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprep_data.py
90 lines (68 loc) · 2.45 KB
/
prep_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy as np
import numpy.matlib
import pandas as pd
import os
import h5py
from keras.utils import to_categorical
from sklearn import preprocessing
# ---- dataset / windowing configuration ------------------------------------
num_class = 39                           # width of the one-hot label vectors
h = 40                                   # filter banks per feature group
ws = 15                                  # frames in one context window
w = (ws - 1) // 2                        # frames on each side of the centre frame
method = 1                               # 0 -> 2D-conv layout, 1 -> 1D-conv layout
path_train = '--path to raw train data'  # placeholder: directory with the raw training CSV files
def create_data(cnn_data, path):
    """Load frame features and class labels from header-less CSV files.

    Each file is read with ``pandas.read_csv(..., header=None)``. In every
    row all columns except the last form the feature vector and the last
    column is the class label. Files are processed in the order given and
    their rows are stacked in that same order.

    Args:
        cnn_data: iterable of CSV file names, relative to ``path``.
        path: directory that contains the files.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: ``X`` has one feature row per CSV
        row, ``Y`` the matching labels. When ``cnn_data`` is empty both are
        empty 1-D arrays.
    """
    feature_chunks = []  # one 2-D array per file
    label_chunks = []    # one 1-D array per file
    for csvfile in cnn_data:
        data = pd.read_csv(os.path.join(path, csvfile), header=None)
        feature_chunks.append(data.iloc[:, :-1].values)
        label_chunks.append(data.iloc[:, -1].values)

    if not feature_chunks:
        # Same result the row-by-row version produced for no input files.
        return np.array([]), np.array([])

    # A single concatenation per output avoids creating one Python object
    # per frame before the final array is built.
    return (np.concatenate(feature_chunks, axis=0),
            np.concatenate(label_chunks, axis=0))
# ---------------------------------------------------------------------------
# Build the training set: load all frames, pad, standardise, cut one context
# window per frame, one-hot encode the labels and store both arrays as HDF5.
# ---------------------------------------------------------------------------

# Fail before any data is loaded: an unsupported layout would otherwise
# silently yield empty output files.
if method not in (0, 1):
    raise ValueError(
        'method must be 0 (2D conv) or 1 (1D conv), got {!r}'.format(method))

# os.listdir() returns entries in arbitrary order; sorting makes the frame
# order, and therefore the written files, reproducible.
train_data = sorted(os.listdir(path_train))
X_train, Y_train = create_data(train_data, path_train)
if X_train.ndim != 2 or X_train.shape[0] == 0:
    raise ValueError('no training frames found in {!r}'.format(path_train))

# padding data: repeat the first and the last frame (and their labels) w
# times so that every original frame can sit at the centre of a full window.
# NOTE(review): padding is applied once to the concatenation of all files, so
# windows centred near a file boundary contain frames of the neighbouring
# file -- confirm this is intended.
X_train = np.pad(X_train, ((w, w), (0, 0)), mode='edge')
Y_train = np.pad(Y_train, (w, w), mode='edge')[None, :]  # shape (1, frames)

# standardizing data --- zero mean / unit variance per feature column
# (the statistics include the 2*w replicated edge frames).
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)
# A constant column has std == 0; dividing by it would turn the whole column
# into NaN. Such a column is already all-zero after centring, so divide by 1.
X_train = (X_train - mean) / np.where(std == 0, 1.0, std)
X_train = np.transpose(X_train)  # -> (features, frames)

q_label = 39  # label of class 'q', which is left out of the training set
n_frames = np.shape(X_train)[1]

# The feature rows are read as three consecutive groups of h rows each
# (presumably static / delta / delta-delta filter banks -- TODO confirm).
Xtrn = []
Ytrn = []
if method == 0:
    # 2D layout: one (h, ws, 3) tensor per frame, one channel per group.
    for i in range(w, n_frames - w):
        if Y_train[0, i] == q_label:
            continue  # skip before building a window that would be dropped
        tmp = np.zeros((h, ws, 3))
        for k in range(3):
            tmp[:, :, k] = X_train[k * h:(k + 1) * h, i - w:i + w + 1]
        Xtrn.append(tmp)
        Ytrn.append(Y_train[0, i])
elif method == 1:
    # 1D layout: one (h, 1, 3*ws) tensor per frame; the channels hold the
    # three groups of each window frame, interleaved frame by frame.
    for i in range(w, n_frames - w):
        if Y_train[0, i] == q_label:
            continue  # skip before building a window that would be dropped
        tmp = np.zeros((h, 1, 3 * ws))
        for c, j in enumerate(range(i - w, i + w + 1)):
            for k in range(3):
                tmp[:, 0, 3 * c + k] = X_train[k * h:(k + 1) * h, j]
        Xtrn.append(tmp)
        Ytrn.append(Y_train[0, i])

Xtrn = np.array(Xtrn)
Ytrn = np.array(Ytrn)

# one-hot-encoding
Ytrn = to_categorical(Ytrn, num_classes=num_class)
print(np.shape(Xtrn))
print(np.shape(Ytrn))

# save dataset to hdf file; the context managers close each file even when
# writing the dataset fails.
with h5py.File('train_data.h5', 'w') as hf:
    hf.create_dataset('train_data', data=Xtrn)
with h5py.File('train_label.h5', 'w') as hf:
    hf.create_dataset('train_label', data=Ytrn)