-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathMFCC_computation.py
165 lines (126 loc) · 4.94 KB
/
MFCC_computation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import numpy
from matplotlib import pyplot as pl
import math
from mlp_backprop_momentum import MLP
import k_fold_cross_validation as cv
import scipy.io.wavfile as wav
from scikits.talkbox.features import mfcc
def read_wav_files(files):
    """
    Read every WAV file and collect its sample rate and its samples.

    :param files: a sequence of file paths (must be .wav files)
    :return: (rates, data) where ``rates`` is a 1-D numpy array holding the
             sample rate of each file and ``data`` is a list holding, for
             each file, the sample array returned by ``wav.read``; both are
             in the same order as ``files``
    """
    rates = numpy.zeros(len(files))
    data = []
    # enumerate() instead of xrange(len(files)): xrange does not exist on
    # Python 3, and the index is only needed to fill ``rates``.
    for i, path in enumerate(files):
        rate, samples = wav.read(path)
        rates[i] = rate
        data.append(samples)
    return rates, data
def compute_mfcc(files, nceps=13, mode='mean'):
    """
    Calculate Mel-frequency cepstral coefficients (MFCCs) for each file and
    summarize every file with a single vector: the per-window coefficient
    matrix returned by ``mfcc`` is reduced along axis 0 with the numpy
    function named by ``mode``.

    :param files: a sequence of file paths (must be .wav files)
    :param nceps: number of cepstral coefficients requested from ``mfcc``
    :param mode: name of the numpy function used to reduce the windows
                 (looked up with ``getattr(numpy, mode)`` and called with
                 ``axis=0``), 'mean' by default
    :return: an array of shape (num_files, nceps)
    :raises AttributeError: if numpy has no attribute called ``mode``
    """
    # Resolve the reduction once, before any file is read, so an invalid
    # ``mode`` fails immediately instead of after the (slow) WAV loading.
    reduce_windows = getattr(numpy, mode)

    sample_rates, data = read_wav_files(files)
    ceps_summary = numpy.zeros((len(files), nceps))
    # enumerate/zip instead of xrange, which does not exist on Python 3.
    for i, (samples, rate) in enumerate(zip(data, sample_rates)):
        # mfcc returns three values; only the first (the coefficients) is used.
        ceps_i, _, _ = mfcc(samples, fs=rate, nceps=nceps)
        ceps_summary[i] = reduce_windows(ceps_i, axis=0)
    return ceps_summary
def create_dataset(gender_classes, FILES, nceps=13, mode='mean'):
    """
    Build a shuffled training dataset of summarized MFCC vectors.

    Every class contributes the same number of files: the size of the
    smallest class listed in ``gender_classes``. Each row is the vector
    computed by ``compute_mfcc`` for one file with the output class appended
    as the last element. Rows are shuffled in place with
    ``numpy.random.shuffle`` before being returned.

    :param gender_classes: an array of tuples [(gender_key, output_class),]
    :param FILES: mapping from gender_key to a sliceable sequence of file
                  paths
    :param nceps: forwarded to ``compute_mfcc``
    :param mode: forwarded to ``compute_mfcc``
    :return: a 2D-array of shape (num_files, nceps + 1), where num_files is
             the total number of files used across all classes
    """
    # balance the classes: keep only as many files as the smallest one has
    per_class = min(len(FILES[key]) for key, _ in gender_classes)

    rows = []
    for key, label in gender_classes:
        features = compute_mfcc(FILES[key][:per_class], nceps, mode)
        rows.extend(numpy.append(vector, label) for vector in features)

    samples = numpy.array(rows)
    # shuffle the rows so the classes are interleaved
    numpy.random.shuffle(samples)
    return samples
def print_mse(mse, n_neurons, figsize=(15, 4), ylim=(0, 1)):
    """
    Plot MSE curves side by side, one subplot per entry of the first axis
    of ``mse``.

    :param mse: array indexed as mse[n, i, :]; ``n`` selects the subplot
                (and the matching entry of ``n_neurons``), every ``i`` adds
                one blue curve to that subplot, and the last axis is drawn
                along the x axis labelled 'Epochs'
                (NOTE(review): axis 1 presumably holds repeated runs or
                folds -- confirm against the caller)
    :param n_neurons: per-subplot values used in the subplot titles
    :param figsize: size of the matplotlib figure
    :param ylim: y-axis limits applied to every subplot
    """
    pl.figure(figsize=figsize)
    for panel in range(mse.shape[0]):
        pl.subplot(1, mse.shape[0], panel + 1)
        # overlay all curves belonging to this panel
        for curve in range(mse.shape[1]):
            pl.plot(mse[panel, curve, :], c='b')
        heading = str(n_neurons[panel]) + ' neurons'
        pl.ylim(ylim)
        pl.xlabel('Epochs')
        pl.ylabel('mse')
        pl.title(heading)
        pl.grid()
    pl.tight_layout()
def print_mse_train_test(mse_train, mse_test, n_neurons, figsize=(16, 8), aspect=20):
    """
    Show the training and test MSE as two stacked heat maps.

    Each image is drawn with ``imshow``; its color scale runs from the
    minimum of the data to its 90th percentile. The y ticks are labelled
    with ``n_neurons`` and the x axis is labelled 'Epochs'.

    :param mse_train: 2D array shown in the upper subplot ('Training')
    :param mse_test: 2D array shown in the lower subplot ('Test')
    :param n_neurons: labels for the y ticks (one per row)
    :param figsize: size of the matplotlib figure
    :param aspect: aspect ratio forwarded to ``imshow``
    """
    pl.figure(figsize=figsize)

    # upper panel: training, lower panel: tests
    panels = (('Training', mse_train), ('Test', mse_test))
    for position, (heading, errors) in enumerate(panels, 1):
        pl.subplot(2, 1, position)
        pl.imshow(
            errors,
            vmin=numpy.min(errors),
            vmax=numpy.percentile(errors, 90),
            aspect=aspect,
            interpolation='nearest',
        )
        pl.yticks(numpy.arange(len(n_neurons)), n_neurons)
        pl.xlabel('Epochs')
        pl.ylabel('Number of hidden Neurons')
        pl.title(heading)
        pl.colorbar()

    pl.tight_layout()
def print_coeff_boxplot(keys, FILES, nceps=13, ylim=(-3.5, 20)):
    """
    Draw one box plot of the summarized MFCC coefficients per class.

    The subplots are laid out on a grid of 3 columns, in sorted key order.

    :param keys: the class keys to plot (each must be a key of ``FILES``)
    :param FILES: mapping from class key to the file paths of that class
    :param nceps: number of coefficients, forwarded to ``compute_mfcc``
    :param ylim: y-axis limits applied to every subplot
    """
    NUM_COLS = 3
    X_LABEL = 'coefficients'
    num_plots = len(keys)
    # Integer ceiling division. math.ceil returns a float on Python 2, and
    # the row count is handed to pl.subplot, which expects an integer.
    num_rows = (num_plots + NUM_COLS - 1) // NUM_COLS

    pl.figure(figsize=(15, 5 * num_rows))
    for plot_index, s_class in enumerate(sorted(keys)):
        values = compute_mfcc(FILES[s_class], nceps)
        pl.subplot(num_rows, NUM_COLS, plot_index + 1)
        pl.ylim(ylim)
        pl.boxplot(values)
        pl.title(s_class)
        pl.xlabel(X_LABEL)
        pl.grid()
    pl.tight_layout()
def _safe_ratio(numerator, denominator):
    """Divide element-wise, yielding 0.0 wherever the denominator is 0."""
    ratio = numpy.zeros(numerator.shape)
    defined = denominator != 0
    ratio[defined] = numerator[defined] / denominator[defined]
    return ratio


def conf_mat_stats(matrix):
    """
    Compute the macro-averaged precision, recall and F1 score of a
    confusion matrix.

    Column sums are used as true_positive + false_positive and row sums as
    true_positive + false_negative, i.e. rows are the actual classes and
    columns the predicted ones.

    A score whose denominator is zero (a class that was never predicted,
    a class without samples, or precision + recall == 0) is counted as 0.0
    instead of NaN, so one degenerate class does not turn the averages
    into NaN.

    :param matrix: square confusion matrix (2D array-like of counts)
    :return: (mean precision, mean recall, mean f1-score) over all classes
    """
    matrix = numpy.asarray(matrix, dtype=float)
    classes = numpy.arange(matrix.shape[0])

    # true positives sit on the diagonal
    tp = matrix[classes, classes]
    # true_positive + false_positive for each class
    tp_fp = numpy.sum(matrix, axis=0)
    # true_positive + false_negative for each class
    tp_fn = numpy.sum(matrix, axis=1)

    precisions = _safe_ratio(tp, tp_fp)  # precision = tp/(tp + fp)
    recalls = _safe_ratio(tp, tp_fn)  # recall = tp/(tp + fn)
    # f1-score = 2 x precision x recall / ( precision + recall)
    f1_scores = _safe_ratio(2 * precisions * recalls, precisions + recalls)

    return numpy.mean(precisions), numpy.mean(recalls), numpy.mean(f1_scores)