-
Notifications
You must be signed in to change notification settings - Fork 0
/
CNN_layer.py
468 lines (364 loc) · 13.9 KB
/
CNN_layer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
import numpy as np
import gzip
# x = np.random.random((2,8,8)) - 0.5
def ReLU(x):
    """
    Rectified linear unit, applied in place.
    param:
    x : numpy array object; it is modified by this call
    returns:
    the same array object, with every strictly negative entry set to 0
    """
    # Boolean mask of the entries that must be clamped.
    negative = x < 0
    x[negative] = 0
    return x
def nanargmax(a):
    """
    Locate the largest entry of an array, skipping NaN entries.
    param:
    a : numpy array object to search
    returns:
    tuple index (one component per dimension of a) of the maximum value
    """
    dims = a.shape
    best = np.unravel_index(np.argmax(a, axis=None), dims)
    if not np.isnan(a[best]):
        return best
    # The plain arg-max picked a NaN entry. Count the NaNs and take the
    # element ranked just below them in the partitioned flat ordering.
    n_nan = np.sum(np.isnan(a))
    rank = -n_nan - 1
    # In numpy < 1.8 use np.argsort(a, axis=None)[rank] instead.
    flat_index = np.argpartition(a, rank, axis=None)[rank]
    return np.unravel_index(flat_index, dims)
# nanargmax returns the indices of the maximum value in the given array
def maxpool(X, f, s):
    """
    Max-pool every channel of a 3-D volume.
    param:
    X : numpy array of shape (channels, height, width)
    f : the size of the (square) pooling window
    s : stride for maxpooling
    returns:
    array of shape (channels, (height-f)//s + 1, (width-f)//s + 1) holding
    the maximum of each complete f x f window; trailing rows/columns that
    do not fill a whole window are ignored
    """
    channels, height, width = X.shape
    # Integer division: array shapes and indices must be ints on Python 3.
    out_h = max(0, (height - f) // s + 1)
    out_w = max(0, (width - f) // s + 1)
    pool = np.zeros((channels, out_h, out_w))
    for c in range(channels):
        for pi in range(out_h):
            top = pi * s
            for pj in range(out_w):
                left = pj * s
                # The output index is the window number (offset // s),
                # not offset // 2, so any stride works.
                pool[c, pi, pj] = np.max(X[c, top:top + f, left:left + f])
    return pool
# def maxpool(X,w,s):
# # size*size is image size
# # Image must be square shaped
# channels,size,size = X.shape
# pool = np.zeros((channels, int(np.ceil(size/(w+s-1))),int(np.ceil(size/(w+s-1))) ))
# pool_size = pool.shape[1]
# # print(pool.shape)
# for channel in range(channels):
# ip=0
# i = 0
# j = 0
# while(ip<pool_size):
# jp=0
# j = 0
# while(jp<pool_size):
# pool[channel,ip,jp] = np.max(X[channel,i:i+w,j:j+w])
# j += w + s -1
# jp = jp+1
# i += w + s - 1
# ip = ip+1
# # print(pool)
# return pool
def softmax_cost(out,y):
    """
    Softmax probabilities and cross-entropy data loss.
    param :
    out : output of the fully connected layer (column of class scores)
    y : one-hot output label with the same shape as out, e.g. [0 0 0 1 0 ...]
    returns :
    cost : -log of the probability assigned to the labelled class
           (only data loss, no regularisation term)
    probability : softmax of out, same shape as out
    """
    # The builtin float replaces the np.float alias removed from NumPy.
    scores = np.asarray(out, dtype=float)
    # Shifting by the maximum leaves the softmax unchanged but keeps
    # exp() from overflowing for large scores.
    eout = np.exp(scores - np.max(scores))
    # Sums run over the first axis, like the builtin sum() did originally.
    probability = eout / np.sum(eout, axis=0)
    p = np.sum(y * probability, axis=0)
    cost = -np.log(p)
    return cost, probability
# Returns the gradients for all the parameters in each iteration
def ConvuluteNetwork(image, label, kernel1, kernel2, biasconv1, biasconv2, weights, biasfc):
    """
    Run one forward and backward pass of the network on a single image:
    conv1 -> ReLU -> conv2 -> ReLU -> 2x2 max-pool (stride 2) -> fully
    connected -> softmax.
    param :
    image : input image of shape (channels, w, w), e.g. 1x28x28
    label : one-hot column vector with one row per class
    kernel1 : convolutional layer 1 kernels, indexable by 0..len-1
    kernel2 : convolutional layer 2 kernels, indexable by 0..len-1
    biasconv1 : bias for layer 1 (one entry per kernel)
    biasconv2 : bias for layer 2 (one entry per kernel)
    weights : weight matrix for the fully connected layer
    biasfc : bias for the fully connected layer
    returns :
    list [dkernel1, dkernel2, dbiasconv1, dbiasconv2, dweights, dbiasfc,
    cost, acc] where the d* entries are the loss gradients with respect to
    the matching parameter (dicts keyed by kernel index for the conv
    layers), cost is the softmax loss and acc is 1 when the highest score
    matches the label, else 0
    """
    # ------------------------------------------------------------------
    # Forward pass
    # ------------------------------------------------------------------
    # l - channels of the input, w - side of the square image
    (l, w, w) = image.shape
    # l1 / l2 - number of filters in conv1 / conv2
    l1 = len(kernel1)
    l2 = len(kernel2)
    (_, f, f) = kernel1[0].shape
    # w1 / w2 - side of the feature maps after conv1 / conv2
    w1 = w - f + 1
    w2 = w1 - f + 1

    convolute1 = np.zeros((l1, w1, w1))
    convolute2 = np.zeros((l2, w2, w2))

    for jj in range(l1):
        for x in range(w1):
            for y in range(w1):
                convolute1[jj, x, y] = np.sum(image[:, x:x + f, y:y + f] * kernel1[jj]) + biasconv1[jj]
    ReLU(convolute1)  # in-place activation

    for jj in range(l2):
        for x in range(w2):
            for y in range(w2):
                convolute2[jj, x, y] = np.sum(convolute1[:, x:x + f, y:y + f] * kernel2[jj]) + biasconv2[jj]
    ReLU(convolute2)  # in-place activation

    # Max-pooling with window size 2 and stride 2.
    pooled_layer = maxpool(convolute2, 2, 2)
    pool_rows, pool_cols = pooled_layer.shape[1], pooled_layer.shape[2]

    # Flatten to a column; this is the input of the fully connected layer.
    fully_connected = pooled_layer.reshape((-1, 1))
    out = weights.dot(fully_connected) + biasfc

    cost, probability = softmax_cost(out, label)
    if np.argmax(out) == np.argmax(label):
        acc = 1
    else:
        acc = 0

    # ------------------------------------------------------------------
    # Backward pass (chain rule)
    # ------------------------------------------------------------------
    dout = probability - label                      # dL/dout
    dweights = dout.dot(fully_connected.T)          # dL/dweights
    # Row-wise sum kept from the original; the number of classes now comes
    # from dout itself instead of a hard-coded 10.
    dbiasfc = np.sum(dout, axis=1).reshape((-1, 1))
    dfully_connected = weights.T.dot(dout)          # dL/dfully_connected

    dpool = dfully_connected.T.reshape(pooled_layer.shape)

    # Route each pooled gradient back to the position that held the
    # maximum of its 2x2 window. Only complete windows are visited.
    dconvolute2 = np.zeros((l2, w2, w2))
    for jj in range(l2):
        for pi in range(pool_rows):
            i = 2 * pi
            for pj in range(pool_cols):
                j = 2 * pj
                (a, b) = nanargmax(convolute2[jj, i:i + 2, j:j + 2])
                dconvolute2[jj, i + a, j + b] = dpool[jj, pi, pj]
    dconvolute2[convolute2 <= 0] = 0                # ReLU gradient

    dconvolute1 = np.zeros((l1, w1, w1))
    dkernel2 = {}
    dbiasconv2 = {}
    for xx in range(l2):
        dkernel2[xx] = np.zeros((l1, f, f))
        dbiasconv2[xx] = 0
    dkernel1 = {}
    dbiasconv1 = {}
    for xx in range(l1):
        dkernel1[xx] = np.zeros((l, f, f))
        dbiasconv1[xx] = 0

    for jj in range(l2):
        for x in range(w2):
            for y in range(w2):
                dkernel2[jj] += dconvolute2[jj, x, y] * convolute1[:, x:x + f, y:y + f]
                dconvolute1[:, x:x + f, y:y + f] += dconvolute2[jj, x, y] * kernel2[jj]
        dbiasconv2[jj] = np.sum(dconvolute2[jj])
    dconvolute1[convolute1 <= 0] = 0                # ReLU gradient

    for jj in range(l1):
        for x in range(w1):
            for y in range(w1):
                dkernel1[jj] += dconvolute1[jj, x, y] * image[:, x:x + f, y:y + f]
        dbiasconv1[jj] = np.sum(dconvolute1[jj])

    return [dkernel1, dkernel2, dbiasconv1, dbiasconv2, dweights, dbiasfc, cost, acc]
def initialize_theta(nout, nin):
    """
    Random initial weights for a dense layer.
    param :
    nout : output neurons
    nin : input neurons
    returns :
    matrix of shape (nout, nin) drawn uniformly from [0, 0.01)
    """
    scale = 0.01
    unit_draws = np.random.rand(nout, nin)
    return scale * unit_draws
def init_kernel(kernel_dim, n_channels, distribution='normal'):
    """
    Draw one convolution kernel from a zero-mean Gaussian.
    param :
    kernel_dim : side length of the square window
    n_channels : number of channels. 1 here
    distribution : accepted but not used by the current implementation
    returns :
    array of shape (n_channels, kernel_dim, kernel_dim) sampled with
    standard deviation sqrt(1 / (kernel_dim * kernel_dim * n_channels))
    """
    # Number of weights in one kernel; the spread shrinks as this grows.
    n_weights = kernel_dim * kernel_dim * n_channels
    sigma = np.sqrt(1. / n_weights)
    shape = (n_channels, kernel_dim, kernel_dim)
    return np.random.normal(loc=0, scale=sigma, size=shape)
## Returns all the trained parameters
def Momentum_Grad_des(batch, learningrate, w, l, MU, kernel1, kernel2, biasconv1, biasconv2, weights, biasfc, cost, acc, velocity=None):
    """
    Perform one mini-batch gradient-descent step with momentum.
    param :
    batch : 2-D array, one sample per row; the last column is the class
            label and the remaining l*w*w columns are the flattened image
    learningrate : the rate at which the changes are applied
    w : size of the (square) image
    l : channel length
    MU : momentum coefficient
    kernel1 : convolutional layer 1 kernels (updated in place)
    kernel2 : convolutional layer 2 kernels (updated in place)
    biasconv1 : bias for layer 1 (updated in place)
    biasconv2 : bias for layer 2 (updated in place)
    weights : weight matrix for the fully connected layer (updated in place)
    biasfc : bias for the fully connected layer (updated in place)
    cost : list to which the mean cost of this batch is appended
    acc : list to which the accuracy of this batch is appended
    velocity : optional dict that carries the momentum velocities from one
               call to the next. Pass the same dict on every call to get
               real momentum. When omitted, the velocities start from zero
               on each call, so MU has no effect (the original behaviour).
    returns :
    list [kernel1, kernel2, biasconv1, biasconv2, weights, biasfc, cost, acc]
    with the updated parameters and the extended cost / accuracy histories
    raises :
    ValueError if batch is empty
    """
    batch_size = len(batch)
    if batch_size == 0:
        # Fail before any parameter is touched instead of dividing by zero.
        raise ValueError("batch must contain at least one sample")

    X = batch[:, 0:-1]
    X = X.reshape(batch_size, l, w, w)
    y = batch[:, -1]

    n_kernels1 = len(kernel1)
    n_kernels2 = len(kernel2)

    # Gradient accumulators, summed over the batch.
    dkernel1 = {k: np.zeros(kernel1[0].shape) for k in range(n_kernels1)}
    dbiasconv1 = {k: 0 for k in range(n_kernels1)}
    dkernel2 = {k: np.zeros(kernel2[0].shape) for k in range(n_kernels2)}
    dbiasconv2 = {k: 0 for k in range(n_kernels2)}
    dweights = np.zeros(weights.shape)
    dbiasfc = np.zeros(biasfc.shape)

    # Momentum state: taken from the caller's dict when provided (and
    # written back to it), otherwise freshly zeroed for this call only.
    state = {} if velocity is None else velocity
    v1 = state.setdefault('v1', {k: np.zeros(kernel1[0].shape) for k in range(n_kernels1)})
    bv1 = state.setdefault('bv1', {k: 0 for k in range(n_kernels1)})
    v2 = state.setdefault('v2', {k: np.zeros(kernel2[0].shape) for k in range(n_kernels2)})
    bv2 = state.setdefault('bv2', {k: 0 for k in range(n_kernels2)})
    v3 = state.setdefault('v3', np.zeros(weights.shape))
    bv3 = state.setdefault('bv3', np.zeros(biasfc.shape))

    n_correct = 0
    cost_ = 0
    for i in range(batch_size):
        image = X[i]
        label = np.zeros((weights.shape[0], 1))
        label[int(y[i]), 0] = 1

        # Gradients of the loss for this sample at the current parameters.
        [dkernel1_, dkernel2_, dbiasconv1_, dbiasconv2_, dweights_, dbiasfc_, curr_cost, acc_] = ConvuluteNetwork(image, label, kernel1, kernel2, biasconv1, biasconv2, weights, biasfc)

        for j in range(n_kernels2):
            dkernel2[j] += dkernel2_[j]
            dbiasconv2[j] += dbiasconv2_[j]
        for j in range(n_kernels1):
            dkernel1[j] += dkernel1_[j]
            dbiasconv1[j] += dbiasconv1_[j]
        dweights += dweights_
        dbiasfc += dbiasfc_

        cost_ += curr_cost
        n_correct += acc_

    # Momentum update: v <- MU*v - lr*mean_gradient ; param <- param + v
    for j in range(n_kernels1):
        v1[j] = MU*v1[j] - learningrate*dkernel1[j]/batch_size
        kernel1[j] += v1[j]
        bv1[j] = MU*bv1[j] - learningrate*dbiasconv1[j]/batch_size
        biasconv1[j] += bv1[j]

    for j in range(n_kernels2):
        v2[j] = MU*v2[j] - learningrate*dkernel2[j]/batch_size
        kernel2[j] += v2[j]
        bv2[j] = MU*bv2[j] - learningrate*dbiasconv2[j]/batch_size
        biasconv2[j] += bv2[j]

    v3 = MU*v3 - learningrate*dweights/batch_size
    state['v3'] = v3
    weights += v3

    bv3 = MU*bv3 - learningrate*dbiasfc/batch_size
    state['bv3'] = bv3
    biasfc += bv3

    cost_ = cost_/batch_size
    cost.append(cost_)
    accuracy = float(n_correct)/batch_size
    acc.append(accuracy)

    return [kernel1, kernel2, biasconv1, biasconv2, weights, biasfc, cost, acc]
## Predict the class of a single image
def predict(image, kernel1, kernel2, biasconv1, biasconv2, weights, biasfc):
    """
    Classify one image with the trained network (forward pass only).
    param :
    image : input image of shape (channels, w, w)
    kernel1 : final kernels for convolute1 used to classify the image
    kernel2 : final kernels for convolute2 used to classify the image
    biasconv1 : final biasconv1 used to classify the image
    biasconv2 : final biasconv2 used to classify the image
    weights : final weights for the fully connected layer
    biasfc : final bias for the fully connected layer
    returns :
    index of the class with the highest probability
    probability of the image belonging to that class
    """
    (_, _, w) = image.shape
    # kernel2's channel count equals the number of conv1 filters.
    (l1, f, f) = kernel2[0].shape
    l2 = len(kernel2)
    w1 = w - f + 1
    w2 = w1 - f + 1

    convolute1 = np.zeros((l1, w1, w1))
    convolute2 = np.zeros((l2, w2, w2))

    # First convolution layer
    for jj in range(l1):
        for x in range(w1):
            for y in range(w1):
                convolute1[jj, x, y] = np.sum(image[:, x:x + f, y:y + f] * kernel1[jj]) + biasconv1[jj]
    convolute1[convolute1 <= 0] = 0  # relu activation

    # Second convolution layer
    for jj in range(l2):
        for x in range(w2):
            for y in range(w2):
                convolute2[jj, x, y] = np.sum(convolute1[:, x:x + f, y:y + f] * kernel2[jj]) + biasconv2[jj]
    convolute2[convolute2 <= 0] = 0  # relu activation

    # Max-pooling with a kernel size 2 and stride 2
    pooled_layer = maxpool(convolute2, 2, 2)

    # Flatten to a column. Letting reshape infer the length avoids the
    # float-valued shape that (w2/2)*(w2/2)*l2 produces on Python 3.
    fully_connected = pooled_layer.reshape((-1, 1))
    out = weights.dot(fully_connected) + biasfc

    # Softmax; shifting by the max keeps exp() from overflowing, and the
    # builtin float replaces the np.float alias removed from NumPy.
    scores = np.asarray(out, dtype=float)
    eout = np.exp(scores - np.max(scores))
    probability = eout / np.sum(eout, axis=0)

    return np.argmax(probability), np.max(probability)
def extract_data(filename, num_images, IMAGE_WIDTH):
    # this function definition has been taken from internet
    """Read image pixels from a gzip-compressed file.

    The first 16 bytes are skipped, then IMAGE_WIDTH*IMAGE_WIDTH bytes are
    read per image. Returns a float32 array of shape
    (num_images, IMAGE_WIDTH*IMAGE_WIDTH); the byte values are converted
    to float but not rescaled.
    """
    pixels_per_image = IMAGE_WIDTH * IMAGE_WIDTH
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        stream.read(16)  # skip the 16-byte header
        raw = stream.read(pixels_per_image * num_images)
    # Interpret the buffer as a 1-dimensional array of unsigned bytes.
    flat = np.frombuffer(raw, dtype=np.uint8).astype(np.float32)
    return flat.reshape(num_images, pixels_per_image)
def extract_labels(filename, num_images):
    # this function definition has been taken from internet
    """Read num_images one-byte labels from a gzip-compressed file.

    The first 8 bytes are skipped. Returns a 1-D int64 array of label IDs.
    """
    print('Extracting', filename)
    with gzip.open(filename) as stream:
        stream.read(8)  # skip the 8-byte header
        raw = stream.read(1 * num_images)
    # Interpret the buffer as a 1-dimensional array of unsigned bytes.
    as_bytes = np.frombuffer(raw, dtype=np.uint8)
    return as_bytes.astype(np.int64)