#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time   : 2018/4/16 14:00
# @Author : Ting
import numpy as np
from math import sin, cos, sqrt
import matplotlib.pyplot as plt

np.random.seed(123)  # all randomness below comes from np.random, so seed NumPy's generator
def generate_data(X, func):
    """Generate (x, y) sample pairs for the given inputs X and target function."""
    Y = [func(x) for x in X]
    return np.array(X).T, np.array(Y).T
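
# Example: generate_data([[0.0], [1.0]], lambda x: [x[0] ** 2])
# returns x and y each with shape (1, 2): features along rows, samples along columns.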
# define the network
class Network:
    def __init__(self, x, y, weights, biases):
        # hidden fully connected layer 1 with ReLU
        self.z1 = weights['h1'].T.dot(x) + biases['b1']
        self.a1 = np.maximum(self.z1, 0)
        # hidden fully connected layer 2 with ReLU
        self.z2 = weights['h2'].T.dot(self.a1) + biases['b2']
        self.a2 = np.maximum(self.z2, 0)
        # fully connected output layer (linear)
        self.y_pred = weights['out'].T.dot(self.a2) + biases['out']
        # loss function: mean squared error over the batch
        self.loss = np.square(self.y_pred - y).sum() / y.shape[1]
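
# Quick shape check for the forward pass (a minimal sketch, not part of training):
# _w = {'h1': np.zeros((1, 3)), 'h2': np.zeros((3, 3)), 'out': np.zeros((3, 1))}
# _b = {'b1': np.zeros((3, 1)), 'b2': np.zeros((3, 1)), 'out': np.zeros((1, 1))}
# Network(np.zeros((1, 5)), np.zeros((1, 5)), _w, _b).y_pred.shape  # -> (1, 5)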
def NN(train_x, train_y, test_x, test_y, learning_rate=0.01, batch_size=100, epochs=100,
       n_hidden_1=50, n_hidden_2=50):
    # parameters
    num_input = train_x.shape[0]   # input dimension
    num_output = train_y.shape[0]  # output dimension
    num_steps = int(train_x.shape[1] / batch_size)
    # store layer weights & biases
    weights = {
        'h1': np.random.normal(loc=0.0, scale=0.1, size=(num_input, n_hidden_1)),
        'h2': np.random.normal(loc=0.0, scale=0.1, size=(n_hidden_1, n_hidden_2)),
        'out': np.random.normal(loc=0.0, scale=0.1, size=(n_hidden_2, num_output))
    }
    # alternative: Xavier (Glorot) uniform initialisation
    # weights = {
    #     'h1': np.random.uniform(-sqrt(6 / (num_input + n_hidden_1)), sqrt(6 / (num_input + n_hidden_1)), size=(num_input, n_hidden_1)),
    #     'h2': np.random.uniform(-sqrt(6 / (n_hidden_1 + n_hidden_2)), sqrt(6 / (n_hidden_1 + n_hidden_2)), size=(n_hidden_1, n_hidden_2)),
    #     'out': np.random.uniform(-sqrt(6 / (n_hidden_2 + num_output)), sqrt(6 / (n_hidden_2 + num_output)), size=(n_hidden_2, num_output))
    # }
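    # Xavier bound: limit = sqrt(6 / (fan_in + fan_out)), which keeps activation
    # variance roughly constant across layers (Glorot & Bengio, 2010)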
    biases = {
        'b1': np.zeros([n_hidden_1, 1]),
        'b2': np.zeros([n_hidden_2, 1]),
        'out': np.zeros([num_output, 1])
    }
    loss_values = []
    # start training
    for i in range(epochs):
        # optionally shuffle the data each epoch (SGD):
        # copy_data = np.concatenate((train_x, train_y), axis=0).T
        # np.random.shuffle(copy_data)
        # train_x = copy_data.T[:num_input]
        # train_y = copy_data.T[num_input:]
        for step in range(num_steps):
            batch_x = train_x[:, batch_size * step:batch_size * (step + 1)]
            batch_y = train_y[:, batch_size * step:batch_size * (step + 1)]
            # forward pass on the current mini-batch
            nn = Network(batch_x, batch_y, weights=weights, biases=biases)
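            # Backprop below follows the chain rule for this network:
            #   L = sum((y_pred - y)^2) / batch_size
            #   dL/dy_pred = 2 * (y_pred - y) / batch_size   -> error_out
            #   dL/dz2 = relu'(z2) * (W_out @ error_out)     -> error_2
            #   dL/dz1 = relu'(z1) * (W_h2 @ error_2)        -> error_1
            #   dL/dW = error @ activation.T per layer, with relu'(z) = 1 if z > 0 else 0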
            # backward pass: gradient for each layer
            error_out = 2 * (nn.y_pred - batch_y) / batch_size
            grad_wout = error_out.dot(nn.a2.T)
            grad_bout = np.sum(error_out, axis=1, keepdims=True)
            # reuse z2 in place as the ReLU derivative mask
            nn.z2[nn.z2 < 0] = 0
            nn.z2[nn.z2 > 0] = 1
            error_2 = nn.z2 * (weights['out'].dot(error_out))
            grad_w2 = error_2.dot(nn.a1.T)
            grad_b2 = np.sum(error_2, axis=1, keepdims=True)
            # reuse z1 in place as the ReLU derivative mask
            nn.z1[nn.z1 < 0] = 0
            nn.z1[nn.z1 > 0] = 1
            error_1 = nn.z1 * (weights['h2'].dot(error_2))
            grad_w1 = error_1.dot(batch_x.T)
            grad_b1 = np.sum(error_1, axis=1, keepdims=True)
            # gradient descent parameter update
            weights['h1'] = weights['h1'] - learning_rate * grad_w1.T
            weights['h2'] = weights['h2'] - learning_rate * grad_w2.T
            weights['out'] = weights['out'] - learning_rate * grad_wout.T
            biases['b1'] = biases['b1'] - learning_rate * grad_b1
            biases['b2'] = biases['b2'] - learning_rate * grad_b2
            biases['out'] = biases['out'] - learning_rate * grad_bout
        # record the test loss once per epoch
        nn = Network(test_x, test_y, weights=weights, biases=biases)
        loss_values.append(nn.loss)
    # final evaluation on the test set
    nn = Network(test_x, test_y, weights=weights, biases=biases)
    plt.figure()
    plt.plot(loss_values)
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.show()
    plt.figure()
    # plt.plot(test_x[0, :], test_y[0, :])
    # plt.plot(test_x[0, :], nn.y_pred[0, :])
    ax = plt.subplot(1, 1, 1)
    p1, = ax.plot(test_x[0, :], test_y[0, :], label="true")
    p2, = ax.plot(test_x[0, :], nn.y_pred[0, :], label="predict")
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles[::-1], labels[::-1])  # reverse so "predict" is listed first
    plt.show()
    print(nn.loss)
# training data
# x and y are designed to be two-dimensional arrays, so the target function
# must take a list of x values and return a list of y values
# y = x**2
# function = lambda x: [x[0] ** 2]
# y = sin(x)
# function = lambda x: [sin(x[0])]
function = lambda x: [0.2 + 0.4 * x[0] ** 2 + 0.3 * x[0] * sin(15 * x[0]) + 0.05 * cos(50 * x[0])]
trainx, trainy = generate_data([[i] for i in np.random.randn(10000)], function)
testx, testy = generate_data([[i / 250 - 2] for i in range(1000)], function)
learning_rate = 0.1
batch_size = 20
epochs = 500
n_hidden_1 = 200
n_hidden_2 = 200
NN(trainx, trainy, testx, testy, learning_rate=learning_rate, batch_size=batch_size,
   epochs=epochs, n_hidden_1=n_hidden_1, n_hidden_2=n_hidden_2)
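
# Optional finite-difference gradient check (a sketch; the small net below is
# illustrative, and NN() would need to expose its gradients for a full comparison):
# eps = 1e-6
# w = {'h1': np.random.randn(1, 4) * 0.1, 'h2': np.random.randn(4, 4) * 0.1,
#      'out': np.random.randn(4, 1) * 0.1}
# b = {'b1': np.zeros((4, 1)), 'b2': np.zeros((4, 1)), 'out': np.zeros((1, 1))}
# x0, y0 = generate_data([[0.5]], function)
# base = Network(x0, y0, w, b).loss
# w['h1'][0, 0] += eps
# print((Network(x0, y0, w, b).loss - base) / eps)  # should match the analytic dL/dW_h1[0, 0]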