trainStochasticSquaredErrorTwoLayerPerceptron.m
function [hiddenWeights, outputWeights, error] = trainStochasticSquaredErrorTwoLayerPerceptron(activationFunction, dActivationFunction, numberOfHiddenUnits, inputValues, targetValues, epochs, batchSize, learningRate)
% trainStochasticSquaredErrorTwoLayerPerceptron Creates a two-layer perceptron
% and trains it on the MNIST dataset using stochastic gradient descent on
% the squared error.
%
% INPUT:
% activationFunction : Activation function used in both layers.
% dActivationFunction : Derivative of the activation
% function used in both layers.
% numberOfHiddenUnits : Number of hidden units.
% inputValues : Input values for training (784 x 60000).
% targetValues : One-hot encoded target values for
% training (10 x 60000).
% epochs : Number of epochs to train.
% batchSize : Number of stochastic updates per epoch;
% the error is plotted after each batch.
% learningRate : Learning rate to apply.
%
% OUTPUT:
% hiddenWeights : Weights of the hidden layer.
% outputWeights : Weights of the output layer.
% error : Average error (2-norm) over the last
% batch of the final epoch.
%
    % The number of training vectors.
    trainingSetSize = size(inputValues, 2);
    
    % Input vectors have 784 dimensions.
    inputDimensions = size(inputValues, 1);
    % We have to distinguish 10 digits.
    outputDimensions = size(targetValues, 1);
    
    % Initialize the weights of both layers uniformly at random and scale
    % them by each layer's fan-in.
    hiddenWeights = rand(numberOfHiddenUnits, inputDimensions);
    outputWeights = rand(outputDimensions, numberOfHiddenUnits);
    hiddenWeights = hiddenWeights./size(hiddenWeights, 2);
    outputWeights = outputWeights./size(outputWeights, 2);
    
    % Indices of the training vectors used in the current batch; note that
    % zeros(batchSize) would allocate a batchSize x batchSize matrix, so
    % the second argument is needed for a column vector.
    n = zeros(batchSize, 1);
    
    figure; hold on;
    
    for t = 1:epochs
        for k = 1:batchSize
            % Select a training vector uniformly at random.
            n(k) = floor(rand(1)*trainingSetSize + 1);
            
            % Propagate the input vector through the network.
            inputVector = inputValues(:, n(k));
            hiddenActualInput = hiddenWeights*inputVector;
            hiddenOutputVector = activationFunction(hiddenActualInput);
            outputActualInput = outputWeights*hiddenOutputVector;
            outputVector = activationFunction(outputActualInput);
            
            targetVector = targetValues(:, n(k));
            
            % Backpropagate the errors.
            outputDelta = dActivationFunction(outputActualInput).*(outputVector - targetVector);
            hiddenDelta = dActivationFunction(hiddenActualInput).*(outputWeights'*outputDelta);
            
            % Take a gradient descent step on both weight matrices.
            outputWeights = outputWeights - learningRate.*outputDelta*hiddenOutputVector';
            hiddenWeights = hiddenWeights - learningRate.*hiddenDelta*inputVector';
        end
        
        % Calculate the average error over the batch for plotting.
        error = 0;
        for k = 1:batchSize
            inputVector = inputValues(:, n(k));
            targetVector = targetValues(:, n(k));
            
            error = error + norm(activationFunction(outputWeights*activationFunction(hiddenWeights*inputVector)) - targetVector, 2);
        end
        error = error/batchSize;
        
        plot(t, error, '*');
    end
end
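
For reference, a minimal usage sketch follows, assuming the MNIST matrices are already in memory; random placeholders stand in for them here, and the hyperparameter values are purely illustrative. The logistic sigmoid and its derivative are passed as anonymous function handles.

% Minimal usage sketch; the data below are random placeholders, not MNIST.
activation = @(x) 1 ./ (1 + exp(-x));                    % logistic sigmoid
dActivation = @(x) activation(x) .* (1 - activation(x)); % its derivative

inputValues = rand(784, 60000);    % placeholder for the MNIST images
labels = randi(10, 1, 60000) - 1;  % placeholder digit labels 0..9
targetValues = zeros(10, 60000);   % one-hot encode the labels
targetValues(sub2ind(size(targetValues), labels + 1, 1:60000)) = 1;

[hiddenWeights, outputWeights, err] = trainStochasticSquaredErrorTwoLayerPerceptron( ...
    activation, dActivation, 700, inputValues, targetValues, 500, 100, 0.1);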