# linear_regression.R
hypothesis <- function(theta, X) {
  # Computes the value of the hypothesis function for linear regression.
  #
  # Args:
  #   theta: vector of ncol(X) + 1 coefficients in the linear regression equation
  #   X: matrix of independent variables (without the column of ones)
  #      Assumption: length(theta) - 1 and ncol(X) are the same
  #
  # Returns:
  #   A vector of hypothesis values given theta and X for all observations in X.
  # Add a column of ones at the beginning of X for the intercept term.
  X <- cbind(1, X)
  # Compute the hypothesis for all observations in X at once.
  as.vector(t(theta) %*% t(X))
}
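# Illustrative check of hypothesis() (values chosen arbitrarily):
#   hypothesis(c(1, 2), matrix(c(0, 1, 2), ncol = 1))
# computes 1 + 2 * x for x = 0, 1, 2 and returns c(1, 3, 5).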
cost <- function(theta, X, y) {
  # Computes the cost of using theta as the coefficients of the linear regression
  # hypothesis function: J(theta) = sum((h - y) ^ 2) / (2 * m).
  #
  # Args:
  #   theta: vector of coefficients for the linear regression hypothesis
  #   X: matrix of observations without the dependent variable
  #   y: vector of the dependent variable
  #
  # Returns:
  #   Cost of using theta in the hypothesis function for the given X and y.
  m <- nrow(X)
  sum((hypothesis(theta, X) - y) ^ 2) / (2 * m)
}
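# Illustrative check of cost() (arbitrary data): a perfect fit costs zero, and a
# constant residual of -1 over three observations gives 3 / (2 * 3) = 0.5.
#   cost(c(0, 1), matrix(1:3, ncol = 1), y = 1:3)  # 0
#   cost(c(0, 1), matrix(1:3, ncol = 1), y = 2:4)  # 0.5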
gradient_descent <- function(theta, X, y, alpha, num_iters) {
  # Computes values of theta that minimize the cost function for linear regression.
  #
  # Args:
  #   theta: vector of initial coefficients for the linear regression hypothesis
  #   X: matrix of observations without the dependent variable
  #   y: vector of the dependent variable
  #   alpha: double representing the learning rate
  #   num_iters: integer representing the number of iterations of gradient descent to perform
  #
  # Returns:
  #   List containing two vectors of doubles:
  #   the theta that minimizes the cost function, and a vector of cost values,
  #   where cost_vals[i] is the cost at the start of iteration i, before that
  #   iteration's parameter update.
  m <- nrow(X)
  cost_vals <- numeric(num_iters)
  # Fold the 1 / m factor of the gradient into the step size.
  coeff <- alpha / m
  for (i in 1:num_iters) {
    cost_vals[i] <- cost(theta, X, y)
    theta <- theta - coeff * derivatives(theta, X, y)
  }
  list(theta = theta, cost_vals = cost_vals)
}
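# Illustrative end-to-end sketch on synthetic data (all values are arbitrary):
#   set.seed(42)
#   X <- matrix(rnorm(100), ncol = 1)
#   y <- 3 + 2 * X[, 1] + rnorm(100, sd = 0.1)
#   fit <- gradient_descent(theta = c(0, 0), X, y, alpha = 0.1, num_iters = 1000)
#   fit$theta                        # approaches c(3, 2)
#   plot(fit$cost_vals, type = "l")  # cost should decrease monotonically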
res <- function(theta, X, y) {
  # Helper function to compute residuals.
  #
  # Args:
  #   theta: vector of coefficients for a linear regression model
  #   X: matrix of observations without the dependent variable
  #   y: vector of observations of the dependent variable
  #
  # Returns:
  #   Vector of residuals for all observations.
  h <- hypothesis(theta, X)
  h - y
}
derivatives <- function(theta, X, y) {
  # Helper function to compute the derivatives in the gradient descent algorithm.
  #
  # Args:
  #   theta: vector of coefficients for a linear regression model
  #   X: matrix of observations without the dependent variable
  #   y: vector of observations of the dependent variable
  #
  # Returns:
  #   Vector of partial derivatives for each value of theta
  #   (without the 1 / m factor, which gradient_descent folds into its step size).
  X_ones <- cbind(1, X)
  # The gradient of the squared-error cost is proportional to t(X) %*% (h - y).
  as.vector(t(X_ones) %*% res(theta, X, y))
}
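# Illustrative sanity check (arbitrary data): since gradient_descent folds the
# 1 / m factor into its step size, derivatives() should equal a central-difference
# gradient of cost() multiplied by m.
#   X <- matrix(1:4, ncol = 1); y <- c(2, 4, 5, 8); theta <- c(0.5, 1.5); eps <- 1e-6
#   num_grad <- sapply(1:2, function(i) {
#     e <- replace(numeric(2), i, eps)
#     (cost(theta + e, X, y) - cost(theta - e, X, y)) / (2 * eps)
#   })
#   all.equal(derivatives(theta, X, y), nrow(X) * num_grad)  # TRUE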
standarize_features <- function(X) {
  # Standardizes features so that each column has mean 0 and standard deviation 1.
  #
  # Args:
  #   X: matrix of features to standardize
  #
  # Returns:
  #   List of:
  #     1. doubles - standardized features.
  #     2. doubles - parameters used for standardization (means and standard deviations).
  means <- apply(X, 2, mean)
  stds <- apply(X, 2, sd)
  for (i in 1:ncol(X)) {
    X[, i] <- (X[, i] - means[i]) / stds[i]
  }
  list(stand_feats = X, means = means, stds = stds)
}
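# Illustrative usage (arbitrary data): standardization is the usual preprocessing
# step before running gradient_descent, and the returned means and stds are what
# you would apply to new observations before predicting.
#   X <- matrix(c(1, 2, 3, 10, 20, 30), ncol = 2)
#   s <- standarize_features(X)
#   s$stand_feats  # each column now has mean 0 and sd 1
#   s$means        # c(2, 20)
#   s$stds         # c(1, 10)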