-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLinearRegression.R
47 lines (38 loc) · 1.66 KB
/
LinearRegression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Creation of data frame ----
# Ten observations pairing years of experience with the corresponding salary.
#
# NOTE: the script intentionally does not call rm(list = ls()) or clear the
# console. Wiping the global environment destroys whatever the person sourcing
# this file had in their session, and it does not produce a clean session
# anyway (attached packages and options are left untouched). Restart R when a
# fresh session is needed.
data <- data.frame(
  Years_Exp = c(1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7),
  Salary = c(
    39343.00, 46205.00, 37731.00, 43525.00, 39891.00,
    56642.01, 60150.20, 54445.67, 64445.89, 57189.33
  )
)
# Scatter plot (base graphics) of salary against years of experience,
# drawn as red points.
plot(
  x = data$Years_Exp,
  y = data$Salary,
  main = "Scatter plot: Years Experienced v/s Salary",
  xlab = "Years Experienced",
  ylab = "Salary",
  col = "red",
  pch = 19
)
# Partitioning the data ----
# Install caTools only when it is missing: an unconditional install.packages()
# re-downloads the package on every run and can fail in a non-interactive
# session that has no CRAN mirror configured.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools")
}
library(caTools)

# Fix the RNG seed so the random split -- and everything derived from it
# (fitted model, predictions, plots) -- is the same on every run.
set.seed(123)

# Logical mask over the rows of `data`; SplitRatio = 0.7 asks for 70% of the
# rows to be flagged TRUE.
split <- sample.split(data$Salary, SplitRatio = 0.7)

# Rows flagged TRUE go to the training set, rows flagged FALSE to the test set.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
training_set <- subset(data, split == TRUE)
testset <- subset(data, split == FALSE)
# Simple linear regression: model Salary as a linear function of Years_Exp,
# estimated from the training rows only.
lm.r <- lm(formula = Salary ~ Years_Exp, data = training_set)

# Show the summary of the fitted model.
summary(lm.r)
# New input values: a data frame with the Years_Exp values to predict for.
new_data <- data.frame(Years_Exp = c(4.0, 9.0, 7.5))

# Predicted salary for each new Years_Exp value, from the fitted linear model.
predict_salaries <- predict(object = lm.r, newdata = new_data)

# Show the predicted salaries.
predict_salaries
# Training-set plot: red points are the observed training salaries, the blue
# line joins the model's predictions at the same Years_Exp values.
library(ggplot2)
ggplot(data = training_set, mapping = aes(x = Years_Exp)) +
  geom_point(mapping = aes(y = Salary), colour = "red") +
  geom_line(
    mapping = aes(y = predict(lm.r, newdata = training_set)),
    colour = "blue"
  ) +
  labs(
    title = "Salary vs Experience (Training set)",
    x = "Years of experience",
    y = "Salary"
  )
# Test-set plot: red points are the test-set observations. The blue line is
# built from the training-set predictions, so it is the same fitted line that
# the training-set plot shows.
ggplot() +
  geom_point(
    data = testset,
    mapping = aes(x = Years_Exp, y = Salary),
    colour = "red"
  ) +
  geom_line(
    data = training_set,
    mapping = aes(x = Years_Exp, y = predict(lm.r, newdata = training_set)),
    colour = "blue"
  ) +
  labs(
    title = "Salary vs Experience (Test set)",
    x = "Years of experience",
    y = "Salary"
  )