-
Notifications
You must be signed in to change notification settings - Fork 0
/
hw4p4-more-state-data.Rmd
125 lines (103 loc) · 2 KB
/
hw4p4-more-state-data.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
title: "State Data Revisited"
author: "Terrel Shumway"
date: "05/02/2015"
output: html_document
---
This document presents answers to homework 4, part 4: linear regression and CART models of life expectancy fitted to R's built-in `state.x77` data.
## Getting and Cleaning the Data
```{r}
# Load the built-in US state data sets (state.x77 and its companions) into
# the workspace.
data("state")

# state.x77 is a matrix. data.frame() converts it and, through its default
# check.names = TRUE, rewrites column names that contain spaces into
# syntactic ones ("Life Exp" -> Life.Exp, "HS Grad" -> HS.Grad).
statedata <- data.frame(state.x77)

# Disabled alternative: download the course's CSV copy of the data on first
# use and read it from disk instead of using the built-in data set.
#
# baseurl = "https://courses.edx.org/c4x/MITx/15.071x_2/asset/"
#
# getdata = function(local){
#   if(!file.exists(local)){
#     library(downloader)
#     remote = paste0(baseurl,local)
#     print(remote)
#     download(remote,local)
#   }
#   read.csv(local)
# }
#
# statedata = getdata("statedataSimple.csv")
```
## Linear Regression Models
problem 1.1:
```{r}
# Full linear model: life expectancy regressed on every other column of
# statedata. The adjusted R-squared is read from the model summary.
m1 <- lm(Life.Exp ~ ., data = statedata)
s <- summary(m1)
s[["adj.r.squared"]]
```
problem 1.2:
```{r}
# Sum of squared errors of the full linear model on the training data.
# Equivalent long form, kept for reference:
#sum((statedata$Life.Exp-predict(m1,type="response"))^2)
sum(residuals(m1)^2)
```
problems 1.3 and 1.4:
```{r}
# Stepwise model selection starting from the full model. With no scope
# given, step() performs backward elimination and prints its trace.
m2 <- step(m1)

# Adjusted R-squared and training-set sum of squared errors of the
# reduced model.
s <- summary(m2)
s[["adj.r.squared"]]
sum(residuals(m2)^2)
```
## CART Models
```{r}
library(rpart)
library(rpart.plot)
```
problem 2.1:
```{r}
# Regression tree with rpart's default controls, using the same response
# and predictors as the linear model m1 (its formula is reused), then plot
# the fitted tree.
m3 <- rpart(formula(m1), data = statedata)
prp(m3)
```
problem 2.2:
```{r}
# Training-set sum of squared errors of the default tree; predict() without
# newdata returns the fitted values.
with(statedata, sum((Life.Exp - predict(m3))^2))
```
problem 2.3:
```{r}
# Same tree as before but allowing leaves with as few as 5 observations.
# When only minbucket is supplied, rpart.control() also sets minsplit to
# 3 * minbucket, so smaller nodes become eligible for splitting.
m4 <- rpart(formula(m1), data = statedata, minbucket = 5)
prp(m4)
```
problem 2.5:
```{r}
# Training-set sum of squared errors of the minbucket = 5 tree.
with(statedata, sum((Life.Exp - predict(m4))^2))
```
problem 2.6:
```{r}
# Tree that uses Area as its only predictor and permits single-observation
# leaves (minbucket = 1), followed by its training-set sum of squared errors.
m5 <- rpart(Life.Exp ~ Area, data = statedata, minbucket = 1)
with(statedata, sum((Life.Exp - predict(m5))^2))
```
## Cross Validation
```{r}
library(caret)

# Resampling scheme: 10-fold cross-validation.
numFolds <- trainControl(method = "cv", number = 10)

# Candidate complexity parameters: 0.01, 0.02, ..., 0.50.
cpGrid <- expand.grid(.cp = seq(from = 0.01, to = 0.5, by = 0.01))

# Seed the random number generator right before train() so the resampling
# is reproducible.
set.seed(111)
cvmodels <- train(
  Life.Exp ~ .,
  data = statedata,
  method = "rpart",
  trControl = numFolds,
  tuneGrid = cpGrid
)

# The cp value selected by cross-validation.
cvmodels$bestTune
```
```{r}
# Refit the all-predictors tree at the cross-validated complexity parameter.
#
# cvmodels$bestTune is a one-row data frame, not a number. rpart's cp
# control expects a numeric scalar, so pull the value out of the single
# tuning column instead of passing the data frame itself. Indexing by
# position works whether the column is named "cp" or ".cp".
best_cp <- cvmodels$bestTune[[1L]]
m6 <- rpart(formula(m1), data = statedata, cp = best_cp)
prp(m6)

# Training-set sum of squared errors of the tuned tree.
sum((statedata$Life.Exp - predict(m6))^2)
```
```{r}
# Repeat the cross-validated cp search with Area as the only predictor.
# This reuses the resampling scheme (numFolds) and grid (cpGrid) defined
# earlier, and overwrites cvmodels from the all-predictors search.
set.seed(111)
cvmodels <- train(
  Life.Exp ~ Area,
  data = statedata,
  method = "rpart",
  trControl = numFolds,
  tuneGrid = cpGrid
)
cvmodels$bestTune

# bestTune is a one-row data frame; rpart's cp control expects a numeric
# scalar, so extract the value from the single tuning column by position
# rather than passing the data frame itself.
best_cp_area <- cvmodels$bestTune[[1L]]
m7 <- rpart(Life.Exp ~ Area, data = statedata, cp = best_cp_area)
prp(m7)

# Training-set sum of squared errors of the tuned Area-only tree.
sum((statedata$Life.Exp - predict(m7))^2)
```