-
Notifications
You must be signed in to change notification settings - Fork 82
/
bivariateProbit.R
149 lines (106 loc) · 3.43 KB
/
bivariateProbit.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
### Stata's seems to be the primary audience concerned with these models, but I
### thought I'd play around with one here (I've never had reason to use a probit
### model in practice). Stata examples come from the UCLA ATS website and the
### Stata manual so one can investigate the Stata result for comparison.
# standard probit --------------------------------------------------------
probitLL = function(beta, X, y){
mu = X %*% beta
# these produce identical results, but the second is the typical depiction
# ll = sum(dbinom(
# y,
# size = 1,
# prob = pnorm(mu),
# log = T
# ))
ll = sum(y * pnorm(mu, log = T) + (1 - y) * log(1 - pnorm(mu)))
-ll
}
### Example 1
# Example at https://stats.idre.ucla.edu/stata/dae/probit-regression/
admit = haven::read_dta('https://stats.idre.ucla.edu/stat/stata/dae/binary.dta')
head(admit)
X = model.matrix(admit~ gre + gpa + factor(rank), admit)
y = admit$admit
init = rep(0, ncol(X))
optimResult = optim(
fn = probitLL,
par = init,
X = X,
y = y,
method = 'BFGS'
)
optimResult
### Example 2: from Stata manual on standard probit
# http://www.stata.com/manuals13/rprobit.pdf
# "We have data on the make, weight, and mileage rating of 22 foreign and 52
# domestic automobiles. We wish to fit a probit model explaining whether a car
# is foreign based on its weight and mileage."
auto = haven::read_dta('http://www.stata-press.com/data/r13/auto.dta')
head(auto)
X = model.matrix(foreign~ weight + mpg, auto)
y = auto$foreign
init = rep(0, ncol(X))
optimResult = optim(
fn = probitLL,
par = init,
X = X,
y = y
)
optimResult
# Bivariate probit --------------------------------------------------------
bivariateProbitLL = function(pars, X, y1, y2) {
rho = pars[1]
mu1 = X %*% pars[2:(ncol(X) + 1)]
mu2 = X %*% pars[(ncol(X) + 2):length(pars)]
q1 = ifelse(y1 == 1, 1,-1)
q2 = ifelse(y2 == 1, 1,-1)
require(mnormt)
eta1 = q1 * mu1
eta2 = q2 * mu2
ll = matrix(NA, nrow = nrow(X))
for (i in 1:length(ll)) {
corr = q1[i] * q2[i] * rho
corr = matrix(c(1, corr, corr, 1), 2)
ll[i] = log(
pmnorm(
x = c(eta1[i], eta2[i]),
mean = c(0, 0),
varcov = corr
)
)
}
# the loop is probably clearer, and there is no difference in time, but here's a oneliner
# ll = mapply(function(e1, e2, q1, q2) log(pmnorm(x=c(e1, e2), varcov = matrix(c(1,q1*q2*rho,q1*q2*rho,1),2))),
# eta1, eta2, q1, q2)
-sum(ll)
}
### Example 3: from stata manual on bivariate probit
# "We wish to model the bivariate outcomes of whether children attend private
# school and whether the head of the household voted for an increase in property
# tax based on the other covariates."
school = haven::read_dta('http://www.stata-press.com/data/r13/school.dta')
head(school)
X = model.matrix(private ~ years + logptax + loginc, school)
y1 = school$private
y2 = school$vote
init = c(0, rep(0, ncol(X)*2))
# you'll probably get a warning or two, ignore; takes a couple seconds
optimResult = optim(
fn = bivariateProbitLL,
par = init,
X = X,
y1 = y1,
y2 = y2,
method = 'BFGS'
)
loglik = optimResult$value
rho = optimResult$par[1]
coefsPrivate = optimResult$par[2:(ncol(X) + 1)]
coefsVote = optimResult$par[(ncol(X) + 2):length(init)]
names(coefsPrivate) = names(coefsVote) = c('Int', 'years', 'logptax', 'loginc')
list(
loglik = loglik,
rho = rho,
Private = coefsPrivate,
Vote = coefsVote
)