1
1
Regression and Other Stories: Gay
2
2
================
3
3
Andrew Gelman, Aki Vehtari
4
- 2021-04-20
4
+ 2021-09-06
5
5
6
6
- [ 22 Advanced regression and multilevel
7
7
models] ( #22-advanced-regression-and-multilevel-models )
46
46
naes <-
47
47
file_naes %> %
48
48
read_csv() %> %
49
- select(! X1 )
49
+ select(! ... 1 )
50
50
51
51
naes
52
52
```
53
53
54
- #> # A tibble: 81,422 x 6
55
- #> age gender race gayFavorFederalMarri… gayFavorStateMarri … gayKnowSomeone
56
- #> <dbl> <chr> <chr> <chr> <chr> <chr>
57
- #> 1 70 Female Hispan… No <NA> <NA>
58
- #> 2 54 Female White No <NA> <NA>
59
- #> 3 74 Male White No <NA> <NA>
60
- #> 4 73 Female Other Yes No No
61
- #> 5 48 Female White No Yes Yes
62
- #> 6 58 Male White No Yes Yes
63
- #> 7 35 Female White Yes <NA> <NA>
64
- #> 8 74 Female White No <NA> <NA>
65
- #> 9 63 Female White No No No
66
- #> 10 64 Male White Yes <NA> <NA>
54
+ #> # A tibble: 81,422 × 6
55
+ #> age gender race gayFavorFederalMarri… gayFavorStateMarr … gayKnowSomeone
56
+ #> <dbl> <chr> <chr> <chr> <chr> <chr>
57
+ #> 1 70 Female Hispanic No <NA> <NA>
58
+ #> 2 54 Female White No <NA> <NA>
59
+ #> 3 74 Male White No <NA> <NA>
60
+ #> 4 73 Female Other Yes No No
61
+ #> 5 48 Female White No Yes Yes
62
+ #> 6 58 Male White No Yes Yes
63
+ #> 7 35 Female White Yes <NA> <NA>
64
+ #> 8 74 Female White No <NA> <NA>
65
+ #> 9 63 Female White No No No
66
+ #> 10 64 Male White Yes <NA> <NA>
67
67
#> # … with 81,412 more rows
68
68
69
69
Let’s understand the ` NA ` s in the data.
@@ -94,15 +94,15 @@ Let’s now look at `age`.
94
94
``` r
95
95
age_count <- function (var ) {
96
96
naes %> %
97
- drop_na({{var }}) %> %
97
+ drop_na({{ var }}) %> %
98
98
count(age ) %> %
99
99
arrange(desc(age ))
100
100
}
101
101
102
102
age_count(gayFavorStateMarriage )
103
103
```
104
104
105
- #> # A tibble: 81 x 2
105
+ #> # A tibble: 81 × 2
106
106
#> age n
107
107
#> <dbl> <int>
108
108
#> 1 97 1
@@ -121,7 +121,7 @@ age_count(gayFavorStateMarriage)
121
121
age_count(gayKnowSomeone )
122
122
```
123
123
124
- #> # A tibble: 81 x 2
124
+ #> # A tibble: 81 × 2
125
125
#> age n
126
126
#> <dbl> <int>
127
127
#> 1 97 1
@@ -151,12 +151,12 @@ Create indicator variable `y` from `var` and add to naes.
151
151
``` r
152
152
indicator <- function (var ) {
153
153
naes %> %
154
- drop_na({{var }}) %> %
154
+ drop_na({{ var }}) %> %
155
155
mutate(
156
156
y =
157
157
case_when(
158
- {{var }} == " Yes" ~ 1 ,
159
- {{var }} == " No" ~ 0 ,
158
+ {{ var }} == " Yes" ~ 1 ,
159
+ {{ var }} == " No" ~ 0 ,
160
160
TRUE ~ NA_real_
161
161
)
162
162
)
@@ -169,10 +169,10 @@ responses for each age for variable `var`.
169
169
``` r
170
170
yes_prop <- function (var ) {
171
171
naes %> %
172
- drop_na({{var }}) %> %
172
+ drop_na({{ var }}) %> %
173
173
group_by(age ) %> %
174
174
summarize(
175
- y = sum({{var }} == " Yes" ) / n(),
175
+ y = sum({{ var }} == " Yes" ) / n(),
176
176
n = n()
177
177
)
178
178
}
@@ -186,14 +186,14 @@ pred <- function(var, method = c("loess", "splines")) {
186
186
method <- match.arg(method )
187
187
188
188
if (method == " loess" ) {
189
- data <- indicator({{var }})
189
+ data <- indicator({{ var }})
190
190
fit <- loess(y ~ age , data = data )
191
191
tibble(
192
192
age = seq_range(data $ age ),
193
193
y = predict(fit , newdata = tibble(age ))
194
194
)
195
195
} else if (method == " splines" ) {
196
- data <- yes_prop({{var }})
196
+ data <- yes_prop({{ var }})
197
197
fit <- stan_gamm4(y ~ s(age ), data = data , refresh = 0 , adapt_delta = 0.99 )
198
198
tibble(age = seq_range(data $ age )) %> %
199
199
predictive_intervals(fit = fit )
@@ -214,7 +214,7 @@ plot <- function(var, method = c("", "loess", "splines")) {
214
214
)
215
215
216
216
plot <-
217
- yes_prop({{var }}) %> %
217
+ yes_prop({{ var }}) %> %
218
218
ggplot(aes(age )) +
219
219
geom_point(aes(y = y , size = n ), shape = " circle filled" , fill = " grey75" ) +
220
220
coord_cartesian(ylim = c(0 , NA )) +
@@ -225,18 +225,18 @@ plot <- function(var, method = c("", "loess", "splines")) {
225
225
) +
226
226
theme(legend.position = " none" ) +
227
227
labs(
228
- title = title %> % pull({{var }}),
228
+ title = title %> % pull({{ var }}),
229
229
x = " Age" ,
230
230
y = " Percentage of yes responses"
231
231
)
232
232
233
233
if (method == " loess" ) {
234
234
plot <-
235
235
plot +
236
- geom_line(aes(age , y ), data = pred(var = {{var }}, method = " loess" )) +
236
+ geom_line(aes(age , y ), data = pred(var = {{ var }}, method = " loess" )) +
237
237
labs(subtitle = " Loess fit" )
238
238
} else if (method == " splines" ) {
239
- v <- pred(var = {{var }}, method = " splines" )
239
+ v <- pred(var = {{ var }}, method = " splines" )
240
240
plot <-
241
241
plot +
242
242
geom_ribbon(aes(ymin = `5%` , ymax = `95%` ), data = v , alpha = 0.25 ) +
0 commit comments