Skip to content

Commit 4af7775

Browse files
committed
First commit
1 parent 2159951 commit 4af7775

36 files changed

+5032
-0
lines changed

.Rbuildignore

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
^_dev$
2+
^.*\.Rproj$
3+
^\.Rproj\.user$
4+
^README\.Rmd$

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,4 @@ po/*~
4747

4848
# RStudio Connect folder
4949
rsconnect/
50+
inst/doc

DESCRIPTION

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
Package: eepd
2+
Title: What the Package Does (One Line, Title Case)
3+
Version: 0.0.0.9000
4+
Authors@R:
5+
person("Noah", "Greifer", email = "[email protected]", role = c("aut", "cre"),
6+
comment = c(ORCID = "0000-0003-3067-7154"))
7+
Description: What the package does (one paragraph).
8+
License: GPL (>= 2)
9+
Imports:
10+
stats,
11+
marginaleffects (>= 0.14.0),
12+
pbapply (>= 1.7-2),
13+
fwb (>= 0.2.0),
14+
chk (>= 0.9.0)
15+
Encoding: UTF-8
16+
Roxygen: list(markdown = TRUE)
17+
RoxygenNote: 7.3.0
18+
Depends:
19+
R (>= 2.10)
20+
LazyData: true
21+
Suggests:
22+
knitr,
23+
rmarkdown
24+
VignetteBuilder: knitr

NAMESPACE

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
S3method(c,eepd_models)
4+
S3method(plot,eepd_sim)
5+
S3method(print,eepd_models)
6+
S3method(print,eepd_sim)
7+
export(eepd_boot)
8+
export(eepd_fit)
9+
export(eepd_mod)
10+
export(eepd_sim)
11+
import(ggplot2)

R/aux_functions.R

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
#Fast (weighted) mean, optionally with subset
2+
.wtd_mean <- function(x, w = NULL, subset = NULL) {
3+
if (is.null(subset)) {
4+
if (is.null(w)) {
5+
sum(x) / length(x)
6+
}
7+
else {
8+
sum(x * w) / sum(w)
9+
}
10+
}
11+
else {
12+
x <- x[subset]
13+
if (is.null(w)) {
14+
sum(x) / length(x)
15+
}
16+
else {
17+
w <- w[subset]
18+
sum(x * w) / sum(w)
19+
}
20+
}
21+
}
22+
23+
.wtd_sd <- function(x, w = NULL, subset = NULL) {
24+
if (is.null(subset)) {
25+
if (is.null(w)) {
26+
sqrt(sum((x - .wtd_mean(x))^2)/(length(x) - 1))
27+
}
28+
else {
29+
sum_w <- sum(w)
30+
sqrt((sum_w / (sum_w^2 - sum(w^2))) * sum(w * (x - .wtd_mean(x, w))^2))
31+
}
32+
}
33+
else {
34+
x <- x[subset]
35+
if (is.null(w)) {
36+
sqrt(sum((x - .wtd_mean(x))^2)/(length(x) - 1))
37+
}
38+
else {
39+
w <- w[subset]
40+
sum_w <- sum(w)
41+
sqrt((sum_w / (sum_w^2 - sum(w^2))) * sum(w * (x - .wtd_mean(x, w))^2))
42+
}
43+
}
44+
}
45+
46+
#Binds together multiple smaller square matrices (e.g., vcovs) into a larger one
47+
#with 0s in the empty spaces. Used to create large covariance matrix
48+
.block_diagonal <- function(matlist) {
49+
dim1 <- sum(unlist(lapply(matlist, ncol)))
50+
51+
out <- matrix(0, nrow = dim1, ncol = dim1)
52+
k <- 0
53+
for (v in matlist) {
54+
ind <- k + seq_len(ncol(v))
55+
out[ind, ind] <- v
56+
k <- k + ncol(v)
57+
}
58+
59+
out
60+
}
61+
62+
#Checks if a given family specification is okay
63+
.okay_family <- function(family) {
64+
if (is.character(family)) {
65+
if (length(family) != 1 || anyNA(family)) return(FALSE)
66+
if (family %in% c("negbin", "negative.binomial", "Negative Binomial")) return(TRUE)
67+
family <- get(family, mode = "function", envir = parent.frame(2))
68+
}
69+
if (is.function(family)) {
70+
family <- family()
71+
}
72+
73+
!is.null(family$family) && is.function(family$variance) &&
74+
is.function(family$linkinv)
75+
}
76+
77+
#Joint covariance matrix of coefficients across multiple models. Requires same units in all models,
78+
#use nonzero weights to subset (e.g., weights of 1 for present and 1e-8 for absent). Should give
79+
#same results as M-estimation (HC0 vcov).
80+
vcovSUEST <- function(fits) {
81+
coef_lengths <- lengths(lapply(fits, function(f) na.omit(coef(f))))
82+
l <- c(0, cumsum(coef_lengths))
83+
coef_inds <- lapply(seq_along(fits), function(i) seq_len(coef_lengths[i]) + l[i])
84+
85+
inf_func <- lapply(fits, function(f) {
86+
b <- .bread(f)
87+
ef <- sandwich::estfun(f)
88+
inf <- tcrossprod(b, ef)/nobs(f)
89+
inf[is.na(inf)] <- 0
90+
inf
91+
})
92+
93+
#VCOV matrix to be returned
94+
V <- matrix(NA_real_, nrow = sum(coef_lengths), ncol = sum(coef_lengths))
95+
96+
for (i in seq_along(fits)) {
97+
ind_i <- coef_inds[[i]]
98+
99+
#Usual within-model HC0 vcov
100+
V[ind_i, ind_i] <- tcrossprod(inf_func[[i]], inf_func[[i]])
101+
102+
for (j in seq_along(fits)[-seq_len(i)]) {
103+
ind_j <- coef_inds[[j]]
104+
105+
#between-model vcov components
106+
V[ind_i, ind_j] <- tcrossprod(inf_func[[i]], inf_func[[j]])
107+
V[ind_j, ind_i] <- t(V[ind_i, ind_j])
108+
}
109+
}
110+
111+
V
112+
}
113+
114+
#Quickly get bread matrix; for non-lm and nonglm objects, uses sandwich::bread()
115+
.bread <- function(x) {
116+
if (!class(x)[1] %in% c("lm", "glm")) {
117+
return(sandwich::bread(x))
118+
}
119+
120+
p <- x$rank
121+
122+
if (p == 0) {
123+
return(matrix(NA_real_, 0L, 0L))
124+
}
125+
126+
Qr <- x$qr
127+
128+
coef.p <- x$coefficients[Qr$pivot[1:p]]
129+
cov.unscaled <- chol2inv(Qr$qr[1:p, 1:p, drop = FALSE])
130+
dimnames(cov.unscaled) <- list(names(coef.p), names(coef.p))
131+
132+
df <- p + x$df.residual
133+
134+
out <- cov.unscaled * df
135+
136+
if (class(x)[1] == "glm" && !substr(x$family$family, 1L, 17L) %in% c("poisson", "binomial", "Negative Binomial")) {
137+
ww <- weights(x, "working")
138+
wres <- as.vector(residuals(x, "working")) * ww
139+
dispersion <- sum(wres^2, na.rm = TRUE) / sum(ww, na.rm = TRUE)
140+
out <- out * dispersion
141+
}
142+
143+
out
144+
}
145+
146+
147+

R/eepd-package.R

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#' @keywords internal
2+
"_PACKAGE"
3+
4+
## usethis namespace: start
5+
#' @import ggplot2
6+
## usethis namespace: end
7+
NULL

R/eepd_boot.R

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#' @title Bootstrap estimation of ATTs
2+
#'
3+
#' @description `eepd_boot()` bootstraps the selection of optimal models and estimation of ATTs done by [eepd_sim()] in order to account for uncertainty in sampling from the population. Bootstrapping is done by [fwb::fwb()], which uses the fractional weighted bootstrap or the traditional bootstrap.
4+
#'
5+
#' @inheritParams eepd_fit
6+
#' @inheritParams fwb::fwb
7+
#' @param models either an `eepd_models` object (the output of a call to [eepd_mod()]) or an `eepd_fits` object (the output of a call to [eepd_fit()]). If the latter, the arguments `data`, `group_var`, `unit_var`, `time_var`, `val_times`, and `post_time` should be left empty as they will be extracted from the supplied object.
8+
#' @param nboot the number of bootstrap iterations to use; default is 999. More is better but takes longer.
9+
#' @param boot_type string; the type of bootstrap to perform. See the `wtype` argument of [fwb::fwb()] for allowable options. The default is `"exp"`, which requests the fractional weighted bootstrap using weights drawn from an Exp(1) distribution. `"multinom"` requests the usual bootstrap, which can fail when key observation requird to fit certain models happen not to be selected into a given bootstrap sample.
10+
#' @param nsim the number of simulation iteration to perform in each bootstrap sample. Default is 200. More is better but takes longer.
11+
#'
12+
#' @returns
13+
#' An `fwb` object containing the estimated ATTs in each bootstrap iteration. See [fwb::fwb()] for details. `summary()`, `plot()`, and `print()` methods are available; see [fwb::summary.fwb()] and [fwb::plot.fwb()] for details.
14+
#'
15+
#' @seealso [eepd_fit()]; [eepd_sim()]; [fwb::fwb()]
16+
#'
17+
#' @examples
18+
#' data("ptpdata")
19+
#'
20+
#' # Combination of 8 models: 2 baseline formulas,
21+
#' # 2 families, 2 lags
22+
#' models <- eepd_mod(list(crude_rate ~ 1,
23+
#' crude_rate ~ year),
24+
#' log = c(FALSE, TRUE))
25+
#' models
26+
#'
27+
#' # Fit the models to data; unit_var must be supplied for
28+
#' # fixed effects
29+
#' cl <- parallel::detectCores()
30+
#' boot_out <- eepd_boot(models, data = ptpdata,
31+
#' nboot = 99, nsim = 100,
32+
#' group_var = "group",
33+
#' time_var = "year",
34+
#' val_times = 1999:2003,
35+
#' post_time = 2008,
36+
#' unit_var = "state",
37+
#' cl = cl, verbose = TRUE)
38+
#'
39+
#' summary(boot_out, ci.type = "perc")
40+
41+
#' @export
42+
eepd_boot <- function(models, data, nboot = 999, boot_type = getOption("fwb_wtype", "exp"), nsim = 200,
43+
group_var, unit_var, time_var,
44+
val_times, post_time, cl = NULL, verbose = FALSE) {
45+
# Argument checks
46+
mcall <- match.call()
47+
chk::chk_not_missing(models, "`models`")
48+
49+
if (inherits(models, "eepd_models")) {
50+
chk::chk_not_missing(data, "`data`")
51+
chk::chk_data(data)
52+
53+
# Process and order dataset
54+
data <- as.data.frame(data)
55+
56+
if (is.null(rownames(data))) {
57+
rownames(data) <- seq_len(nrow(data))
58+
}
59+
60+
chk::chk_not_missing(group_var, "`group_var`")
61+
chk::chk_string(group_var)
62+
chk::chk_subset(group_var, names(data))
63+
if (length(unique(data[[group_var]])) != 2) {
64+
chk::err("the grouping variable must have exactly 2 unique values")
65+
}
66+
data[[group_var]] <- factor(data[[group_var]])
67+
levels(data[[group_var]]) <- c("0", "1")
68+
69+
chk::chk_not_missing(time_var, "`time_var`")
70+
chk::chk_string(time_var)
71+
chk::chk_subset(time_var, names(data))
72+
73+
if (any(vapply(models, function(m) m$diff_k > 0 || m$lag > 0 || m$fixef, logical(1L)))) {
74+
chk::chk_not_missing(unit_var, "`unit_var`")
75+
chk::chk_string(unit_var)
76+
chk::chk_subset(unit_var, names(data))
77+
data[[unit_var]] <- factor(data[[unit_var]])
78+
79+
data <- data[order(data[[unit_var]], data[[time_var]]),, drop = FALSE]
80+
}
81+
82+
chk::chk_not_missing(val_times, "`val_times`")
83+
chk::chk_numeric(val_times)
84+
chk::chk_subset(val_times, data[[time_var]])
85+
86+
chk::chk_not_missing(post_time, "`post_time`")
87+
chk::chk_number(post_time)
88+
chk::chk_subset(post_time, data[[time_var]])
89+
chk::chk_gt(post_time, val_times)
90+
}
91+
else if (inherits(models, "eepd_fits")) {
92+
chk::chk_missing(data, "`data`")
93+
chk::chk_missing(group_var, "`group_var`")
94+
chk::chk_missing(time_var, "`time_var`")
95+
chk::chk_missing(unit_var, "`unit_var`")
96+
chk::chk_missing(val_times, "`val_times`")
97+
chk::chk_missing(post_time, "`post_time`")
98+
99+
data <- models$data
100+
group_var <- attr(models, "group_var")
101+
time_var <- attr(models, "time_var")
102+
unit_var <- attr(models, "unit_var")
103+
val_times <- models$val_times
104+
post_time <- models$post_time
105+
models <- models$models
106+
}
107+
else {
108+
chk::err("`models` must be an `eepd_models` object (the output of a call to `eepd_mod()`) or an `eepd_fits` object (the output of a call to `eepd_fit()`")
109+
}
110+
111+
chk::chk_flag(verbose)
112+
113+
.boot_fun <- function(.data, .weights) {
114+
115+
fits <- .fit_models_internal(models, .data, .weights, group_var, unit_var, time_var,
116+
val_times, post_time)
117+
118+
if (all(.weights == 1)) {
119+
sim_est <- eepd_sim(fits)
120+
}
121+
else {
122+
sim_est <- eepd_sim(fits, nsim)
123+
}
124+
125+
c(ATT = unname(mean(sim_est$atts)))
126+
}
127+
128+
boot_out <- fwb::fwb(data, .boot_fun, R = nboot, cluster = data[[unit_var]],
129+
wtype = boot_type, verbose = verbose, cl = cl)
130+
131+
class(boot_out) <- c("eepd_boot", class(boot_out))
132+
133+
boot_out
134+
}

0 commit comments

Comments
 (0)