Skip to content

Commit b3e1dab

Browse files
Merge pull request #22 from USEPA/develop
CRAN v0.7.0
2 parents f636ff5 + f3c66cf commit b3e1dab

File tree

142 files changed

+18097
-6506
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

142 files changed

+18097
-6506
lines changed

DESCRIPTION

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: spmodel
22
Title: Spatial Statistical Modeling and Prediction
3-
Version: 0.6.0
3+
Version: 0.7.0
44
Authors@R: c(
55
person(given = "Michael",
66
family = "Dumelle",
@@ -28,7 +28,7 @@ License: GPL-3
2828
Encoding: UTF-8
2929
LazyData: true
3030
Roxygen: list(markdown = TRUE)
31-
RoxygenNote: 7.3.1
31+
RoxygenNote: 7.3.2
3232
Depends:
3333
R (>= 3.5.0)
3434
Imports:
@@ -45,7 +45,8 @@ Suggests:
4545
testthat (>= 3.0.0),
4646
ggplot2,
4747
ranger,
48-
statmod
48+
statmod,
49+
pROC
4950
VignetteBuilder: knitr
5051
Config/testthat/edition: 3
5152
URL: https://usepa.github.io/spmodel/

NAMESPACE

+9
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,12 @@ S3method(AICc,spautor)
88
S3method(AICc,spgautor)
99
S3method(AICc,spglm)
1010
S3method(AICc,splm)
11+
S3method(AUROC,spgautor)
12+
S3method(AUROC,spglm)
13+
S3method(BIC,spautor)
14+
S3method(BIC,spgautor)
15+
S3method(BIC,spglm)
16+
S3method(BIC,splm)
1117
S3method(anova,spautor)
1218
S3method(anova,spgautor)
1319
S3method(anova,spglm)
@@ -345,6 +351,7 @@ S3method(vcov,spgautor)
345351
S3method(vcov,spglm)
346352
S3method(vcov,splm)
347353
export(AICc)
354+
export(AUROC)
348355
export(augment)
349356
export(covmatrix)
350357
export(dispersion_initial)
@@ -391,8 +398,10 @@ importFrom(sf,st_coordinates)
391398
importFrom(sf,st_crs)
392399
importFrom(sf,st_drop_geometry)
393400
importFrom(sf,st_intersects)
401+
importFrom(sf,st_is_longlat)
394402
importFrom(stats,.getXlevels)
395403
importFrom(stats,AIC)
404+
importFrom(stats,BIC)
396405
importFrom(stats,anova)
397406
importFrom(stats,coef)
398407
importFrom(stats,coefficients)

NEWS.md

+18
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,21 @@
1+
# spmodel 0.7.0
2+
3+
## Minor Updates
4+
5+
* Added `AUROC()` functions to compute the area under the receiver operating characteristic (AUROC) curve for `spglm()` and `spgautor()` models when `family` is `"binomial"` and the response is binary (i.e., represents a single success or failure).
6+
* Added a `BIC()` function to compute the Bayesian Information Criterion (BIC) for `splm()`, `spautor()`, `spglm()`, and `spgautor()` models when `estmethod` is `"reml"` (restricted maximum likelihood; the default) or `"ml"` (maximum likelihood).
7+
* Added a `type` argument to `loocv()` when `cv_predict = TRUE` and using `spglm()` or `spgautor()` models so that predictions may be obtained on the link or response scale.
8+
* Added a warning message when `data` is an `sf` object and a geographic (i.e., degrees) coordinate system is used instead of a projected coordinate system.
9+
* Changed the default behavior of `local` in `predict.spmodel` so that it depends only on the observed data sample size. Now, when the observed data sample size exceeds 10,000 `local` is set to `TRUE` by default. This change was made because prediction for big data depends almost exclusively on the observed data sample size, not the number of predictions desired.
10+
* Minor external data updates (for package testing).
11+
* Minor vignette updates.
12+
* Minor documentation updates.
13+
* Minor error message updates.
14+
15+
## Bug Fixes
16+
17+
* Fixed a bug that prohibited proper indexing when calling `predict()` with the `local` method `"distance"` on a model object fit with a random effect or partition factor.
18+
119
# spmodel 0.6.0
220

321
## Minor Updates

R/AUROC.R

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#' Area Under Receiver Operating Characteristic Curve
2+
#'
3+
#' @description Compare area under the receiver operating characteristic curve (AUROC) for binary (e.g.,
4+
#' logistic) models. The area under the ROC curve provides a measure of the
5+
#' model's classification accuracy averaged over all possible threshold values.
6+
#'
7+
#' @param object A fitted model object from [spglm()] or [spgautor()]) where \code{family = "binomial"}
8+
#' and the response values are binary, representing a single success or failure for each datum.
9+
#' @param ... Additional arguments to [pROC::auc()].
10+
#'
11+
#' @return The area under the receiver operating characteristic curve.
12+
#'
13+
#' @order 1
14+
#' @export
15+
#'
16+
#' @examples
17+
#' spgmod <- spglm(presence ~ elev,
18+
#' family = "binomial", data = moose,
19+
#' spcov_type = "exponential"
20+
#' )
21+
#' AUROC(spgmod)
22+
#' @references
23+
#' Robin, X., Turck, N., Hainard, A., Tiberti, N., Lisacek, F., Sanchez, J. C., & Müller, M.
24+
#' (2011). pROC: an open-source package for R and S+ to analyze and compare ROC curves.
25+
#' \emph{BMC bioinformatics}, 12, 1-8.
26+
AUROC <- function(object, ...) {
27+
UseMethod("AUROC", object)
28+
}
29+
30+
#' @rdname AUROC
31+
#' @method AUROC spglm
32+
#' @order 2
33+
#' @export
34+
AUROC.spglm <- function(object, ...) {
35+
if (!requireNamespace("pROC", quietly = TRUE)) {
36+
stop("Install the pROC package before using AUROC().", call. = FALSE)
37+
} else {
38+
39+
if (object$family != "binomial") {
40+
stop("AUROC() only available when family is \"binomial\".", call. = FALSE)
41+
}
42+
43+
if (any(object$size != 1)) {
44+
stop("AUROC() only available for binary models (i.e., models whose response indicates a single success or failure).", call. = FALSE)
45+
}
46+
dotlist <- list(...)
47+
if (!("quiet" %in% names(dotlist))) {
48+
dotlist$quiet <- TRUE
49+
}
50+
as.numeric(do.call(pROC::auc, c(list(response = as.vector(object$y), predictor = fitted(object)), dotlist)))
51+
}
52+
}
53+
54+
#' @rdname AUROC
55+
#' @method AUROC spgautor
56+
#' @order 3
57+
#' @export
58+
AUROC.spgautor <- AUROC.spglm

R/BIC.R

+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
#' Compute BIC of fitted model objects
2+
#'
3+
#' @description Compute BIC for one or
4+
#' several fitted model objects for which a log-likelihood
5+
#' value can be obtained.
6+
#'
7+
#' @param object A fitted model object from [splm()], [spautor()], [spglm()], or [spgautor()]
8+
#' where \code{estmethod} is \code{"ml"} or \code{"reml"}.
9+
#' @param ... Optionally more fitted model objects.
10+
#'
11+
#' @details When comparing models fit by maximum or restricted maximum
12+
#' likelihood, the smaller the BIC, the better the fit. The theory of
13+
#' BIC requires that the log-likelihood has been maximized, and hence,
14+
#' no BIC methods exist for models where \code{estmethod} is not
15+
#' \code{"ml"} or \code{"reml"}. Additionally, BIC comparisons between \code{"ml"}
16+
#' and \code{"reml"} models are meaningless -- comparisons should only be made
17+
#' within a set of models estimated using \code{"ml"} or a set of models estimated
18+
#' using \code{"reml"}. BIC comparisons for \code{"reml"} must
19+
#' use the same fixed effects. To vary the covariance parameters and
20+
#' fixed effects simultaneously, use \code{"ml"}.
21+
#'
22+
#' BIC is defined as \eqn{-2loglik + log(n)(estparams)}, where \eqn{n} is the sample size
23+
#' and \eqn{estparams} is the number of estimated parameters. For \code{"ml"}, \eqn{estparams} is
24+
#' the number of estimated covariance parameters plus the number of estimated
25+
#' fixed effects. For \code{"reml"}, \eqn{estparams} is the number of estimated covariance
26+
#' parameters.
27+
#'
28+
#' @return If just one object is provided, a numeric value with the corresponding
29+
#' BIC.
30+
#'
31+
#' If multiple objects are provided, a \code{data.frame} with rows corresponding
32+
#' to the objects and columns representing the number of parameters estimated
33+
#' (\code{df}) and the BIC.
34+
#'
35+
#' @name BIC.spmodel
36+
#' @method BIC splm
37+
#' @order 1
38+
#' @export
39+
#'
40+
#' @examples
41+
#' spmod <- splm(z ~ water + tarp,
42+
#' data = caribou,
43+
#' spcov_type = "exponential", xcoord = x, ycoord = y
44+
#' )
45+
#' BIC(spmod)
46+
BIC.splm <- function(object, ...) {
47+
48+
# set k as log of sample size
49+
k <- log(object$n)
50+
51+
# store object and ...
52+
object_list <- list(object, ...)
53+
54+
# see if ... has any elements
55+
if (length(object_list) == 1) {
56+
57+
# number of estimated parameters
58+
if (object$estmethod == "ml") {
59+
n_est_param <- object$npar + object$p
60+
} else {
61+
n_est_param <- object$npar
62+
}
63+
64+
# error if not ml or reml
65+
if (!object$estmethod %in% c("ml", "reml")) {
66+
stop("BIC is only defined is estmethod is \"ml\" or \"reml\".", call. = FALSE)
67+
}
68+
# compute BIC
69+
BIC_val <- -2 * logLik(object) + k * (n_est_param)
70+
} else {
71+
72+
73+
# warning if ml and reml in same call
74+
est_methods <- vapply(object_list, function(x) x$estmethod, character(1))
75+
if ("ml" %in% est_methods && "reml" %in% est_methods) {
76+
warning("BIC and BICc should not compare models fit with
77+
\"ml\" to models fit with \"reml\"", call. = FALSE)
78+
}
79+
80+
# warning if reml and fixed effects change
81+
est_methods_reml <- which(est_methods == "reml")
82+
if (length(est_methods_reml) > 1) {
83+
if (any(vapply(
84+
est_methods_reml,
85+
function(x) !identical(formula(object_list[[x]]), formula(object_list[[1]])), logical(1)
86+
))) {
87+
warning("BIC and BICc should not be used to compare models fit with \"reml\" whose fixed effect formulas differ.", call. = FALSE)
88+
}
89+
}
90+
91+
# find model names provided
92+
object_list_names <- as.character(c(substitute(object), (as.list(substitute(list(...)))[-1])))
93+
94+
# error if any names duplicated
95+
if (any(duplicated(object_list_names))) {
96+
stop("Each model object must have a unique name", call. = FALSE)
97+
}
98+
# iterate through each model
99+
object_BIC <- lapply(object_list, function(x) {
100+
# warning if estmethod not ml or reml
101+
if (!object$estmethod %in% c("ml", "reml")) {
102+
stop("BIC is only defined is estmethod is \"ml\" or \"reml\".", call. = FALSE)
103+
}
104+
105+
if (x$estmethod == "ml") {
106+
n_est_param <- x$npar + x$p
107+
} else {
108+
n_est_param <- x$npar
109+
}
110+
111+
# store degrees of freedom (parames estimated) and BIC
112+
data.frame(df = n_est_param, BIC = -2 * logLik(x) + k * (n_est_param))
113+
})
114+
# put all BIC data frames together
115+
BIC_val <- do.call("rbind", object_BIC)
116+
# set rownames as model names
117+
row.names(BIC_val) <- object_list_names
118+
}
119+
# return BIC value
120+
BIC_val
121+
}
122+
123+
#' @rdname BIC.spmodel
124+
#' @method BIC spautor
125+
#' @order 2
126+
#' @export
127+
BIC.spautor <- BIC.splm

R/BIC_glm.R

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#' @rdname BIC.spmodel
2+
#' @method BIC spglm
3+
#' @order 3
4+
#' @export
5+
BIC.spglm <- BIC.splm
6+
7+
#' @rdname BIC.spmodel
8+
#' @method BIC spgautor
9+
#' @order 4
10+
#' @export
11+
BIC.spgautor <- BIC.spautor

R/get_data_object.R

+15-1
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,20 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
8989
stop("Coordinates must be numeric.", call. = FALSE)
9090
}
9191

92+
# check if coordinates are projected
93+
if (is_sf) {
94+
if (st_is_longlat(crs)) {
95+
warning("Coordinates are in a geographic coordinate system. For the most accurate results, please ensure
96+
coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
97+
}
98+
} else {
99+
# possible revisit this later and add explicit warning
100+
# if (any(abs(c(data[[xcoord]], data[[ycoord]])) <= 360)) {
101+
# warning("Coordinates may be in a geographic coordinate system. For the most accurate results, please ensure
102+
# coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
103+
# }
104+
}
105+
92106
# subsetting by na and not na values
93107
## find response variabale name
94108
na_index <- is.na(data[[all.vars(formula)[1]]])
@@ -206,7 +220,7 @@ get_data_object_splm <- function(formula, data, spcov_initial, xcoord, ycoord, e
206220
if (is.null(local)) {
207221
if (n > 5000) {
208222
local <- TRUE
209-
message("Because the sample size exceeds 5000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun splm() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
223+
message("Because the sample size exceeds 5,000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
210224
} else {
211225
local <- FALSE
212226
}

R/get_data_object_glm.R

+15-1
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,20 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
9090
stop("Coordinates must be numeric.", call. = FALSE)
9191
}
9292

93+
# check if coordinates are projected
94+
if (is_sf) {
95+
if (st_is_longlat(crs)) {
96+
warning("Coordinates are in a geographic coordinate system. For the most accurate results, please ensure
97+
coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
98+
}
99+
} else {
100+
# possible revisit this later and add explicit warning
101+
# if (any(abs(c(data[[xcoord]], data[[ycoord]])) <= 360)) {
102+
# warning("Coordinates may be in a geographic coordinate system. For the most accurate results, please ensure
103+
# coordinates are in a projected coordinate system (e.g., via sf::st_transform()).", call. = FALSE)
104+
# }
105+
}
106+
93107
# subsetting by na and not na values
94108
## find response variable name
95109
# na_index <- is.na(data[[all.vars(formula)[1]]])
@@ -239,7 +253,7 @@ get_data_object_spglm <- function(formula, family, data, spcov_initial, xcoord,
239253
if (is.null(local)) {
240254
if (n > 3000) {
241255
local <- TRUE
242-
message("Because the sample size exceeds 3000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun splm() with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
256+
message("Because the sample size exceeds 3,000, we are setting local = TRUE to perform computationally efficient approximations. To override this behavior and compute the exact solution, rerun with local = FALSE. Be aware that setting local = FALSE may result in exceedingly long computational times.")
243257
} else {
244258
local <- FALSE
245259
}

R/get_local_list.R

+7-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,13 @@ get_local_list_estimation <- function(local, data, xcoord, ycoord, n, partition_
6666

6767
# setting var adjust
6868
if (!"var_adjust" %in% names_local) {
69-
local$var_adjust <- "theoretical"
69+
if (n <= 100000) {
70+
local$var_adjust <- "theoretical"
71+
} else {
72+
message('var_adjust was not specified and the sample size exceeds 100,000, so the default var_adjust value is being changed from "theoretical" to "none". To override this behavior, rerun and set var_adjust in local. Be aware that setting var_adjust to "theoretical" may result in exceedingly long computational times.')
73+
local$var_adjust <- "none"
74+
}
75+
7076
} # "none", "empirical", "theoretical", and "pooled"
7177

7278
# setting partition factor

R/glance.R

+6-4
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,21 @@
1414
#' \item \code{n} The sample size.
1515
#' \item \code{p} The number of fixed effects.
1616
#' \item \code{npar} The number of estimated covariance parameters.
17-
#' \item \code{value} The optimized value of the fitting function
17+
#' \item \code{value} The optimized value of the fitting function.
1818
#' \item \code{AIC} The AIC.
1919
#' \item \code{AICc} The AICc.
20-
#' \item \code{logLik} The log-likelihood
20+
#' \item \code{BIC} The BIC.
21+
#' \item \code{logLik} The log-likelihood.
2122
#' \item \code{deviance} The deviance.
22-
#' \item \code{pseudo.r.squared} The pseudo r-squared
23+
#' \item \code{pseudo.r.squared} The pseudo r-squared.
2324
#' }
2425
#'
2526
#' @name glance.spmodel
2627
#' @method glance splm
2728
#' @order 1
2829
#' @export
2930
#'
30-
#' @seealso [AIC.spmodel()] [AICc()] [logLik.spmodel()] [deviance.spmodel()] [pseudoR2()] [tidy.spmodel()] [augment.spmodel()]
31+
#' @seealso [AIC.spmodel()] [AICc()] [BIC.spmodel()] [logLik.spmodel()] [deviance.spmodel()] [pseudoR2()] [tidy.spmodel()] [augment.spmodel()]
3132
#'
3233
#' @examples
3334
#' spmod <- splm(z ~ water + tarp,
@@ -44,6 +45,7 @@ glance.splm <- function(x, ...) {
4445
value = x$optim$value,
4546
AIC = ifelse(is_likbased, AIC(x), NA),
4647
AICc = ifelse(is_likbased, AICc(x), NA),
48+
BIC = ifelse(is_likbased, BIC(x), NA),
4749
logLik = ifelse(is_likbased, logLik(x), NA),
4850
deviance = ifelse(is_likbased, deviance(x), NA),
4951
pseudo.r.squared = pseudoR2(x),

0 commit comments

Comments
 (0)