diff --git a/R/misc.R b/R/misc.R
index 429b5cf2..11fc7f74 100644
--- a/R/misc.R
+++ b/R/misc.R
@@ -130,11 +130,15 @@ contains_vi <- function(object) {!is_empty(object$importance)}
#' beautify time
#'
#' @description
-#' Used to make time printouts more readable with verbose progress
+#' Used to make time printouts more readable with verbose progress.
+#' Based on the beautifyTime function in the ranger package.
#'
-#' @param seconds a difftime value
+#' @param seconds time in seconds.
#'
-#' @return
+#' @noRd
+#'
+#' @return a string with formatted times
+
beautifyTime <- function(seconds) {
result <- ""
diff --git a/man/aorsf-package.Rd b/man/aorsf-package.Rd
index 0c0aa177..dd823558 100644
--- a/man/aorsf-package.Rd
+++ b/man/aorsf-package.Rd
@@ -8,7 +8,7 @@
\description{
\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
-Fit, interpret, and make predictions with oblique random survival forests. Oblique decision trees are notoriously slow compared to their axis based counterparts, but 'aorsf' runs as fast or faster than axis-based decision tree algorithms for right-censored time-to-event outcomes. Methods to accelerate and interpret the oblique random survival forest are described in Jaeger et al., (2023) \doi{10.1080/10618600.2023.2231048}.
+Fit, interpret, and compute predictions with oblique random forests. Includes support for partial dependence, variable importance, and passing customized functions for variable importance and for the identification of linear combinations of features. Methods for the oblique random survival forest are described in Jaeger et al., (2023) \doi{10.1080/10618600.2023.2231048}.
}
\seealso{
Useful links:
diff --git a/man/orsf.Rd b/man/orsf.Rd
index b5810509..53b64937 100644
--- a/man/orsf.Rd
+++ b/man/orsf.Rd
@@ -277,11 +277,12 @@ an \emph{obliqueForest} object
Grow or specify an oblique random forest. While the name \code{orsf()}
implies that this function only works for survival forests,
it can be used for classification, regression, or survival
-forests. So, why isn't this function called \code{orf()}? In its
-early development, the \code{aorsf} package was exclusively for
-oblique random survival forests, but now it is broader.
+forests.
}
\details{
+Why isn't this function called \code{orf()}? In its earlier versions, the
+\code{aorsf} package was exclusively for \emph{o}blique \emph{r}andom \emph{s}urvival \emph{f}orests.
+
\strong{formula for survival oblique RFs}:
\itemize{
\item The response in \code{formula} can be a survival
@@ -387,9 +388,9 @@ penguin_fit
## N trees: 5
## N predictors total: 7
## N predictors per node: 3
-## Average leaves per tree: 5.6
+## Average leaves per tree: 4.4
## Min observations in leaf: 5
-## OOB stat value: 0.99
+## OOB stat value: 0.98
## OOB stat type: AUC-ROC
## Variable importance: anova
##
@@ -414,7 +415,7 @@ bill_fit
## N trees: 5
## N predictors total: 7
## N predictors per node: 3
-## Average leaves per tree: 47.4
+## Average leaves per tree: 51.4
## Min observations in leaf: 5
## OOB stat value: 0.74
## OOB stat type: RSQ
@@ -445,10 +446,10 @@ pbc_fit
## N trees: 5
## N predictors total: 17
## N predictors per node: 5
-## Average leaves per tree: 20.8
+## Average leaves per tree: 21.8
## Min observations in leaf: 5
## Min events in leaf: 1
-## OOB stat value: 0.77
+## OOB stat value: 0.79
## OOB stat type: Harrell's C-index
## Variable importance: anova
##
@@ -495,7 +496,7 @@ take to fit the forest before you commit to it:
orsf_time_to_train()
}\if{html}{\out{}}
-\if{html}{\out{
}}\preformatted{## Time difference of 3.380013 secs
+\if{html}{\out{
}}\preformatted{## Time difference of 2.256203 secs
}\if{html}{\out{
}}
\enumerate{
\item If fitting multiple forests, use the blueprint along with
@@ -566,12 +567,12 @@ brier_scores
\if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4
## .metric .estimator .eval_time .estimate
##
-## 1 brier_survival standard 500 0.0703
-## 2 brier_survival standard 1000 0.0775
-## 3 brier_survival standard 1500 0.110
-## 4 brier_survival standard 2000 0.113
-## 5 brier_survival standard 2500 0.132
-## 6 brier_survival standard 3000 0.183
+## 1 brier_survival standard 500 0.0477
+## 2 brier_survival standard 1000 0.0851
+## 3 brier_survival standard 1500 0.0988
+## 4 brier_survival standard 2000 0.126
+## 5 brier_survival standard 2500 0.159
+## 6 brier_survival standard 3000 0.198
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{roc_scores <- test_pred \%>\%
@@ -583,12 +584,12 @@ roc_scores
\if{html}{\out{
}}\preformatted{## # A tibble: 6 x 4
## .metric .estimator .eval_time .estimate
##
-## 1 roc_auc_survival standard 500 0.933
-## 2 roc_auc_survival standard 1000 0.937
-## 3 roc_auc_survival standard 1500 0.915
-## 4 roc_auc_survival standard 2000 0.930
-## 5 roc_auc_survival standard 2500 0.921
-## 6 roc_auc_survival standard 3000 0.896
+## 1 roc_auc_survival standard 500 0.966
+## 2 roc_auc_survival standard 1000 0.943
+## 3 roc_auc_survival standard 1500 0.937
+## 4 roc_auc_survival standard 2000 0.923
+## 5 roc_auc_survival standard 2500 0.911
+## 6 roc_auc_survival standard 3000 0.881
}\if{html}{\out{
}}
}
}
diff --git a/man/orsf_control.Rd b/man/orsf_control.Rd
index ad2aff3c..3009240f 100644
--- a/man/orsf_control.Rd
+++ b/man/orsf_control.Rd
@@ -310,14 +310,13 @@ The AUC values, from highest to lowest:
\if{html}{\out{
}}\preformatted{sc$AUC$score[order(-AUC)]
}\if{html}{\out{
}}
-\if{html}{\out{
}}\preformatted{## model times AUC se lower upper
-##
-## 1: net 1788 0.9151649 0.02025057 0.8754745 0.9548553
-## 2: rlt 1788 0.9119200 0.02090107 0.8709547 0.9528854
-## 3: accel 1788 0.9095628 0.02143250 0.8675558 0.9515697
-## 4: cph 1788 0.9095628 0.02143250 0.8675558 0.9515697
-## 5: rando 1788 0.9062197 0.02148854 0.8641029 0.9483365
-## 6: pca 1788 0.8983266 0.02303267 0.8531834 0.9434698
+\if{html}{\out{}}\preformatted{## model times AUC se lower upper
+## 1: net 1788 0.9151649 0.02025057 0.8754745 0.9548553
+## 2: rlt 1788 0.9136390 0.02013475 0.8741756 0.9531024
+## 3: accel 1788 0.9095628 0.02143250 0.8675558 0.9515697
+## 4: cph 1788 0.9095628 0.02143250 0.8675558 0.9515697
+## 5: rando 1788 0.9062197 0.02148854 0.8641029 0.9483365
+## 6: pca 1788 0.8999479 0.02226683 0.8563057 0.9435901
}\if{html}{\out{
}}
And the indices of prediction accuracy:
@@ -326,12 +325,11 @@ And the indices of prediction accuracy:
}\if{html}{\out{}}
\if{html}{\out{
}}\preformatted{## model times IPA
-##
## 1: net 1788 0.4905777
-## 2: accel 1788 0.4806649
-## 3: cph 1788 0.4806649
-## 4: rlt 1788 0.4675228
-## 5: pca 1788 0.4369636
+## 2: accel 1788 0.4806065
+## 3: cph 1788 0.4806065
+## 4: rlt 1788 0.4687322
+## 5: pca 1788 0.4383995
## 6: rando 1788 0.4302814
## 7: Null model 1788 0.0000000
}\if{html}{\out{
}}
diff --git a/man/orsf_ice_oob.Rd b/man/orsf_ice_oob.Rd
index b68afe0f..324abb21 100644
--- a/man/orsf_ice_oob.Rd
+++ b/man/orsf_ice_oob.Rd
@@ -13,8 +13,8 @@ orsf_ice_oob(
pred_type = NULL,
expand_grid = TRUE,
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
@@ -25,8 +25,8 @@ orsf_ice_inb(
pred_type = NULL,
expand_grid = TRUE,
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
@@ -39,8 +39,8 @@ orsf_ice_new(
na_action = "fail",
expand_grid = TRUE,
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
}
@@ -174,20 +174,18 @@ ice_oob <- orsf_ice_oob(fit_clsf, pred_spec = pred_spec)
ice_oob
}\if{html}{\out{
}}
-\if{html}{\out{
}}\preformatted{## Key:
-## id_variable id_row class flipper_length_mm pred
-##
-## 1: 1 1 Adelie 190 0.92169247
-## 2: 1 2 Adelie 190 0.80944657
-## 3: 1 3 Adelie 190 0.85172955
-## 4: 1 4 Adelie 190 0.93559327
+\if{html}{\out{}}\preformatted{## id_variable id_row class flipper_length_mm pred
+## 1: 1 1 Adelie 190 0.92059968
+## 2: 1 2 Adelie 190 0.80953569
+## 3: 1 3 Adelie 190 0.84869374
+## 4: 1 4 Adelie 190 0.93559660
## 5: 1 5 Adelie 190 0.97708693
## ---
-## 896: 2 146 Gentoo 210 0.26092984
+## 896: 2 146 Gentoo 210 0.25636964
## 897: 2 147 Gentoo 210 0.04798334
-## 898: 2 148 Gentoo 210 0.07927359
-## 899: 2 149 Gentoo 210 0.84779971
-## 900: 2 150 Gentoo 210 0.11105143
+## 898: 2 148 Gentoo 210 0.07945140
+## 899: 2 149 Gentoo 210 0.84811899
+## 900: 2 150 Gentoo 210 0.10695367
}\if{html}{\out{
}}
There are two identifiers in the output:
@@ -241,7 +239,6 @@ ice_new
}\if{html}{\out{}}
\if{html}{\out{
}}\preformatted{## id_variable id_row flipper_length_mm pred
-##
## 1: 1 1 190 37.94483
## 2: 1 2 190 37.61595
## 3: 1 3 190 37.53681
@@ -267,7 +264,6 @@ ice_new
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{## id_variable id_row species island body_mass_g pred
-##
## 1: 1 1 Adelie Biscoe 3200 37.78339
## 2: 1 2 Adelie Biscoe 3200 37.73273
## 3: 1 3 Adelie Biscoe 3200 37.71248
@@ -293,7 +289,6 @@ ice_new
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{## id_variable id_row variable value level pred
-##
## 1: 1 1 species NA Adelie 37.74136
## 2: 1 2 species NA Adelie 37.42367
## 3: 1 3 species NA Adelie 37.04598
@@ -322,7 +317,6 @@ ice_new
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{## id_variable id_row species island pred
-##
## 1: 1 1 Adelie Biscoe 38.52327
## 2: 1 2 Adelie Biscoe 38.32073
## 3: 1 3 Adelie Biscoe 37.71248
@@ -360,19 +354,18 @@ Compute individual conditional expectation using in-bag data for
ice_train
}\if{html}{\out{
}}
-\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred
-##
-## 1: 1 1 1826.25 1 0.1290317
-## 2: 1 2 1826.25 1 0.1242352
-## 3: 1 3 1826.25 1 0.0963452
-## 4: 1 4 1826.25 1 0.1172367
-## 5: 1 5 1826.25 1 0.2030256
-## ---
-## 746: 5 146 1826.25 5 0.7868537
-## 747: 5 147 1826.25 5 0.2012954
-## 748: 5 148 1826.25 5 0.4893605
-## 749: 5 149 1826.25 5 0.4698220
-## 750: 5 150 1826.25 5 0.9557285
+\if{html}{\out{}}\preformatted{## id_variable id_row pred_horizon bili pred
+## 1: 1 1 1826.25 1 0.1290317
+## 2: 1 2 1826.25 1 0.1242352
+## 3: 1 3 1826.25 1 0.0963452
+## 4: 1 4 1826.25 1 0.1172367
+## 5: 1 5 1826.25 1 0.2030256
+## ---
+## 746: 5 146 1826.25 5 0.7868537
+## 747: 5 147 1826.25 5 0.2012954
+## 748: 5 148 1826.25 5 0.4893605
+## 749: 5 149 1826.25 5 0.4698220
+## 750: 5 150 1826.25 5 0.9557285
}\if{html}{\out{
}}
If you don’t have specific values of a variable in mind, let
@@ -382,19 +375,18 @@ If you don’t have specific values of a variable in mind, let
ice_train
}\if{html}{\out{}}
-\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred
-##
-## 1: 1 1 1826.25 0.55 0.11728559
-## 2: 1 2 1826.25 0.55 0.11728839
-## 3: 1 3 1826.25 0.55 0.08950739
-## 4: 1 4 1826.25 0.55 0.10064959
-## 5: 1 5 1826.25 0.55 0.18736417
-## ---
-## 746: 5 146 1826.25 7.25 0.82600898
-## 747: 5 147 1826.25 7.25 0.29156437
-## 748: 5 148 1826.25 7.25 0.58395919
-## 749: 5 149 1826.25 7.25 0.54202021
-## 750: 5 150 1826.25 7.25 0.96391985
+\if{html}{\out{}}\preformatted{## id_variable id_row pred_horizon bili pred
+## 1: 1 1 1826.25 0.59 0.11706741
+## 2: 1 2 1826.25 0.59 0.11562173
+## 3: 1 3 1826.25 0.59 0.09110739
+## 4: 1 4 1826.25 0.59 0.10069721
+## 5: 1 5 1826.25 0.59 0.18769751
+## ---
+## 746: 5 146 1826.25 7.21 0.82600898
+## 747: 5 147 1826.25 7.21 0.29156437
+## 748: 5 148 1826.25 7.21 0.58220919
+## 749: 5 149 1826.25 7.21 0.54168688
+## 750: 5 150 1826.25 7.21 0.96204106
}\if{html}{\out{
}}
Specify \code{pred_horizon} to get individual conditional expectation at each
@@ -405,19 +397,18 @@ value:
ice_train
}\if{html}{\out{}}
-\if{html}{\out{
}}\preformatted{## id_variable id_row pred_horizon bili pred
-##
-## 1: 1 1 500 0.55 0.008276627
-## 2: 1 1 1000 0.55 0.055724516
-## 3: 1 1 1500 0.55 0.085091120
-## 4: 1 1 2000 0.55 0.123423352
-## 5: 1 1 2500 0.55 0.166380739
-## ---
-## 4496: 5 150 1000 7.25 0.837774757
-## 4497: 5 150 1500 7.25 0.934536379
-## 4498: 5 150 2000 7.25 0.967823286
-## 4499: 5 150 2500 7.25 0.972059574
-## 4500: 5 150 3000 7.25 0.980785643
+\if{html}{\out{}}\preformatted{## id_variable id_row pred_horizon bili pred
+## 1: 1 1 500 0.59 0.008276627
+## 2: 1 1 1000 0.59 0.055715858
+## 3: 1 1 1500 0.59 0.084987224
+## 4: 1 1 2000 0.59 0.123090885
+## 5: 1 1 2500 0.59 0.165214938
+## ---
+## 4496: 5 150 1000 7.21 0.835895969
+## 4497: 5 150 1500 7.21 0.932657591
+## 4498: 5 150 2000 7.21 0.965944498
+## 4499: 5 150 2500 7.21 0.970325309
+## 4500: 5 150 3000 7.21 0.979051377
}\if{html}{\out{
}}
Multi-prediction horizon ice comes with minimal extra computational
diff --git a/man/orsf_pd_oob.Rd b/man/orsf_pd_oob.Rd
index c1053961..789df6bd 100644
--- a/man/orsf_pd_oob.Rd
+++ b/man/orsf_pd_oob.Rd
@@ -15,8 +15,8 @@ orsf_pd_oob(
prob_values = c(0.025, 0.5, 0.975),
prob_labels = c("lwr", "medn", "upr"),
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
@@ -29,8 +29,8 @@ orsf_pd_inb(
prob_values = c(0.025, 0.5, 0.975),
prob_labels = c("lwr", "medn", "upr"),
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
@@ -45,8 +45,8 @@ orsf_pd_new(
prob_values = c(0.025, 0.5, 0.975),
prob_labels = c("lwr", "medn", "upr"),
boundary_checks = TRUE,
- n_thread = 0,
- verbose_progress = FALSE,
+ n_thread = NULL,
+ verbose_progress = NULL,
...
)
}
@@ -194,15 +194,13 @@ pd_oob <- orsf_pd_oob(fit_clsf, pred_spec = pred_spec)
pd_oob
}\if{html}{\out{}}
-\if{html}{\out{
}}\preformatted{## Key:
-## class flipper_length_mm mean lwr medn upr
-##
-## 1: Adelie 190 0.6176908 0.202278109 0.75856417 0.9810614
-## 2: Adelie 210 0.4338528 0.019173811 0.56489202 0.8648110
-## 3: Chinstrap 190 0.2114979 0.017643385 0.15211271 0.7215181
-## 4: Chinstrap 210 0.1803019 0.020108201 0.09679464 0.7035053
-## 5: Gentoo 190 0.1708113 0.001334861 0.02769695 0.5750201
-## 6: Gentoo 210 0.3858453 0.068685035 0.20717073 0.9532853
+\if{html}{\out{}}\preformatted{## class flipper_length_mm mean lwr medn upr
+## 1: Adelie 190 0.6180632 0.207463688 0.76047056 0.9809703
+## 2: Adelie 210 0.4346177 0.018583256 0.56486883 0.8647387
+## 3: Chinstrap 190 0.2119948 0.017692341 0.15658268 0.7163635
+## 4: Chinstrap 210 0.1801186 0.020454479 0.09525310 0.7085293
+## 5: Gentoo 190 0.1699420 0.001277844 0.02831331 0.5738689
+## 6: Gentoo 210 0.3852637 0.068685035 0.20853993 0.9537020
}\if{html}{\out{
}}
Note that predicted probabilities are returned for each class and
@@ -221,7 +219,7 @@ But this isn’t the case for the median predicted probability!
\if{html}{\out{}}\preformatted{sum(pd_oob[flipper_length_mm == 190, medn])
}\if{html}{\out{
}}
-\if{html}{\out{}}\preformatted{## [1] 0.9383738
+\if{html}{\out{
}}\preformatted{## [1] 0.9453666
}\if{html}{\out{
}}
}
@@ -253,7 +251,6 @@ pd_new
}\if{html}{\out{
}}
\if{html}{\out{}}\preformatted{## flipper_length_mm mean lwr medn upr
-##
## 1: 190 42.96571 37.09805 43.69769 48.72301
## 2: 210 45.66012 40.50693 46.31577 51.65163
}\if{html}{\out{
}}
@@ -270,7 +267,6 @@ pd_new
}\if{html}{\out{}}
\if{html}{\out{
}}\preformatted{## species island body_mass_g mean lwr medn upr
-##
## 1: Adelie Biscoe 3200 40.31374 37.24373 40.31967 44.22824
## 2: Chinstrap Biscoe 3200 45.10582 42.63342 45.10859 47.60119
## 3: Gentoo Biscoe 3200 42.81649 40.19221 42.55664 46.84035
@@ -331,7 +327,6 @@ pd_new
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{## variable value level mean lwr medn upr
-##
## 1: species NA Adelie 41.90271 37.10417 41.51723 48.51478
## 2: species NA Chinstrap 47.11314 42.40419 46.96478 51.51392
## 3: species NA Gentoo 44.37038 39.87306 43.89889 51.21635
@@ -360,7 +355,6 @@ pd_new
}\if{html}{\out{
}}
\if{html}{\out{
}}\preformatted{## species island mean lwr medn upr
-##
## 1: Adelie Biscoe 41.98024 37.22711 41.65252 48.51478
}\if{html}{\out{
}}
}
@@ -387,13 +381,12 @@ Compute partial dependence using in-bag data for \code{bili = c(1,2,3,4,5)}:
pd_train
}\if{html}{\out{
}}
-\if{html}{\out{}}\preformatted{## pred_horizon bili mean lwr medn upr
-##
-## 1: 1826.25 0.55 0.2481444 0.02035041 0.1242215 0.8801444
-## 2: 1826.25 0.70 0.2502831 0.02045039 0.1271039 0.8836536
-## 3: 1826.25 1.50 0.2797763 0.03964900 0.1601715 0.9041584
-## 4: 1826.25 3.50 0.3959349 0.13431288 0.2920400 0.9501230
-## 5: 1826.25 7.25 0.5351935 0.28064629 0.4652185 0.9783000
+## 1: 1826.25 0.590 0.2484695 0.02035041 0.1243120 0.8823385
+## 2: 1826.25 0.725 0.2508045 0.02060111 0.1274237 0.8836536
+## 3: 1826.25 1.500 0.2797763 0.03964900 0.1601715 0.9041584
+## 4: 1826.25 3.500 0.3959349 0.13431288 0.2920400 0.9501230
+## 5: 1826.25 7.210 0.5344511 0.27869513 0.4651185 0.9782084
}\if{html}{\out{
}}
Specify \code{pred_horizon} to get partial dependence at each value:
@@ -419,39 +411,38 @@ Specify \code{pred_horizon} to get partial dependence at each value:
pd_train
}\if{html}{\out{}}
-\if{html}{\out{}}\preformatted{## Adelie Chinstrap Gentoo
-## [1,] 0.9405310 0.04121955 0.018249405
-## [2,] 0.9628988 0.03455909 0.002542096
-## [3,] 0.9032074 0.08510528 0.011687309
-## [4,] 0.9300133 0.05209040 0.017896329
-## [5,] 0.7965703 0.16243492 0.040994821
+## [1,] 0.9405286 0.04125900 0.018212368
+## [2,] 0.9628964 0.03459853 0.002505059
+## [3,] 0.9029383 0.08527806 0.011783605
+## [4,] 0.9301983 0.05180907 0.017992625
+## [5,] 0.7968234 0.16538539 0.037791201
}\if{html}{\out{
}}
\if{html}{\out{