Skip to content

Commit

Permalink
4.0.0 cran prep (#319)
Browse files Browse the repository at this point in the history
* add website and start cran updates

* add check win

* update-news

* update readme and fix bug

* build everything except the site

* readme check

* rebuild

* Update README.rmd

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>

* update readme and new vignette

* new vignette and fix bug it IDd

* rebuild

* fix typos

* rebuild

* docs

* readme workflow was false positives

* print

* kable

* kable-didnt-work

* print-em

---------

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
  • Loading branch information
zachmayer and coderabbitai[bot] authored Aug 13, 2024
1 parent e1e5e2d commit 280eab5
Show file tree
Hide file tree
Showing 27 changed files with 671 additions and 92 deletions.
5 changes: 5 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ revdep/*
^CRAN-SUBMISSION$
check_package.R
^\.github$
^LICENSE\.md$
^_pkgdown\.yml$
^docs$
^pkgdown$
^README\.Rmd$
50 changes: 50 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
#
# Builds the pkgdown site on pushes, pull requests, releases and manual
# dispatch; the deploy step is skipped for pull requests.
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  release:
    types: [published]
  workflow_dispatch:

name: pkgdown.yaml

permissions: read-all

jobs:
  pkgdown:
    runs-on: ubuntu-latest
    # Only restrict concurrency for non-PR jobs
    concurrency:
      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    # The job needs write access so the deploy step can push to gh-pages.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::pkgdown, local::.
          needs: website

      - name: Build site
        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
        shell: Rscript {0}

      - name: Deploy to GitHub pages 🚀
        if: github.event_name != 'pull_request'
        # NOTE(review): the action reference was mangled by e-mail obfuscation in
        # this capture; restored to the ref used by the upstream r-lib example
        # workflow — confirm the pinned version.
        uses: JamesIves/github-pages-deploy-action@v4.5.0
        with:
          clean: false
          branch: gh-pages
          folder: docs
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -145,3 +145,5 @@ doc
Meta
/doc/
/Meta/
inst/doc
docs
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ Package: caretEnsemble
Type: Package
Title: Ensembles of Caret Models
Version: 4.0.0
Date: 2024-08-06
Authors@R: c(person(c("Zachary", "A."), "Deane-Mayer", role = c("aut", "cre"),
email = "zach.mayer@gmail.com"),
person(c("Jared", "E."), "Knowles", role=c("aut"),
email="[email protected]"))
Date: 2024-08-12
Authors@R: c(person(c("Zachary", "A."), "Deane-Mayer", role = c("aut", "cre", "cph"), email = "[email protected]"),
person(c("Jared", "E."), "Knowles", role="ctb", email="jknowles@gmail.com"),
person("Antón", "López", role="ctb", email="[email protected]")
)
URL: https://github.com/zachmayer/caretEnsemble
BugReports: https://github.com/zachmayer/caretEnsemble/issues
Description: Functions for creating ensembles of caret models: caretList()
Expand Down
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
YEAR: 2013-2019
COPYRIGHT HOLDER: Zachary Albert Mayer
YEAR: 2013-2024
COPYRIGHT HOLDER: Zachary Albert Mayer
21 changes: 21 additions & 0 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# MIT License

Copyright (c) 2013-2024 Zachary Albert Mayer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
48 changes: 40 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,38 @@
.PHONY: help
help:
@echo "Available targets:"
@echo " all Run clean, fix-style, document, install, build-vignettes, lint, spell, test, check, coverage"
@echo " all Run clean, fix-style, document, install, readme, vignettes, lint, spell, test, check, coverage, preview-site"
@echo " dev Run clean, fix-style, document, lint, spell, test"
@echo " install-deps Install dependencies"
@echo " install Install the whole package, including dependencies"
@echo " document Generate documentation"
@echo " update-test-fixtures Update test fixtures"
@echo " test Run unit tests"
@echo " coverage Generate coverage reports"
@echo " check Run R CMD check as CRAN"
@echo " view-coverage View coverage report"
@echo " check Run R CMD check locally"
@echo " check-win Run R CMD check on the winbuilder service from CRAN"
@echo " fix-style Auto style the code"
@echo " lint Check the code for lint"
@echo " spell Check spelling"
@echo " build Build the package"
@echo " build-vignettes Build vignettes"
@echo " vignettes Build vignettes"
@echo " readme Build readme"
@echo " release Release to CRAN"
@echo " preview-site Preview pkgdown site"
@echo " dev-guide Open the R package development guide"
@echo " clean Clean up generated files"

.PHONY: all
all: clean fix-style document install build-vignettes lint spell test check coverage
all: clean fix-style document install readme vignettes lint spell test check coverage preview-site

# Quick development loop: the `all` pipeline without the install, readme,
# vignettes, check, coverage and preview-site steps.
.PHONY: dev
dev: clean fix-style document lint spell test

.PHONY: install-deps
install-deps:
Rscript -e "if (!requireNamespace('devtools', quietly = TRUE)) install.packages('devtools')"
Rscript -e "if (!requireNamespace('pkgdown', quietly = TRUE)) install.packages('pkgdown')"
Rscript -e "devtools::install_deps()"
Rscript -e "devtools::install_dev_deps()"
Rscript -e "devtools::update_packages()"
Expand Down Expand Up @@ -75,14 +85,22 @@ coverage-test: coverage.rds
testthat::expect_gte(cov_num, 100.0); \
"

.PHONY: view-coverage
view-coverage: coverage-report.html
open coverage-report.html

.PHONY: coverage
coverage: cobertura.xml coverage-report.html coverage-test
coverage: cobertura.xml coverage-report.html view-coverage coverage-test

.PHONY: check
check: document
check:
Rscript -e "devtools::check(cran = FALSE, remote = TRUE, manual = TRUE, force_suggests = TRUE, error_on = 'note')"
Rscript -e "devtools::check(cran = TRUE , remote = TRUE, manual = TRUE, force_suggests = TRUE, error_on = 'note')"

.PHONY: check-win
check-win:
Rscript -e "devtools:::check_win()"

.PHONY: fix-style
fix-style:
Rscript -e "styler::style_pkg()"
Expand All @@ -107,19 +125,33 @@ spell:
build:
Rscript -e "devtools::build()"

.PHONY: build-vignettes
build-vignettes:
.PHONY: vignettes
vignettes:
Rscript -e "devtools::build_vignettes()"

.PHONY: readme
readme:
Rscript -e "devtools::build_readme()"

.PHONY: preview-site
preview-site:
Rscript -e "pkgdown::build_site()"
open docs/index.html

.PHONY: release
release:
Rscript -e "devtools::release()"

.PHONY: dev-guide
dev-guide:
open https://r-pkgs.org/whole-game.html

.PHONY: clean
clean:
rm -rf *.Rcheck
rm -f *.tar.gz
rm -f man/*.Rd
rm -f README.md
rm -f coverage.rds
rm -f cobertura.xml
rm -f coverage-report.html
Expand Down
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,24 @@
# NEWS

## caretEnsemble 4.0.0
- Multiclass support! caretList, caretStack, and caretEnsemble
- The greedy optimizer is back! caretEnsemble now uses a greedy optimizer by default. This optimizer can never be worse than the worst single model. caretStack still supports all caret models, including glm.
- Refactored some internals for scalability (e.g. data.table for predictions, trim some un-needed data by default).
- Moved all the S3 methods to caretStack, which now supports print, summary, plot, dotplot, and autoplot. caretEnsemble inherits from caretStack, and therefore also supports all of these methods.
- Allow ensembling of mixed lists of classification and regression models.
- Allow ensemble of models with different resampling strategies, so long as they were trained on the same data.
- Allow transfer learning for ensembling models trained on different datasets.
- Added permutation importance as the default importance method for caretLists and caretStacks.
- Add a default trainControl constructor to make it easier to build good controls for training caretLists for stacking with caretStack.
- Expanded test coverage to 100%.
- Sped up test suite (unit tests now run in 20 seconds).
- Delinted codebase: now conforms with all available linters save the object name linter.
- Added a makefile for easier local package development.
- Fixed badges in the readme.
- Added a pkgdown site.
- Switched to github actions (from travis) for CI.
- Internal refactoring, optimization, and bug fixes.

## caretEnsemble 2.0.3
- Fix broken package documentation with new roxygen2
- Replace deprecated linters with the new versions
Expand Down
13 changes: 10 additions & 3 deletions R/caretList.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,22 +175,29 @@ predict.caretList <- function(object, newdata = NULL, verbose = FALSE, excluded_
#' models. We also construct explicit fold indexes and return the stacked predictions,
#' which are needed for stacking. For classification models we return class probabilities.
#' @param target the target variable.
#' @param method the method to use for trainControl.
#' @param number the number of folds to use.
#' @param savePredictions the type of predictions to save.
#' @param index the fold indexes to use.
#' @param is_class logical, is this a classification or regression problem.
#' @param is_binary logical, is this binary classification.
#' @param ... other arguments to pass to \code{\link[caret]{trainControl}}
#' @export
defaultControl <- function(
target,
method = "cv",
number = 5L,
savePredictions = "final",
index = caret::createFolds(target, k = number, list = TRUE, returnTrain = TRUE),
is_class = is.factor(target) || is.character(target),
is_binary = length(unique(target)) == 2L,
...) {
stopifnot(savePredictions %in% c("final", "all"))
caret::trainControl(
method = "cv",
method = method,
number = number,
index = caret::createFolds(target, k = number, list = TRUE, returnTrain = TRUE),
savePredictions = "final",
index = index,
savePredictions = savePredictions,
classProbs = is_class,
summaryFunction = ifelse(is_class && is_binary, caret::twoClassSummary, caret::defaultSummary),
returnData = FALSE,
Expand Down
52 changes: 29 additions & 23 deletions R/caretStack.R
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,10 @@ wtd.sd <- function(x, w, na.rm = FALSE) {
#' print(meta_model)
print.caretStack <- function(x, ...) {
  # Header: names of the base models that feed the stack.
  cat("The following models were ensembled:", toString(names(x$models)), " \n")

  # The caret::train wrapper around the meta-model.
  cat("\ncaret::train model:\n")
  print(x$ens_model)

  # The underlying fitted model held inside the train object.
  cat("\nFinal model:\n")
  print(x$ens_model$finalModel)

  # Print methods return their argument invisibly so the call can be
  # chained or assigned without printing a second time.
  invisible(x)
}

#' @title Summarize a caretStack object
Expand Down Expand Up @@ -436,6 +439,8 @@ stackedTrainResiduals <- function(object, show_class_id = 2L) {
#' @description This function provides a more robust series of diagnostic plots
#' for a caretEnsemble object.
#' @param object a \code{caretStack} object
#' @param training_data The data used to train the ensemble. Required if xvars is not NULL.
#' Must be in the same row order as when the models were trained.
#' @param xvars a vector of the names of x variables to plot against residuals
#' @param show_class_id For classification only: which class level to show on the plot
#' @param ... ignored
Expand All @@ -458,7 +463,7 @@ stackedTrainResiduals <- function(object, show_class_id = 2L) {
#' ens <- caretStack(models.reg)
#' autoplot(ens)
# https://github.com/thomasp85/patchwork/issues/226 — why we need importFrom patchwork plot_layout
autoplot.caretStack <- function(object, xvars = NULL, show_class_id = 2L, ...) {
autoplot.caretStack <- function(object, training_data = NULL, xvars = NULL, show_class_id = 2L, ...) {
stopifnot(methods::is(object, "caretStack"))
ensemble_data <- stackedTrainResiduals(object$ens_model, show_class_id = show_class_id)

Expand Down Expand Up @@ -517,28 +522,29 @@ autoplot.caretStack <- function(object, xvars = NULL, show_class_id = 2L, ...) {
)

# Residuals vs X variables
x_data <- data.table::data.table(object$models[[1L]]$trainingData)
if (is.null(xvars)) {
xvars <- names(x_data)
xvars <- setdiff(xvars, c(".outcome", ".weights", "(Intercept)"))
xvars <- sample(xvars, 2L)
out <- (g1 + g2) / (g3 + g4)
if (!is.null(training_data)) {
x_data <- data.table::data.table(training_data)
if (is.null(xvars)) {
xvars <- sample(names(x_data), 2L)
}
data.table::set(x_data, j = "rowIndex", value = seq_len(nrow(x_data)))
plotdf <- merge(ensemble_data, x_data, by = "rowIndex")
g5 <- ggplot2::ggplot(plotdf, ggplot2::aes(.data[[xvars[1L]]], .data[["resid"]])) +
ggplot2::geom_point() +
ggplot2::geom_smooth(se = FALSE) +
ggplot2::scale_x_continuous(xvars[1L]) +
ggplot2::scale_y_continuous("Residuals") +
ggplot2::labs(title = paste0("Residuals Against ", xvars[1L])) +
ggplot2::theme_bw()
g6 <- ggplot2::ggplot(plotdf, ggplot2::aes(.data[[xvars[2L]]], .data[["resid"]])) +
ggplot2::geom_point() +
ggplot2::geom_smooth(se = FALSE) +
ggplot2::scale_x_continuous(xvars[2L]) +
ggplot2::scale_y_continuous("Residuals") +
ggplot2::labs(title = paste0("Residuals Against ", xvars[2L])) +
ggplot2::theme_bw()
out <- out / (g5 + g6)
}
data.table::set(x_data, j = "rowIndex", value = seq_len(nrow(x_data)))
plotdf <- merge(ensemble_data, x_data, by = "rowIndex")
g5 <- ggplot2::ggplot(plotdf, ggplot2::aes(.data[[xvars[1L]]], .data[["resid"]])) +
ggplot2::geom_point() +
ggplot2::geom_smooth(se = FALSE) +
ggplot2::scale_x_continuous(xvars[1L]) +
ggplot2::scale_y_continuous("Residuals") +
ggplot2::labs(title = paste0("Residuals Against ", xvars[1L])) +
ggplot2::theme_bw()
g6 <- ggplot2::ggplot(plotdf, ggplot2::aes(.data[[xvars[2L]]], .data[["resid"]])) +
ggplot2::geom_point() +
ggplot2::geom_smooth(se = FALSE) +
ggplot2::scale_x_continuous(xvars[2L]) +
ggplot2::scale_y_continuous("Residuals") +
ggplot2::labs(title = paste0("Residuals Against ", xvars[2L])) +
ggplot2::theme_bw()
out <- (g1 + g2) / (g3 + g4) / (g5 + g6)
out
}
5 changes: 5 additions & 0 deletions R/permutationImportance.R
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ shuffled_mae <- function(model, original_data, target, pred_type, shuffle_idx) {
new_preds <- as.matrix(stats::predict(model, original_data, type = pred_type))
data.table::set(original_data, j = var, value = old_var)

if (anyNA(new_preds)) { # This shouldn't happen, but it does with rpart.
new_preds[is.na(new_preds)] <- 0.0
}

mae(new_preds, target)
}, numeric(1L))

Expand Down Expand Up @@ -105,6 +109,7 @@ permutationImportance <- function(
is.numeric(preds_orig),
is.finite(preds_orig)
)

# Error of shuffled variables
mae_vars <- shuffled_mae(model, newdata, preds_orig, pred_type, shuffle_idx)

Expand Down
Loading

0 comments on commit 280eab5

Please sign in to comment.