diff --git a/DESCRIPTION b/DESCRIPTION index fa2ee97d..1fbfb6b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ Imports: GPfit, hardhat (>= 1.2.0), lifecycle (>= 1.0.0), - parsnip (>= 1.0.0), + parsnip (>= 1.0.1.9000), purrr (>= 0.3.2), recipes (>= 1.0.0), rlang (>= 1.0.2), @@ -54,4 +54,6 @@ Encoding: UTF-8 Language: en-US LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.1.9000 +RoxygenNote: 7.2.0.9000 +Remotes: + tidymodels/parsnip diff --git a/NEWS.md b/NEWS.md index 86f5ceb8..1d7a12ef 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # tune (development version) +* `last_fit()`, `fit_resamples()`, `tune_grid()`, and `tune_bayes()` no longer automatically error if the wrong type of `control` object is passed. If the passed control object is not a superset of the one that is needed, the function will still error. For example, passing `control_grid()` to `tune_bayes()` will fail, but passing `control_bayes()` to `tune_grid()` will not. ([#449](https://github.com/tidymodels/tune/issues/449)) + # tune 1.0.0 * `show_notes()` is a new function that can better help understand warnings and errors. diff --git a/R/control.R b/R/control.R index cf5c7901..5b4c76a5 100644 --- a/R/control.R +++ b/R/control.R @@ -1,8 +1,6 @@ #' Control aspects of the grid search process #' #' @inheritParams control_bayes -#' @param allow_par A logical to allow parallel processing (if a parallel -#' backend is registered). 
#' #' @details #' @@ -33,6 +31,9 @@ control_grid <- function(verbose = FALSE, allow_par = TRUE, pkgs = NULL, save_workflow = FALSE, event_level = "first", parallel_over = NULL) { + # Any added arguments should also be added in superset control functions + # in other packages + # add options for seeds per resample val_class_and_single(verbose, "logical", "control_grid()") @@ -84,6 +85,9 @@ control_last_fit <- function( verbose = FALSE, event_level = "first" ) { + # Any added arguments should also be added in superset control functions + # in other packages + extr <- function(x) x control <- control_resamples( @@ -148,6 +152,8 @@ print.control_last_fit <- function(x, ...) { #' `"everything"` describing how to use parallel processing. Alternatively, #' `NULL` is allowed, which chooses between `"resamples"` and `"everything"` #' automatically. +#' @param allow_par A logical to allow parallel processing (if a parallel +#' backend is registered). #' #' If `"resamples"`, then tuning will be performed in parallel over resamples #' alone. Within each resample, the preprocessor (i.e. 
recipe or formula) is @@ -201,7 +207,11 @@ control_bayes <- save_workflow = FALSE, save_gp_scoring = FALSE, event_level = "first", - parallel_over = NULL) { + parallel_over = NULL, + allow_par = TRUE) { + # Any added arguments should also be added in superset control functions + # in other packages + # add options for seeds per resample val_class_and_single(verbose, "logical", "control_bayes()") @@ -216,6 +226,8 @@ control_bayes <- val_class_or_null(pkgs, "character", "control_bayes()") val_class_and_single(event_level, "character", "control_bayes()") val_parallel_over(parallel_over, "control_bayes()") + val_class_and_single(allow_par, "logical", "control_bayes()") + if (!is.infinite(uncertain) && uncertain > no_improve) { cli::cli_alert_warning( @@ -226,6 +238,7 @@ control_bayes <- res <- list( verbose = verbose, + allow_par = allow_par, no_improve = no_improve, uncertain = uncertain, seed = seed, diff --git a/R/last_fit.R b/R/last_fit.R index 6eb63b9e..125975b1 100644 --- a/R/last_fit.R +++ b/R/last_fit.R @@ -83,6 +83,8 @@ last_fit.model_spec <- function(object, preprocessor, split, ..., metrics = NULL )) } + control <- parsnip::condense_control(control, control_last_fit()) + empty_ellipses(...) wflow <- add_model(workflow(), object) @@ -101,6 +103,9 @@ last_fit.model_spec <- function(object, preprocessor, split, ..., metrics = NULL #' @export last_fit.workflow <- function(object, split, ..., metrics = NULL, control = control_last_fit()) { empty_ellipses(...) + + control <- parsnip::condense_control(control, control_last_fit()) + last_fit_workflow(object, split, metrics, control) } diff --git a/R/resample.R b/R/resample.R index 457cdbc6..c2ef53ef 100644 --- a/R/resample.R +++ b/R/resample.R @@ -81,6 +81,8 @@ fit_resamples.model_spec <- function(object, )) } + control <- parsnip::condense_control(control, control_resamples()) + empty_ellipses(...) 
wflow <- add_model(workflow(), object) @@ -109,6 +111,8 @@ fit_resamples.workflow <- function(object, control = control_resamples()) { empty_ellipses(...) + control <- parsnip::condense_control(control, control_resamples()) + res <- resample_workflow( workflow = object, diff --git a/R/tune_bayes.R b/R/tune_bayes.R index 8feb75e4..4b7f3536 100644 --- a/R/tune_bayes.R +++ b/R/tune_bayes.R @@ -160,6 +160,8 @@ tune_bayes.model_spec <- function(object, )) } + control <- parsnip::condense_control(control, control_bayes()) + wflow <- add_model(workflow(), object) if (is_recipe(preprocessor)) { @@ -190,6 +192,8 @@ tune_bayes.workflow <- initial = 5, control = control_bayes()) { + control <- parsnip::condense_control(control, control_bayes()) + res <- tune_bayes_workflow( object, diff --git a/R/tune_grid.R b/R/tune_grid.R index 513602ff..38007eba 100644 --- a/R/tune_grid.R +++ b/R/tune_grid.R @@ -257,6 +257,8 @@ tune_grid.model_spec <- function(object, preprocessor, resamples, ..., )) } + control <- parsnip::condense_control(control, control_grid()) + empty_ellipses(...) wflow <- add_model(workflow(), object) @@ -283,6 +285,8 @@ tune_grid.workflow <- function(object, resamples, ..., param_info = NULL, grid = 10, metrics = NULL, control = control_grid()) { empty_ellipses(...) + control <- parsnip::condense_control(control, control_grid()) + # Disallow `NULL` grids in `tune_grid()`, as this is the special signal # used when no tuning is required if (is.null(grid)) { diff --git a/man/control_bayes.Rd b/man/control_bayes.Rd index 8838b64f..05862e61 100644 --- a/man/control_bayes.Rd +++ b/man/control_bayes.Rd @@ -16,7 +16,8 @@ control_bayes( save_workflow = FALSE, save_gp_scoring = FALSE, event_level = "first", - parallel_over = NULL + parallel_over = NULL, + allow_par = TRUE ) } \arguments{ @@ -70,7 +71,10 @@ is considered the "event".} \item{parallel_over}{A single string containing either \code{"resamples"} or \code{"everything"} describing how to use parallel processing. 
Alternatively, \code{NULL} is allowed, which chooses between \code{"resamples"} and \code{"everything"} -automatically. +automatically.} + +\item{allow_par}{A logical to allow parallel processing (if a parallel +backend is registered). If \code{"resamples"}, then tuning will be performed in parallel over resamples alone. Within each resample, the preprocessor (i.e. recipe or formula) is diff --git a/man/control_grid.Rd b/man/control_grid.Rd index fa48c381..d942d5a8 100644 --- a/man/control_grid.Rd +++ b/man/control_grid.Rd @@ -36,7 +36,26 @@ might be hard to see; try setting the \code{tidymodels.dark} option with \code{options(tidymodels.dark = TRUE)} to print lighter colors.} \item{allow_par}{A logical to allow parallel processing (if a parallel -backend is registered).} +backend is registered). + +If \code{"resamples"}, then tuning will be performed in parallel over resamples +alone. Within each resample, the preprocessor (i.e. recipe or formula) is +processed once, and is then reused across all models that need to be fit. + +If \code{"everything"}, then tuning will be performed in parallel at two levels. +An outer parallel loop will iterate over resamples. Additionally, an +inner parallel loop will iterate over all unique combinations of +preprocessor and model tuning parameters for that specific resample. This +will result in the preprocessor being re-processed multiple times, but +can be faster if that processing is extremely fast. + +If \code{NULL}, chooses \code{"resamples"} if there are more than one resample, +otherwise chooses \code{"everything"} to attempt to maximize core utilization. + +Note that switching between \code{parallel_over} strategies is not guaranteed +to use the same random number generation schemes. 
However, re-tuning a +model using the same \code{parallel_over} strategy is guaranteed to be +reproducible between runs.} \item{extract}{An optional function with at least one argument (or \code{NULL}) that can be used to retain arbitrary objects from the model fit object, @@ -59,26 +78,7 @@ is considered the "event".} \item{parallel_over}{A single string containing either \code{"resamples"} or \code{"everything"} describing how to use parallel processing. Alternatively, \code{NULL} is allowed, which chooses between \code{"resamples"} and \code{"everything"} -automatically. - -If \code{"resamples"}, then tuning will be performed in parallel over resamples -alone. Within each resample, the preprocessor (i.e. recipe or formula) is -processed once, and is then reused across all models that need to be fit. - -If \code{"everything"}, then tuning will be performed in parallel at two levels. -An outer parallel loop will iterate over resamples. Additionally, an -inner parallel loop will iterate over all unique combinations of -preprocessor and model tuning parameters for that specific resample. This -will result in the preprocessor being re-processed multiple times, but -can be faster if that processing is extremely fast. - -If \code{NULL}, chooses \code{"resamples"} if there are more than one resample, -otherwise chooses \code{"everything"} to attempt to maximize core utilization. - -Note that switching between \code{parallel_over} strategies is not guaranteed -to use the same random number generation schemes. However, re-tuning a -model using the same \code{parallel_over} strategy is guaranteed to be -reproducible between runs.} +automatically.} } \description{ Control aspects of the grid search process diff --git a/tests/testthat/_snaps/bayes.md b/tests/testthat/_snaps/bayes.md index da41bec5..73d6c606 100644 --- a/tests/testthat/_snaps/bayes.md +++ b/tests/testthat/_snaps/bayes.md @@ -127,46 +127,25 @@ ! 
validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 1 missing value was found and removed before fitting the Gaussian process model. - ! The Gaussian process model is being fit using 1 features but only has 2 - data points to do so. This may cause errors or a poor model fit. - ! Gaussian process model: X should be in range (0, 1) - ! For the rsq estimates, 1 missing value was found and removed before fitting - the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 2 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 3 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 4 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 5 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! For the rsq estimates, 6 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... ! 
For the rsq estimates, 7 missing values were found and removed before fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) - ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... - ! For the rsq estimates, 8 missing values were found and removed before - fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) - ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... - ! For the rsq estimates, 9 missing values were found and removed before - fitting the Gaussian process model. - ! Gaussian process model: X should be in range (0, 1) ! validation: internal: A correlation computation is required, but `estimate` is constant and ha... - ! No improvement for 10 iterations; returning current results. --- diff --git a/tests/testthat/test-condense_control.R b/tests/testthat/test-condense_control.R new file mode 100644 index 00000000..378c832f --- /dev/null +++ b/tests/testthat/test-condense_control.R @@ -0,0 +1,52 @@ +test_that("control functions respect hierarchy with condense_control", { + + ctrl <- parsnip::condense_control( + control_grid(), + control_resamples() + ) + + expect_equal( + ctrl, + control_resamples() + ) + + ctrl <- parsnip::condense_control( + control_last_fit(), + control_grid() + ) + + expect_equal( + ctrl, + control_grid(extract = control_last_fit()$extract, save_pred = TRUE) + ) + + ctrl <- parsnip::condense_control( + control_last_fit(), + control_resamples() + ) + + expect_equal( + ctrl, + control_resamples(extract = control_last_fit()$extract, save_pred = TRUE) + ) + + ctrl <- parsnip::condense_control( + control_bayes(), + control_grid() + ) + + expect_equal( + ctrl, + control_grid() + ) + + ctrl <- parsnip::condense_control( + control_bayes(), + control_resamples() + ) + + expect_equal( + ctrl, + control_resamples() + ) +})