From 2774f4ecad60e5b8288b9168a91207f922df2ca9 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sat, 14 Dec 2024 16:25:29 +0100 Subject: [PATCH] base1 indexing, remove deprecated --- R-package/R/xgb.importance.R | 30 +++++++++++++------------ R-package/R/xgb.model.dt.tree.R | 15 ++++++++----- R-package/R/xgb.plot.shap.R | 2 +- R-package/man/xgb.importance.Rd | 22 +++++------------- R-package/man/xgb.model.dt.tree.Rd | 5 ++--- R-package/man/xgb.plot.shap.Rd | 2 +- R-package/tests/testthat/test_helpers.R | 2 +- 7 files changed, 37 insertions(+), 41 deletions(-) diff --git a/R-package/R/xgb.importance.R b/R-package/R/xgb.importance.R index c1b45e81bb8c..eff1e7b90e91 100644 --- a/R-package/R/xgb.importance.R +++ b/R-package/R/xgb.importance.R @@ -12,15 +12,11 @@ #' @param feature_names Character vector used to overwrite the feature names #' of the model. The default is `NULL` (use original feature names). #' @param model Object of class `xgb.Booster`. -#' @param trees An integer vector of tree indices that should be included +#' @param trees An integer vector of (base-1) tree indices that should be included #' into the importance calculation (only for the "gbtree" booster). #' The default (`NULL`) parses all trees. #' It could be useful, e.g., in multiclass classification to get feature importances -#' for each class separately. *Important*: the tree index in XGBoost models -#' is zero-based (e.g., use `trees = 0:4` for the first five trees). -#' @param data Deprecated. -#' @param label Deprecated. -#' @param target Deprecated. +#' for each class separately. #' @return A `data.table` with the following columns: #' #' For a tree model: @@ -94,14 +90,14 @@ #' #' # inspect importances separately for each class: #' xgb.importance( -#' model = mbst, trees = seq(from = 0, by = nclass, length.out = nrounds) -#' ) -#' xgb.importance( #' model = mbst, trees = seq(from = 1, by = nclass, length.out = nrounds) #' ) #' xgb.importance( #' model = mbst, trees = seq(from = 2, by = nclass, length.out = nrounds) #' ) +#' xgb.importance( +#' model = mbst, trees = seq(from = 3, by = nclass, length.out = nrounds) +#' ) #' #' # multiclass classification using "gblinear": #' mbst <- xgb.train( @@ -122,15 +118,21 @@ #' xgb.importance(model = mbst) #' #' @export -xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL, - data = NULL, label = NULL, target = NULL) { - - if (!(is.null(data) && is.null(label) && is.null(target))) - warning("xgb.importance: parameters 'data', 'label' and 'target' are deprecated") +xgb.importance <- function(model = NULL, feature_names = getinfo(model, "feature_name"), trees = NULL) { if (!(is.null(feature_names) || is.character(feature_names))) stop("feature_names: Has to be a character vector") + if (!is.null(trees)) { + if (!is.vector(trees)) { + stop("'trees' must be a vector of tree indices.") + } + trees <- trees - 1L + if (anyNA(trees)) { + stop("Passed invalid tree indices.") + } + } + handle <- xgb.get.handle(model) if (xgb.booster_type(model) == "gblinear") { args <- list(importance_type = "weight", feature_names = feature_names) diff --git a/R-package/R/xgb.model.dt.tree.R b/R-package/R/xgb.model.dt.tree.R index 12ed705ba8f7..789737d4c701 100644 --- a/R-package/R/xgb.model.dt.tree.R +++ b/R-package/R/xgb.model.dt.tree.R @@ -6,10 +6,9 @@ #' be set through [setinfo()]), they will be used in the output from this function. #' @param text Character vector previously generated by the function [xgb.dump()] #' (called with parameter `with_stats = TRUE`). `text` takes precedence over `model`. -#' @param trees An integer vector of tree indices that should be used. The default +#' @param trees An integer vector of (base-1) tree indices that should be used. The default #' (`NULL`) uses all trees. Useful, e.g., in multiclass classification to get only -#' the trees of one class. *Important*: the tree index in XGBoost models -#' is zero-based (e.g., use `trees = 0:4` for the first five trees). +#' the trees of one class. #' @param use_int_id A logical flag indicating whether nodes in columns "Yes", "No", and #' "Missing" should be represented as integers (when `TRUE`) or as "Tree-Node" #' character strings (when `FALSE`, default). @@ -75,8 +74,14 @@ xgb.model.dt.tree <- function(model = NULL, text = NULL, " (or NULL if 'model' was provided).") } - if (!(is.null(trees) || is.numeric(trees))) { - stop("trees: must be a vector of integers.") + if (!is.null(trees)) { + if (!is.vector(trees) || (!is.numeric(trees) && !is.integer(trees))) { + stop("trees: must be a vector of integers.") + } + trees <- trees - 1L + if (anyNA(trees) || min(trees) < 0) { + stop("Passed invalid tree indices.") + } } feature_names <- NULL diff --git a/R-package/R/xgb.plot.shap.R b/R-package/R/xgb.plot.shap.R index bb678968db88..dd6d4466e8ab 100644 --- a/R-package/R/xgb.plot.shap.R +++ b/R-package/R/xgb.plot.shap.R @@ -121,7 +121,7 @@ #' num_class = nclass #' ) #' ) -#' trees0 <- seq(from = 0, by = nclass, length.out = nrounds) +#' trees0 <- seq(from = 1, by = nclass, length.out = nrounds) #' col <- rgb(0, 0, 1, 0.5) #' #' xgb.plot.shap( diff --git a/R-package/man/xgb.importance.Rd b/R-package/man/xgb.importance.Rd index f26067d7fef9..1e771e98810d 100644 --- a/R-package/man/xgb.importance.Rd +++ b/R-package/man/xgb.importance.Rd @@ -7,10 +7,7 @@ xgb.importance( model = NULL, feature_names = getinfo(model, "feature_name"), - trees = NULL, - data = NULL, - label = NULL, - target = NULL + trees = NULL ) } \arguments{ @@ -19,18 +16,11 @@ xgb.importance( \item{feature_names}{Character vector used to overwrite the feature names of the model. The default is \code{NULL} (use original feature names).} -\item{trees}{An integer vector of tree indices that should be included +\item{trees}{An integer vector of (base-1) tree indices that should be included into the importance calculation (only for the "gbtree" booster). The default (\code{NULL}) parses all trees. It could be useful, e.g., in multiclass classification to get feature importances -for each class separately. \emph{Important}: the tree index in XGBoost models -is zero-based (e.g., use \code{trees = 0:4} for the first five trees).} - -\item{data}{Deprecated.} - -\item{label}{Deprecated.} - -\item{target}{Deprecated.} +for each class separately.} } \value{ A \code{data.table} with the following columns: @@ -119,15 +109,15 @@ mbst <- xgb.train( xgb.importance(model = mbst) # inspect importances separately for each class: -xgb.importance( - model = mbst, trees = seq(from = 0, by = nclass, length.out = nrounds) -) xgb.importance( model = mbst, trees = seq(from = 1, by = nclass, length.out = nrounds) ) xgb.importance( model = mbst, trees = seq(from = 2, by = nclass, length.out = nrounds) ) +xgb.importance( + model = mbst, trees = seq(from = 3, by = nclass, length.out = nrounds) +) # multiclass classification using "gblinear": mbst <- xgb.train( diff --git a/R-package/man/xgb.model.dt.tree.Rd b/R-package/man/xgb.model.dt.tree.Rd index f55fc17a4e7b..424552e490cd 100644 --- a/R-package/man/xgb.model.dt.tree.Rd +++ b/R-package/man/xgb.model.dt.tree.Rd @@ -19,10 +19,9 @@ be set through \code{\link[=setinfo]{setinfo()}}), they will be used in the outp \item{text}{Character vector previously generated by the function \code{\link[=xgb.dump]{xgb.dump()}} (called with parameter \code{with_stats = TRUE}). \code{text} takes precedence over \code{model}.} -\item{trees}{An integer vector of tree indices that should be used. The default +\item{trees}{An integer vector of (base-1) tree indices that should be used. The default (\code{NULL}) uses all trees. Useful, e.g., in multiclass classification to get only -the trees of one class. \emph{Important}: the tree index in XGBoost models -is zero-based (e.g., use \code{trees = 0:4} for the first five trees).} +the trees of one class.} \item{use_int_id}{A logical flag indicating whether nodes in columns "Yes", "No", and "Missing" should be represented as integers (when \code{TRUE}) or as "Tree-Node" diff --git a/R-package/man/xgb.plot.shap.Rd b/R-package/man/xgb.plot.shap.Rd index 7bdd5ad2bfac..9feb7e6a6664 100644 --- a/R-package/man/xgb.plot.shap.Rd +++ b/R-package/man/xgb.plot.shap.Rd @@ -174,7 +174,7 @@ mbst <- xgb.train( num_class = nclass ) ) -trees0 <- seq(from = 0, by = nclass, length.out = nrounds) +trees0 <- seq(from = 1, by = nclass, length.out = nrounds) col <- rgb(0, 0, 1, 0.5) xgb.plot.shap( diff --git a/R-package/tests/testthat/test_helpers.R b/R-package/tests/testthat/test_helpers.R index b03282cfd6d0..26a555ea55e3 100644 --- a/R-package/tests/testthat/test_helpers.R +++ b/R-package/tests/testthat/test_helpers.R @@ -352,7 +352,7 @@ test_that("xgb.importance works with and without feature names", { imp.Tree <- xgb.importance(model = mbst.Tree) expect_equal(dim(imp.Tree), c(4, 4)) - trees <- seq(from = 0, by = 2, length.out = 2) + trees <- seq(from = 1, by = 2, length.out = 2) importance <- xgb.importance(feature_names = feature.names, model = bst.Tree, trees = trees) importance_from_dump <- function() {