Skip to content

Commit

Permalink
raw probs for classifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Mar 10, 2024
1 parent 153d727 commit 29646a1
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 24 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
^learningmachine\.Rcheck$
^learningmachine.*\.tar\.gz$
^learningmachine.*\.tgz$
^\.github$
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ Package: learningmachine
Type: Package
Title: Machine Learning with explanations and uncertainty quantification
Version: 1.0.0
Date: 2024-02-24
Date: 2024-03-07
Author: T. Moudiki
Maintainer: T. Moudiki <[email protected]>
Description: Regression-based Machine Learning with explanations and uncertainty quantification.
Expand Down
78 changes: 66 additions & 12 deletions R/Classifier.R
Original file line number Diff line number Diff line change
Expand Up @@ -258,22 +258,73 @@ Classifier <-
},
predict = function(X,
level = NULL) {
probs <- self$predict_proba(X)
if (is.null(level) && is.null(self$level))
{
probs <- self$predict_proba(X)
numeric_factor <- apply(probs, 1, which.max)
res <- decode_factors(numeric_factor,
private$encoded_factors)
names(res) <- NULL
return(res)

preds <- decode_factors(numeric_factor,
private$encoded_factors)
names(preds) <- NULL
return(preds)
} else { # !is.null(level) || !is.null(self$level)
raw_preds <- self$engine$predict(self$model, X)
scaled_raw_residuals <- scale(private$calib_resids,
center = TRUE,
scale = TRUE)
sd_raw_residuals <- apply(private$calib_resids, 2, sd)

set.seed(self$seed)

if (self$pi_method %in% c("kdejackknifeplus", "kdesplitconformal"))
{
simulated_raw_calibrated_residuals <- lapply(seq_len(private$n_classes),
function(i) rgaussiandens(x = private$calib_resids[, i],
n = nrow(raw_preds),
p = self$B,
seed = self$seed))
}

if (self$pi_method %in% c("bootjackknifeplus", "bootsplitconformal"))
{
simulated_raw_calibrated_residuals <- lapply(seq_len(private$n_classes),
function(i) rbootstrap(x = private$calib_resids[, i],
n = nrow(raw_preds),
p = self$B,
seed = self$seed))
}

if (self$pi_method %in% c("surrsplitconformal", "surrjackknifeplus"))
{
if (nrow(raw_preds) > length(private$calib_resids))
{
stop("For surrogates, must have number of predictions < number of training observations")
}
simulated_raw_calibrated_residuals <- lapply(seq_len(private$n_classes),
function(i) rsurrogate(x = private$calib_resids[, i],
n = nrow(raw_preds),
p = self$B,
seed = self$seed))
}
sims <- lapply(seq_len(private$n_classes),
function (i) replicate(self$B,
raw_preds[,i]) + sd_raw_residuals[i] * simulated_raw_calibrated_residuals[[i]])
preds_lower <- lapply(seq_len(private$n_classes), function(i)
pmax(0, apply(sims[[i]], 1, function(x)
quantile(x, probs = (1 - self$level / 100) / 2))))
preds_upper <- lapply(seq_len(private$n_classes), function(i)
pmin(1, apply(sims[[i]], 1, function(x)
quantile(x, probs = 1 - (1 - self$level / 100) / 2))))
if(!is.null(private$class_names))
{
names(sims) <- private$class_names
names(preds_lower) <- private$class_names
names(preds_upper) <- private$class_names
}

res <- list()
res$preds <- NULL # predictions sets with given 'level'
res$lower <- NULL # upon request
res$upper <- NULL # upon request
res$sims <- NULL # upon request
res$lower <- preds_lower
res$upper <- preds_upper
res$sims <- sims # upon request

# prediction sets with given 'level'
if (is.null(self$level) && !is.null(level))
Expand Down Expand Up @@ -314,7 +365,8 @@ Classifier <-

fit_multitaskregressor <- function(x,
y,
method = c("bcn",
method = c("lm",
"bcn",
"extratrees",
"glmnet",
"krr",
Expand Down Expand Up @@ -362,7 +414,8 @@ fit_multitaskregressor <- function(x,

predict_multitaskregressor <- function(objs,
X,
method = c("bcn",
method = c("lm",
"bcn",
"extratrees",
"glmnet",
"krr",
Expand All @@ -372,6 +425,7 @@ predict_multitaskregressor <- function(objs,
method <- match.arg(method)
predict_func <- switch(
method,
lm = function(obj, X) X%*%obj$coefficients,
bcn = bcn::predict.bcn,
extratrees = predict_func_extratrees,
glmnet = predict,
Expand Down
35 changes: 33 additions & 2 deletions R/Regressor.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,11 @@ Regressor <-
y_pred_calibration <-
self$engine$predict(self$model, # notice the diff
X_calibration_sc)
debug_print(y_calibration_sc)
debug_print(y_pred_calibration)
private$calib_resids <-
y_calibration_sc - y_pred_calibration
debug_print(private$calib_resids)
private$abs_calib_resids <- abs(private$calib_resids)
}
}
Expand Down Expand Up @@ -479,8 +482,18 @@ fit_regressor <- function(x,
"glmnet",
"krr",
"xgboost"),
scaling = FALSE,
...) {
regressor_choice <- match.arg(method)
if (scaling == TRUE)
{
scales <- scale_matrix(x)
xm <- scales$X_mean
xs <- scales$X_sd
x <- scales$X
ym <- mean(y)
y <- y - ym
}
obj <- switch(
regressor_choice,
lm = function(x, y, ...)
Expand All @@ -501,7 +514,17 @@ fit_regressor <- function(x,
xgboost = function(x, y, ...)
fit_xgboost_regression(x, y, ...)
)
return(obj(x = x, y = y, ...))

res <- obj(x = x, y = y, ...)
res$scaling <- FALSE
if (scaling == TRUE)
{
res$scaling <- TRUE
res$xm <- xm
res$xs <- xs
res$ym <- ym
}
return(res)
}


Expand All @@ -516,7 +539,11 @@ predict_regressor <- function(obj,
"krr",
"xgboost")) {
method_choice <- match.arg(method)

if (obj$scaling == TRUE)
{
X <- sweep(X, 2, obj$xm, "-")
X <- sweep(X, 2, obj$xs, "/")
}
predict_func <- switch(
method_choice,
lm = function(object, X)
Expand All @@ -529,5 +556,9 @@ predict_regressor <- function(obj,
ridge = predict_ridge_regression,
xgboost = predict
)
if (obj$scaling == TRUE)
{
return(predict_func(obj, X) + obj$ym)
}
return(predict_func(obj, X))
}
35 changes: 28 additions & 7 deletions R/utils_all.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ compute_probs_list <- function(x) {
}
}
}
if (!is.null(names(x))) {
names(res) <- names(x)
}

names_x <- try(names(x), silent = TRUE)
if(!inherits(names_x, "try-error"))
names(res) <- names_x

#res$sims <- x
return(res)
}
Expand All @@ -41,10 +43,12 @@ compute_pis <- function(x, alpha) {
pmax(0, pmin(1, quantile(x[[j]][i, ], probs = 1 - alpha / 2)))
}
}
if (!is.null(names(x))) {
colnames(preds) <- names(x)
colnames(lower) <- names(x)
colnames(upper) <- names(x)

names_x <- try(names(x), silent = TRUE)
if (!inherits(names_x, "try-error")) {
colnames(preds) <- names_x
colnames(lower) <- names_x
colnames(upper) <- names_x
}
return(list(
preds = preds,
Expand Down Expand Up @@ -144,6 +148,14 @@ get_classes_idx <- function(new_probs, q_threshold, level) {
}


# get expit probs -----
expit_probs <- function(x) {
  # Map a numeric vector to probabilities: apply the logistic (expit)
  # transform elementwise, then normalize so the result sums to 1.
  #
  # @param x A numeric vector (raw scores, one per class).
  # @return A numeric vector of the same length as `x`, non-negative,
  #   summing to 1.
  stopifnot(is.vector(x))
  # Work in log space: log(sigmoid(x)) via plogis(log.p = TRUE) avoids the
  # overflow in exp(-x) that made every term underflow to 0 (and the
  # normalization return NaN) when all inputs were strongly negative.
  log_sig <- stats::plogis(x, log.p = TRUE)
  # Max-shift before exponentiating for numerical stability; the shift
  # cancels in the ratio, so this equals sigmoid(x) / sum(sigmoid(x)).
  w <- exp(log_sig - max(log_sig))
  w / sum(w)
}


# get jackknife residuals -----
get_jackknife_residuals <-
function(X, y, idx, fit_func, predict_func) {
Expand Down Expand Up @@ -478,6 +490,15 @@ remove_nulls <- function(x) {
return(x[!is.null(x)])
}

scale_matrix <- function(X)
{
  # Center and scale the columns of a numeric matrix.
  #
  # @param X A numeric matrix.
  # @return A list with:
  #   - X: the matrix with each column centered (mean 0) and scaled
  #        (unit standard deviation),
  #   - X_mean: the original column means,
  #   - X_sd: the original column standard deviations.
  col_center <- colMeans(X)
  col_scale <- apply(X, 2, sd)
  # Subtract the column means, then divide by the column sds.
  X_scaled <- sweep(sweep(X, 2, col_center, "-"), 2, col_scale, "/")
  list(X = X_scaled, X_mean = col_center, X_sd = col_scale)
}

# sort data frame -----
sort_df <- function(df, by, decreasing = FALSE) {
return(df[order(df[[by]], decreasing = decreasing), ])
Expand Down
4 changes: 2 additions & 2 deletions learningmachine.Rproj
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ LaTeX: pdfLaTeX

BuildType: Package
PackageInstallArgs: --no-multiarch --with-keep.source
PackageCheckArgs: --as-cran --no-vignettes --ignore-vignettes
PackageRoxygenize: rd,collate,vignette
PackageCheckArgs: --as-cran --no-vignettes
PackageRoxygenize: rd,collate

0 comments on commit 29646a1

Please sign in to comment.