Skip to content

Commit

Permalink
do v2.7.0 -- see NEWS
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Nov 8, 2024
1 parent 8fda364 commit 0fafdf6
Show file tree
Hide file tree
Showing 18 changed files with 1,233 additions and 594 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: learningmachine
Type: Package
Title: Machine Learning with Explanations and Uncertainty Quantification
Version: 2.6.0
Date: 2024-09-20
Version: 2.7.0
Date: 2024-11-08
Author: T. Moudiki
Maintainer: T. Moudiki <[email protected]>
Description: Regression-based Machine Learning with explanations and uncertainty quantification.
Expand All @@ -29,5 +29,6 @@ Suggests:
roxygen2,
testthat (>= 3.0.0),
xgboost
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
Config/testthat/edition: 3
31 changes: 26 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: build buildsite check clean coverage docs getwd initialize install installcranpkg installgithubpkg installedpkgs load removepkg render setwd start test usegit
.PHONY: build buildsite check clean cleanvars coverage docs getwd initialize install installcranpkg installgithubpkg installedpkgs load removepkg render setwd start test usegit
.DEFAULT_GOAL := help

# The directory where R files are stored
Expand Down Expand Up @@ -44,8 +44,21 @@ check: clean setwd ## check package
clean: ## remove all build, and artifacts
rm -f .Rhistory
rm -f *.RData
rm -f *.Rproj
rm -f *.Rproj
rm -rf .Rproj.user
rm -f src/*.o
rm -f src/*.so
rm -f src/*.tmp
rm -f vignettes/*.html

cleanvars: setwd ## remove all local variables
@read -p "Do you want to remove all local variables in R? (1-yes, 2-no): " choice; \
if [ $$choice -eq 1 ]; then \
echo "Removing all local variables..."; \
Rscript -e "rm(list=ls())"; \
else \
echo "Keeping the variables."; \
fi

coverage: ## get test coverage
Rscript -e "devtools::test_coverage('.')"
Expand All @@ -60,8 +73,12 @@ docs: clean setwd ## generate docs
getwd: ## get current directory
Rscript -e "getwd()"

install: clean setwd ## install current package
install: clean setwd docs ## install current package
Rscript -e "try(devtools::install('.'), silent = FALSE)"
@read -p "Start R session? (y/n): " choice; \
if [ "$$choice" = "y" ]; then \
$(MAKE) start; \
fi

installcranpkg: setwd ## install a package
@read -p "Enter the name of package to be installed: " pckg; \
Expand All @@ -88,10 +105,14 @@ initialize: setwd ## initialize: install packages devtools, usethis, pkgdown and
help: ## print menu with all options
@python3 -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST)

load: clean setwd ## load all (when developing the package)
load: clean setwd docs ## load all and restart (when developing the package)
Rscript -e "devtools::load_all('.')"
@read -p "Start R session? (y/n): " choice; \
if [ "$$choice" = "y" ]; then \
$(MAKE) start; \
fi

removepkg: ## remove package
removepkg: clean ## remove package
@read -p "Enter the name of package to be removed: " pckg; \
if [ -z "$$pckg" ]; then \
echo "Package name cannot be empty."; \
Expand Down
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# version 2.6.0 (2024-11-08)

- conformal confidence intervals for average effects
- use Wilcoxon signed-rank test for nonparametric test on average effects

# version 2.5.0 (2024-09-20)

- tests for nonparametric confidence intervals of average effects (based on rescaled winkler scores)
Expand Down
24 changes: 23 additions & 1 deletion R/Base.R
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ Base <-
class_name = NULL,
class_index = NULL,
y = NULL,
type_ci = c("student", "nonparametric", "bootstrap"),
type_ci = c("student", "nonparametric", "bootstrap", "conformal"),
cl = NULL) {
if (is.null(self$engine) || is.null(self$model) || is.null(self$type))
stop(paste0(self$name, " must be fitted first (use ", self$name, "$fit())"))
Expand Down Expand Up @@ -331,6 +331,23 @@ Base <-
return(c(0, NA, NA, NA))
}
}

foo_conformal_tests <- function(x)
{
res <- try(conformal_ci_mean(x),
silent = TRUE)
if (!inherits(res, "try-error"))
{
return(c(
as.numeric(res$estimate),
res$lower,
res$upper,
res$pvalue
))
} else {
return(c(0, NA, NA, NA))
}
}

lower_signif_codes <- c(0, 0.001, 0.01, 0.05, 0.1)
upper_signif_codes <- c(0.001, 0.01, 0.05, 0.1, 1)
Expand All @@ -348,6 +365,9 @@ Base <-
if (identical(type_ci, "bootstrap"))
citests <- try(data.frame(t(apply(effects, 2, foo_bootstrap_tests))), silent = TRUE)

if (identical(type_ci, "conformal"))
citests <- try(data.frame(t(apply(effects, 2, foo_conformal_tests))), silent = TRUE)

#misc::debug_print(citests)

if (!inherits(citests, "try-error"))
Expand All @@ -363,9 +383,11 @@ Base <-
if (self$type == "regression")
{
coverage_rate <- 100 * mean((y >= as.numeric(preds$lower)) * (y <= as.numeric(preds$upper)))

R_squared <- 1 - sum((y - preds$preds) ^ 2) / sum((y - mean(y)) ^ 2)
R_squared_adj <-
1 - (1 - R_squared) * (length(y) - 1) / (length(y) - ncol(X) - 1)

Residuals <- y - preds$preds
return(list(
R_squared = R_squared,
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ find_lam_eigen <- function(Eigenvectors, Eigenvalues, y, reg_lambda_vector) {
.Call(`_learningmachine_find_lam_eigen`, Eigenvectors, Eigenvalues, y, reg_lambda_vector)
}

fastSampleCpp <- function(resids, n) {
.Call(`_learningmachine_fastSampleCpp`, resids, n)
}

empirical_quantile_cpp <- function(x, q) {
.Call(`_learningmachine_empirical_quantile_cpp`, x, q)
}
Expand Down
69 changes: 55 additions & 14 deletions R/utils_all.R
Original file line number Diff line number Diff line change
Expand Up @@ -748,6 +748,13 @@ t_test_from_ci <- function(mean_estimate, lower_bound, upper_bound, df, conf_lev
return(p_value)
}

# Non-parametric test using Wilcoxon Signed-Rank Test
wilcoxon_test <- function(x, mu_0 = 0) {
# Perform a one-sample Wilcoxon signed-rank test
test_result <- wilcox.test(x, mu = mu_0)
return(test_result$p.value)
}

compute_ci_mean <- function(xx,
type_split = c("random",
"sequential"),
Expand Down Expand Up @@ -793,12 +800,7 @@ compute_ci_mean <- function(xx,
estimate <- stats::median(pseudo_means_x)
lower <- quantile(pseudo_means_x, probs = 1 - upper_prob)
upper <- quantile(pseudo_means_x, probs = upper_prob)
pvalue <- t_test_from_ci(mean_estimate = estimate,
lower_bound = lower,
upper_bound = upper,
df = length(pseudo_means_x) - 1,
conf_level = level/100,
mu_0 = 0)
pvalue <- wilcoxon_test(pseudo_means_x)

return(list(
estimate = estimate,
Expand All @@ -813,7 +815,7 @@ compute_ci_mean <- function(xx,
bootstrap_ci_mean <- function(xx, level = 95, seed = 123) {

set.seed(seed)
B <- 500L
B <- 1000L
alpha <- 1 - level / 100

bootstrap_means <- rep(0, B)
Expand All @@ -824,19 +826,58 @@ bootstrap_ci_mean <- function(xx, level = 95, seed = 123) {
}

# Step 2: Calculate the confidence interval
estimate <- mean(xx)
estimate <- median(bootstrap_means)
lower_bound <- quantile(bootstrap_means, alpha / 2)
upper_bound <- quantile(bootstrap_means, 1 - alpha / 2)
pvalue <- t_test_from_ci(mean_estimate = estimate,
lower_bound = lower_bound,
upper_bound = upper_bound,
df = length(xx) - 1,
conf_level = level/100,
mu_0 = 0)
pvalue <- wilcoxon_test(bootstrap_means)

return(list(
estimate = estimate,
lower = lower_bound,
upper = upper_bound,
pvalue = pvalue))
}


boot_means_matrix <- function(B, mean_estimate, resids, seed) {
set.seed(seed)
n <- length(resids)
# Add mean_estimate to each resampled residual and compute means
colMeans(matrix(resids[matrix(sample(n, size = n * B, replace = TRUE), nrow = n, ncol = B)],
nrow = n, ncol = B) + mean_estimate)
}

conformal_ci_mean <- function(xx, level = 95, seed = 123, cpp=TRUE) {
set.seed(seed)
alpha <- 1 - (level / 100)
n_xx <- length(xx)
half_n_xx <- n_xx %/% 2
idx_train <- sample(seq_len(n_xx), replace = FALSE, size = half_n_xx)

# Calculate the mean estimate on the sampled training set
mean_estimate <- mean(xx[idx_train])

# Calculate residuals from the remaining data points
resids <- xx[-idx_train] - mean_estimate

# Bootstrap the residuals to estimate the variability
B <- 500L # Number of bootstrap samples

if (cpp)
{
boot_samples <- matrix(fastSampleCpp(resids, B * length(resids)), ncol = B)
} else {
boot_samples <- matrix(sample(resids, size = B * length(resids), replace = TRUE), ncol = B)
}
boot_means <- colMeans(mean_estimate + boot_samples)

# Derive the confidence interval from the bootstrap distribution of means
lower <- as.numeric(quantile(boot_means, alpha / 2))
upper <- as.numeric(quantile(boot_means, 1 - alpha / 2))

return(list(
estimate = median(boot_means),
lower = lower,
upper = upper,
pvalue = wilcoxon_test(boot_means)))
}
2 changes: 1 addition & 1 deletion docs/pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ articles:
getting-updates-bayesianrvfl: getting-updates-bayesianrvfl.html
getting-updates: getting-updates.html
qrnn: qrnn.html
last_built: 2024-09-18T16:57Z
last_built: 2024-09-21T03:05Z

18 changes: 9 additions & 9 deletions man/Base.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 10 additions & 10 deletions man/Classifier.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 0fafdf6

Please sign in to comment.