Skip to content

Commit d7725a2

Browse files
+pbDD
1 parent b9fcf7f commit d7725a2

File tree

13 files changed

+525
-63
lines changed

13 files changed

+525
-63
lines changed

DESCRIPTION

+6-2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ Authors@R: c(
1616
person("Pierre-Luc", "Germain", role="aut"),
1717
person("Charlotte", "Soneson", role="aut"),
1818
person("Anthony", "Sonrel", role="aut"),
19+
person("Jeroen", "Gilis", role="aut"),
20+
person("Davide", "Risso", role="aut"),
21+
person("Lieven", "Clement", role="aut"),
1922
person("Mark D.", "Robinson", role=c("aut", "fnd"),
2023
2124
Imports:
@@ -33,22 +36,23 @@ Imports:
3336
Suggests:
3437
BiocStyle,
3538
countsimQC,
36-
cowplot,
3739
ExperimentHub,
3840
iCOBRA,
3941
knitr,
42+
patchwork,
4043
phylogram,
4144
RColorBrewer,
4245
reshape2,
4346
rmarkdown,
4447
statmod,
48+
stageR,
4549
testthat,
4650
UpSetR
4751
biocViews: ImmunoOncology, DifferentialExpression, Sequencing,
4852
SingleCell, Software, StatisticalMethod, Visualization
4953
License: GPL-3
5054
VignetteBuilder: knitr
51-
RoxygenNote: 7.2.3
55+
RoxygenNote: 7.3.2
5256
Encoding: UTF-8
5357
URL: https://github.com/HelenaLC/muscat
5458
BugReports: https://github.com/HelenaLC/muscat/issues

NAMESPACE

+3
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
export(aggregateData)
44
export(calcExprFreqs)
55
export(mmDS)
6+
export(pbDD)
67
export(pbDS)
78
export(pbFlatten)
89
export(pbHeatmap)
@@ -11,6 +12,7 @@ export(prepSCE)
1112
export(prepSim)
1213
export(resDS)
1314
export(simData)
15+
export(stagewise_DS_DD)
1416
import(ggplot2)
1517
importFrom(BiocParallel,MulticoreParam)
1618
importFrom(BiocParallel,SerialParam)
@@ -101,6 +103,7 @@ importFrom(lmerTest,contest)
101103
importFrom(lmerTest,lmer)
102104
importFrom(matrixStats,colAnys)
103105
importFrom(matrixStats,rowAnyNAs)
106+
importFrom(matrixStats,rowMedians)
104107
importFrom(matrixStats,rowMins)
105108
importFrom(matrixStats,rowQuantiles)
106109
importFrom(matrixStats,rowSds)

R/pbDS.R

+39-26
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#' Can be a list for multiple, independent comparisons.
2121
#' @param min_cells a numeric. Specifies the minimum number of cells in a given
2222
#' cluster-sample required to consider the sample for differential testing.
23-
#' @param filter characterstring specifying whether
23+
#' @param filter character string specifying whether
2424
#' to filter on genes, samples, both or neither.
2525
#' @param treat logical specifying whether empirical Bayes moderated-t
2626
#' p-values should be computed relative to a minimum fold-change threshold.
@@ -82,16 +82,16 @@
8282
#' @export
8383

8484
pbDS <- function(pb,
85-
method = c("edgeR", "DESeq2", "limma-trend", "limma-voom"),
86-
design = NULL, coef = NULL, contrast = NULL, min_cells = 10,
87-
filter = c("both", "genes", "samples", "none"), treat = FALSE,
88-
verbose = TRUE, BPPARAM = SerialParam(progressbar = verbose)) {
85+
method=c("edgeR", "DESeq2", "limma-trend", "limma-voom", "DD"),
86+
design=NULL, coef=NULL, contrast=NULL, min_cells=10,
87+
filter=c("both", "genes", "samples", "none"), treat=FALSE,
88+
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose)) {
8989

9090
# check validity of input arguments
9191
args <- as.list(environment())
9292
method <- match.arg(method)
9393
filter <- match.arg(filter)
94-
.check_pbs(pb, check_by = TRUE)
94+
.check_pbs(pb, check_by=TRUE)
9595
.check_args_pbDS(args)
9696
stopifnot(is(BPPARAM, "BiocParallelParam"))
9797

@@ -104,7 +104,7 @@ pbDS <- function(pb,
104104
}
105105
if (is.null(coef) & is.null(contrast)) {
106106
c <- colnames(design)[ncol(design)]
107-
contrast <- makeContrasts(contrasts = c, levels = design)
107+
contrast <- makeContrasts(contrasts=c, levels=design)
108108
args$contrast <- contrast
109109
}
110110

@@ -117,18 +117,19 @@ pbDS <- function(pb,
117117
if (!is.list(coef))
118118
coef <- list(coef)
119119
cs <- vapply(coef, function(i)
120-
paste(colnames(design)[i], collapse = "-"),
120+
paste(colnames(design)[i], collapse="-"),
121121
character(1))
122122
names(cs) <- names(coef) <- cs
123123
}
124124
ct <- ifelse(is.null(coef), "contrast", "coef")
125125

126126
if (!is.function(method)) {
127127
fun <- switch(method,
128-
"DESeq2" = .DESeq2,
129-
"edgeR" = .edgeR,
130-
"limma-trend" = .limma_trend,
131-
"limma-voom" = .limma_voom)
128+
"DD"=.edgeR_NB,
129+
"edgeR"=.edgeR,
130+
"DESeq2"=.DESeq2,
131+
"limma-voom"=.limma_voom,
132+
"limma-trend"=.limma_trend)
132133
} else {
133134
fun_call <- 1
134135
}
@@ -139,28 +140,30 @@ pbDS <- function(pb,
139140
n_cells <- .n_cells(pb)
140141
names(kids) <- kids <- assayNames(pb)
141142
res <- bplapply(
142-
BPPARAM = BPPARAM,
143+
BPPARAM=BPPARAM,
143144
kids, function (k) {
144145
rmv <- n_cells[k, ] < min_cells
145-
d <- design[colnames(y <- pb[ , !rmv]), , drop = FALSE]
146+
d <- design[colnames(y <- pb[ , !rmv]), , drop=FALSE]
146147
if (filter %in% c("samples", "both")) {
147148
ls <- colSums(assay(y, k))
148-
ol <- isOutlier(ls, log = TRUE, type = "lower", nmads = 3)
149-
d <- d[colnames(y <- y[, !ol]), , drop = FALSE]
149+
ol <- isOutlier(ls, log=TRUE, type="lower", nmads=3)
150+
d <- d[colnames(y <- y[, !ol]), , drop=FALSE]
150151
}
151152
if (any(tabulate(y$group_id) < 2)
152-
|| qr(d)$rank == nrow(d)
153+
|| qr(d)$rank== nrow(d)
153154
|| qr(d)$rank < ncol(d))
154155
return(NULL)
155-
y <- y[rowSums(assay(y, k)) != 0, , drop = FALSE]
156+
y <- y[rowSums(assay(y, k)) != 0, , drop=FALSE]
156157
if (filter %in% c("genes", "both") & max(assay(y, k)) > 100)
157-
y <- y[filterByExpr(assay(y, k), d), , drop = FALSE]
158+
y <- y[filterByExpr(assay(y, k), d), , drop=FALSE]
158159
# drop samples without any detected features
159160
keep <- colAnys(assay(y, k) > 0)
160-
y <- y[, keep, drop = FALSE]
161-
d <- d[keep, , drop = FALSE]
162-
args <- list(x = y, k = k, design = d, coef = coef,
163-
contrast = contrast, ct = ct, cs = cs, treat = treat)
161+
y <- y[, keep, drop=FALSE]
162+
d <- d[keep, , drop=FALSE]
163+
args <- list(
164+
x=y, k=k, design=d, coef=coef,
165+
contrast=contrast, ct=ct, cs=cs,
166+
treat=treat, nc=n_cells[k, !rmv])
164167
args <- args[intersect(names(args), fun_args)]
165168
suppressWarnings(do.call(fun, args))
166169
})
@@ -169,14 +172,24 @@ pbDS <- function(pb,
169172
rmv <- vapply(res, is.null, logical(1))
170173
res <- res[!rmv]
171174

172-
if (length(res) == 0) stop(
175+
if (length(res)== 0) stop(
173176
"Specified filtering options result in no genes in any clusters ",
174177
"being tested. To force testing, consider modifying arguments ",
175178
"'min_cells' and/or 'filter'. See '?pbDS' for details.")
176179

177180
# reorganize & do global p-value adjustment
178181
names(i) <- i <- c("table", "data", "fit")
179-
res <- lapply(i, map, .x = res)
182+
res <- lapply(i, map, .x=res)
180183
res$table <- .p_adj_global(res$table)
181-
return(c(res, list(args = args)))
184+
return(c(res, list(args=args)))
185+
}
186+
187+
#' @rdname pbDS
188+
#' @export
189+
pbDD <- function(pb, design=NULL, coef=NULL, contrast=NULL,
190+
min_cells=10, filter=c("both", "genes", "samples", "none"),
191+
verbose=TRUE, BPPARAM=SerialParam(progressbar=verbose))
192+
{
193+
args <- as.list(environment())
194+
do.call(pbDS, c(args, list(method="DD")))
182195
}

R/stagewiseDD.R

+121
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
#' @importFrom dplyr bind_rows
2+
.res_DX <- function(res_DS, res_DD) {
3+
# for each contrast...
4+
names(cts) <- cts <- names(res_DS$table)
5+
lapply(cts, function(ct) {
6+
# for each cluster...
7+
names(kids) <- kids <- names(res_DS$table[[ct]])
8+
lapply(kids, function(kid) {
9+
# get DS/DD results
10+
DS <- res_DS$table[[ct]][[kid]]
11+
DD <- res_DD$table[[ct]][[kid]]
12+
# add missing genes
13+
DS <- bind_rows(DS, data.frame(gene=setdiff(DD$gene, DS$gene)))
14+
DD <- bind_rows(DD, data.frame(gene=setdiff(DS$gene, DD$gene)))
15+
# reorder & return both
16+
DD <- DD[match(DS$gene, DD$gene), ]
17+
return(list(DS=DS, DD=DD))
18+
})
19+
})
20+
}
21+
22+
#' @rdname stagewise_DS_DD
23+
#' @title Perform two-stage testing on DS and DD
24+
#'
25+
#' @param res_DS a list of DS testing results as returned
26+
#' by \code{\link{pbDS}} or \code{\link{mmDS}}.
27+
#' @param res_DD a list of DD testing results as returned
28+
#' by \code{\link{pbDD}} (or \code{\link{pbDS}} with \code{method="DD"}).
29+
#' @param sce (optional) \code{SingleCellExperiment} object containing the data
30+
#' that underlies testing, prior to summarization with \code{\link{aggregateData}}.
31+
#' Used for validation of inputs in order to prevent unexpected failure/results.
32+
#'
33+
#' @return
34+
#' A list of \code{DFrame}s containing results for each contrast and cluster.
35+
#' Each table contains DS and DD results for genes shared between analyses,
36+
#' as well as results from stagewise testing analysis, namely:
37+
#' \itemize{
38+
#' \item{\code{p_adj}: FDR adjusted p-values for the
39+
#' screening hypothesis that a gene is neither DS nor DD
40+
#' (see \code{?stageR::getAdjustedPValues} for details)}
41+
#' \item{\code{p_val.DS/D}: confirmation stage p-values for DS/D}}
42+
#'
43+
#' @examples
44+
#' data(example_sce)
45+
#'
46+
#' pbs_sum <- aggregateData(example_sce, assay="counts", fun="sum")
47+
#' pbs_det <- aggregateData(example_sce, assay="counts", fun="num.detected")
48+
#'
49+
#' res_DS <- pbDS(pbs_sum, min_cells=0, filter="none", verbose=FALSE)
50+
#' res_DD <- pbDD(pbs_det, min_cells=0, filter="none", verbose=FALSE)
51+
#'
52+
#' res <- stagewise_DS_DD(res_DS, res_DD)
53+
#' head(res[[1]][[1]]) # results for 1st cluster
54+
#'
55+
#' @importFrom S4Vectors DataFrame
56+
#' @importFrom purrr map_depth
57+
#' @export
58+
59+
stagewise_DS_DD <- function(res_DS, res_DD, sce=NULL, verbose=FALSE) {
60+
if (!requireNamespace("stageR", quietly=TRUE))
61+
stop("Install 'stageR' to use this function.")
62+
63+
# validity checks
64+
# TODO: helper to check validity of 'res_DS/D'
65+
# against each other and, optionally, 'sce'
66+
stopifnot(
67+
# same coefs/constrasts
68+
names(x <- res_DS$table) ==
69+
names(y <- res_DD$table),
70+
# any shared clusters
71+
sum(mapply(\(i, j)
72+
length(intersect(i, j)),
73+
i=lapply(x, names),
74+
j=lapply(y, names))) > 0)
75+
if (!is.null(sce)) {
76+
.check_sce(sce)
77+
. <- map_depth(list(x, y), 3, \(df) df$gene %in% rownames(sce))
78+
stopifnot("gene(s) present in 'res_DS/D' not found in 'sce'"=unlist(.))
79+
. <- map_depth(list(x, y), 3, \(df) df$cluster_id %in% sce$cluster_id)
80+
stopifnot("cluster(s) present in 'res_DS/D' not found in 'sce'"=unlist(.))
81+
}
82+
83+
# assure that results contain same set of genes, in the same order
84+
# (indepedent of different filtering criteria for the two analyses)
85+
res_DX <- .res_DX(res_DS=res_DS, res_DD=res_DD)
86+
87+
# perform harmonic mean p-value aggregation according to
88+
# (https://www.pnas.org/doi/full/10.1073/pnas.1814092116)
89+
.mu <- \(x) 1/mean(1/x, na.rm=TRUE)
90+
91+
# perform stagewise testing
92+
res <- map_depth(res_DX, 2, \(x) {
93+
ps <- data.frame(
94+
p_val.DS=x$DS$p_val,
95+
p_val.DD=x$DD$p_val,
96+
row.names=x$DS$gene)
97+
qs <- apply(ps, 1, .mu); names(qs) <- x$DS$gene
98+
obj <- stageR::stageR(qs, as.matrix(ps), FALSE)
99+
eva <- expression({
100+
obj <- stageR::stageWiseAdjustment(obj,
101+
method="none", alpha=0.05, allowNA=TRUE)
102+
res <- stageR::getAdjustedPValues(obj,
103+
onlySignificantGenes=FALSE, order=FALSE)
104+
})
105+
res <- if (verbose) eval(eva) else suppressMessages({eval(eva)})
106+
# TODO: communicate this better with the user?
107+
if (is.null(res)) res <- NA
108+
colnames(res)[1] <- "p_adj"
109+
return(res)
110+
})
111+
names(cs) <- cs <- names(res)
112+
lapply(cs, \(c) {
113+
names(ks) <- ks <- names(res[[c]])
114+
lapply(ks, \(k) {
115+
gs <- rownames(df <- res[[c]][[k]])
116+
res_DS <- I((. <- res_DS$table[[c]][[k]])[match(gs, .$gene), ])
117+
res_DD <- I((. <- res_DD$table[[c]][[k]])[match(gs, .$gene), ])
118+
DataFrame(gene=gs, df, cluster_id=k, contrast=c, res_DS, res_DD)
119+
})
120+
})
121+
}

R/utils-pbDS.R

+34
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,40 @@
8989
list(table = tbl, data = y, fit = fit)
9090
}
9191

92+
#' @importFrom matrixStats rowMedians
93+
.edgeR_NB <- \(x, k, design, coef, contrast, ct, cs, nc) {
94+
y <- assay(x, k)
95+
# Gene_level filtering to remove genes detected in
96+
# almost all cells of almost all pseudobulk samples
97+
med_detection <- rowMedians(sweep(y, 2, nc, "/"))
98+
gene_filter <- med_detection < 0.9
99+
# Normalization offset to remove systematic differences between pseudobulk
100+
# samples that are due to technical or nuisance biological variability.
101+
# Idea obtained from cellular detection rate (CDR) normalization from MAST.
102+
# Note that this normalization is used instead of 'edgeR::calcNormFactors()'.
103+
of <- colMeans(sweep(y[gene_filter, ], 2, nc, "/"))
104+
# construct 'DGEList'
105+
y <- suppressMessages(DGEList(
106+
counts = y[gene_filter, ],
107+
group = x$group_id[colnames(y)],
108+
remove.zeros = TRUE))
109+
# add offsets to 'DGEList'
110+
y$offset <- log(nc * of)
111+
# run an 'edgeR' analysis
112+
y <- estimateDisp(y, design)
113+
fit <- glmQLFit(y, design, robust = TRUE)
114+
tbl <- lapply(cs, function(c) {
115+
fit <- glmQLFTest(fit,
116+
coef[[c]],
117+
contrast[, c],
118+
poisson.bound = FALSE)
119+
tbl <- topTags(fit, n = Inf, sort.by = "none")
120+
tbl <- rename(tbl$table, p_val = "PValue", p_adj.loc = "FDR")
121+
tbl <- .res_df(tbl, k, ct, c)
122+
})
123+
list(table = tbl, data = y, fit = fit)
124+
}
125+
92126
#' @importFrom dplyr rename
93127
#' @importFrom edgeR calcNormFactors DGEList
94128
#' @importFrom limma contrasts.fit eBayes lmFit topTable topTreat voom treat

0 commit comments

Comments
 (0)