Skip to content

Commit 99c3315

Browse files
committed
201203 - NN added, still needs work on mean LFC.
1 parent 2028d53 commit 99c3315

34 files changed

+2179
-67
lines changed

Diff for: DEoverlap_FDR.Rmd

+1-14
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,11 @@ title: "Differential Gene Expression overlap"
66
library(cmapR)
77
library(colorspace)
88
library(scales)
9+
source("~/Dropbox/GDB/line2user.R")
910
.PAR <- par(no.readonly=T)
1011
```
1112

1213
```{r FX, echo=F,message=F,warning=F}
13-
line2user <- function(line, side) {
14-
lh <- par('cin')[2] * par('cex') * par('lheight')
15-
x_off <- diff(grconvertX(c(0, lh), 'inches', 'npc'))
16-
y_off <- diff(grconvertY(c(0, lh), 'inches', 'npc'))
17-
switch(side,
18-
`1` = grconvertY(-line * y_off, 'npc', 'user'),
19-
`2` = grconvertX(-line * x_off, 'npc', 'user'),
20-
`3` = grconvertY(1 + line * y_off, 'npc', 'user'),
21-
`4` = grconvertX(1 + line * x_off, 'npc', 'user'),
22-
stop("Side must be 1, 2, 3, or 4", call.=FALSE))
23-
}
24-
25-
26-
2714
DE_heatmap <- function(FDR_all,pDE_all,data_name=c("Ligands","Cell lines"),aCut=NA) {
2815
2916
ALPHA <- c(0.1,0.05,0.01,0.001)

Diff for: DEoverlap_NicheNet.R

+286-13
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,283 @@ library(pbapply)
33
pboptions(type="timer")
44
.PAR <- par(no.readonly=T)
55

6-
# Prob DE overlap from Supplementary Table ----
7-
DEcounts <- read.table("~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_count.txt",
8-
sep="\t",header=T,row.names=1,as.is=T)
9-
DEratio <- read.table("~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_ratio.txt",
10-
sep="\t",header=T,row.names=1,as.is=T)
11-
numDE <- rowSums(DEcounts,na.rm=T)
12-
nGene <- c(length(Reduce(intersect,lapply(nn_db,function(X) X$diffexp$gene))),
13-
range(sapply(nn_db,function(X) nrow(X$diffexp))))
14-
15-
16-
176

187
# Re-analysis of NicheNet dataset ----
198

20-
nn_model <- readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
9+
# nn_model <- readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
2110
nn_db <- readRDS(url("https://zenodo.org/record/3260758/files/expression_settings.rds"))
2211

23-
nn_ligands <- unique(unlist(sapply(nn_db,"[[","from")))
12+
nn_db <- nn_db[sapply(sapply(nn_db,"[[","from"),length) == 1]
13+
14+
nn_ligands <- unique(sapply(nn_db,"[[","from"))
2415
nn_lig_dsNames <- sapply(nn_ligands,function(LIG) names(nn_db)[sapply(nn_db,function(X) LIG %in% X$from)])
2516
nn_ligands <- nn_ligands[sapply(nn_lig_dsNames,length) > 1]
2617
nn_lig_dsNames <- nn_lig_dsNames[nn_ligands]
2718

2819

2920

21+
## Mapping datasets to GEO accession ----
22+
23+
nn_DEinfo <- read.table("~/Dropbox/GDB/CMapCorr/NicheNet_DBinfo_nDE.txt",
24+
header=T,sep="\t",as.is=T)
25+
rownames(nn_DEinfo) <- nn_DEinfo$setting_id
26+
nn_DEinfo <- nn_DEinfo[,-1]
27+
28+
DSinfo <- list()
29+
for (LIG in nn_ligands) {
30+
temp_DE <- sapply(nn_lig_dsNames[[LIG]],function(X)
31+
nn_db[[X]]$diffexp[nn_db[[X]]$diffexp$lfc >= 1 & nn_db[[X]]$diffexp$qval <= 0.1,"gene"])
32+
temp_DSname <- sapply(sapply(temp_DE,length),function(N) {
33+
temp <- rownames(nn_DEinfo)[nn_DEinfo$ligand == LIG][nn_DEinfo[nn_DEinfo$ligand == LIG,"nr_upgenes"] %in% N]
34+
if (length(temp) == 0) {
35+
temp <- rownames(nn_DEinfo)[nn_DEinfo$ligand == LIG][nn_DEinfo[nn_DEinfo$ligand == LIG,"nr_degenes"] %in% N]
36+
}
37+
return(temp)
38+
})
39+
if (is.list(temp_DSname)) { stop(LIG) }
40+
DSinfo[[LIG]] <- nn_DEinfo[temp_DSname,c("time_series","cell_type","ligand")]
41+
DSinfo[[LIG]]$accession <- sub(paste0("-",LIG,".*$"),"",rownames(DSinfo[[LIG]]))
42+
rownames(DSinfo[[LIG]]) <- names(temp_DSname)
43+
rm(list=grep("^temp",ls(),value=T))
44+
}
45+
DSinfo$WNT1$cell_type <- paste(DSinfo$WNT1$cell_type,
46+
sapply(strsplit(rownames(DSinfo$WNT1),"_"),"[[",3),
47+
sep="_")
48+
for (LIG in names(DSinfo)) {
49+
DSinfo[[LIG]]$CtAcc <- paste(DSinfo[[LIG]]$accession,DSinfo[[LIG]]$cell_type)
50+
}
51+
52+
53+
# Organizing replicate data ----
54+
55+
nn_lig_rep <- pbsapply(nn_ligands,function(LIG) {
56+
temp_gene <- Reduce(intersect,sapply(nn_lig_dsNames[[LIG]],
57+
function(X) nn_db[[X]]$diffexp$gene,
58+
simplify=F))
59+
60+
temp_diffexp <- sapply(nn_lig_dsNames[[LIG]],function(X) {
61+
temp <- nn_db[[X]]$diffexp
62+
temp <- temp[temp$gene %in% temp_gene,]
63+
if (any(duplicated(temp$gene))) {
64+
temp_dup <- unique(temp$gene[duplicated(temp$gene)])
65+
temp_rm <- unlist(lapply(temp_dup,function(DUP) {
66+
temp_id <- rownames(temp[temp$gene == DUP,])
67+
return(temp_id[-which.min(temp[temp_id,"qval"])])
68+
}))
69+
temp <- temp[!rownames(temp) %in% temp_rm,]
70+
}
71+
rownames(temp) <- temp$gene
72+
return(list(lfc=temp[temp_gene,"lfc"],
73+
qval=temp[temp_gene,"qval"]))
74+
},simplify=F)
75+
76+
temp_lfc <- sapply(temp_diffexp,"[[","lfc")
77+
temp_qval <- sapply(temp_diffexp,"[[","qval")
78+
rownames(temp_lfc) <- rownames(temp_qval) <- temp_gene
79+
80+
return(list(lfc=temp_lfc,
81+
qval=temp_qval))
82+
},simplify=F)
83+
84+
85+
86+
# Correlation ----
87+
88+
corr_all <- corr_ct <- corr_ds <- list()
89+
for (LIG in nn_ligands) {
90+
temp_ts <- rownames(DSinfo[[LIG]])[DSinfo[[LIG]]$time_series]
91+
temp_nts <- rownames(DSinfo[[LIG]])[!DSinfo[[LIG]]$time_series]
92+
93+
if (length(temp_ts) > 1) {
94+
temp_acc <- sapply(unique(DSinfo[[LIG]][temp_ts,"CtAcc"]),function(X)
95+
temp_ts[DSinfo[[LIG]][temp_ts,"CtAcc"] == X],
96+
simplify=F)
97+
temp_ct <- sapply(unique(DSinfo[[LIG]][temp_ts,"cell_type"]),function(X)
98+
temp_ts[DSinfo[[LIG]][temp_ts,"cell_type"] == X],
99+
simplify=F)
100+
101+
temp_cor2 <- cor(nn_lig_rep[[LIG]]$lfc[,temp_ts],method="spearman")
102+
temp_cor_names <- sapply(colnames(temp_cor2),function(X)
103+
sapply(rownames(temp_cor2), function(Y) paste(X,Y,sep=".")))
104+
temp_cor <- temp_cor2[lower.tri(temp_cor2)]
105+
names(temp_cor) <- temp_cor_names[lower.tri(temp_cor_names)]
106+
rm(temp_cor2,temp_cor_names)
107+
108+
for (Y in names(temp_acc)[sapply(temp_acc,length) > 1]) {
109+
temp_hits <- sapply(strsplit(names(temp_cor),".",fixed=T),function(X) all(X %in% temp_acc[[Y]]))
110+
corr_ds[[LIG]] <- append(corr_ds[[LIG]],temp_cor[temp_hits])
111+
temp_cor <- temp_cor[!temp_hits]
112+
rm(temp_hits)
113+
}
114+
if (length(temp_cor) > 0) {
115+
for (Z in names(temp_ct)[sapply(temp_ct,length) > 1]) {
116+
temp_hits <- sapply(strsplit(names(temp_cor),".",fixed=T),function(X) all(X %in% temp_ct[[Z]]))
117+
corr_ct[[LIG]] <- append(corr_ct[[LIG]],temp_cor[temp_hits])
118+
temp_cor <- temp_cor[!temp_hits]
119+
rm(temp_hits)
120+
}
121+
}
122+
if (length(temp_cor) > 0) {
123+
corr_all[[LIG]] <- append(corr_all[[LIG]],temp_cor)
124+
}
125+
rm(Y,Z,temp_cor,temp_acc,temp_ct)
126+
}
127+
128+
if (length(temp_nts) > 1) {
129+
temp_acc <- sapply(unique(DSinfo[[LIG]][temp_nts,"CtAcc"]),function(X)
130+
temp_nts[DSinfo[[LIG]][temp_nts,"CtAcc"] == X],
131+
simplify=F)
132+
temp_ct <- sapply(unique(DSinfo[[LIG]][temp_nts,"cell_type"]),function(X)
133+
temp_nts[DSinfo[[LIG]][temp_nts,"cell_type"] == X],
134+
simplify=F)
135+
136+
temp_cor2 <- cor(nn_lig_rep[[LIG]]$lfc[,temp_nts],method="spearman")
137+
temp_cor_names <- sapply(colnames(temp_cor2),function(X)
138+
sapply(rownames(temp_cor2), function(Y) paste(X,Y,sep=".")))
139+
temp_cor <- temp_cor2[lower.tri(temp_cor2)]
140+
names(temp_cor) <- temp_cor_names[lower.tri(temp_cor_names)]
141+
rm(temp_cor2,temp_cor_names)
142+
143+
for (Y in names(temp_acc)[sapply(temp_acc,length) > 1]) {
144+
temp_hits <- sapply(strsplit(names(temp_cor),".",fixed=T),function(X) all(X %in% temp_acc[[Y]]))
145+
corr_ds[[LIG]] <- append(corr_ds[[LIG]],temp_cor[temp_hits])
146+
temp_cor <- temp_cor[!temp_hits]
147+
rm(temp_hits)
148+
}
149+
if (length(temp_cor) > 0) {
150+
for (Z in names(temp_ct)[sapply(temp_ct,length) > 1]) {
151+
temp_hits <- sapply(strsplit(names(temp_cor),".",fixed=T),function(X) all(X %in% temp_ct[[Z]]))
152+
corr_ct[[LIG]] <- append(corr_ct[[LIG]],temp_cor[temp_hits])
153+
temp_cor <- temp_cor[!temp_hits]
154+
rm(temp_hits)
155+
}
156+
}
157+
if (length(temp_cor) > 0) {
158+
corr_all[[LIG]] <- append(corr_all[[LIG]],temp_cor)
159+
}
160+
rm(Y,Z,temp_cor,temp_acc,temp_ct)
161+
}
162+
rm(temp_nts,temp_ts)
163+
}
164+
165+
166+
boxplot(unlist(sapply(nn_ligands,function(X) list(ALL=corr_all[[X]],
167+
CT=corr_ct[[X]],
168+
DS=corr_ds[[X]]),
169+
simplify=F),recursive=F),
170+
las=3)
171+
172+
par(mar=c(3,3,1,1),mgp=2:0)
173+
boxplot(list(ALL=unlist(corr_all),CT=unlist(corr_ct),DS=unlist(corr_ds)),
174+
ylab="Spearman correlation of logFC")
175+
176+
corr_all_nts <- corr_all_ts <- list()
177+
for (LIG in names(corr_all)) {
178+
temp_ts <- sapply(strsplit(names(corr_all[[LIG]]),".",fixed=T),
179+
function(X) all(DSinfo[[LIG]][X,"time_series"]))
180+
corr_all_ts[[LIG]] <- corr_all[[LIG]][temp_ts]
181+
corr_all_nts[[LIG]] <- corr_all[[LIG]][!temp_ts]
182+
}
183+
boxplot(list(ALL_nTS=unlist(corr_all_nts),ALL_TS=unlist(corr_all_ts),
184+
CT=unlist(corr_ct),DS=unlist(corr_ds)))
185+
# not much diff
186+
187+
188+
189+
190+
# Diff Exp ----
191+
192+
nn_DE <- list()
193+
for (LIG in nn_ligands) {
194+
nn_DE[[LIG]] <- mapply(intersect,
195+
apply(nn_lig_rep[[LIG]]$lfc,2,function(X) names(which(X >= 1))),
196+
apply(nn_lig_rep[[LIG]]$qval,2,function(X) names(which(X <= 0.1))))
197+
}
198+
hist(sapply(unlist(nn_DE,recursive=F),length),
199+
xlab="# DE per DS",main=NA)
200+
201+
temp_DE <- unlist(nn_DE,recursive=F)
202+
DEbkgd <- pbsapply(min(sapply(DSinfo,nrow)):max(sapply(DSinfo,nrow)),function(X)
203+
sapply(1:1e6,function(Y)
204+
length(Reduce(intersect,sample(temp_DE,X)))
205+
),cl=4)
206+
colnames(DEbkgd) <- min(sapply(DSinfo,nrow)):max(sapply(DSinfo,nrow))
207+
rm(temp_DE)
208+
209+
de_all <- de_ct <- de_ds <- list()
210+
P_all <- P_ct <- P_ds <- list()
211+
for (LIG in nn_ligands) {
212+
temp_ts <- rownames(DSinfo[[LIG]])[DSinfo[[LIG]]$time_series]
213+
temp_nts <- rownames(DSinfo[[LIG]])[!DSinfo[[LIG]]$time_series]
214+
215+
if (length(temp_ts) > 1) {
216+
temp_acc <- sapply(unique(DSinfo[[LIG]][temp_ts,"CtAcc"]),function(X)
217+
temp_ts[DSinfo[[LIG]][temp_ts,"CtAcc"] == X],
218+
simplify=F)
219+
temp_ct <- sapply(unique(DSinfo[[LIG]][temp_ts,"cell_type"]),function(X)
220+
temp_ts[DSinfo[[LIG]][temp_ts,"cell_type"] == X],
221+
simplify=F)
222+
temp_ct <- temp_ct[!sapply(temp_ct,function(CT)
223+
any(sapply(temp_acc,function(ACC) all(CT == ACC))))]
224+
225+
for (X in names(temp_acc)[sapply(temp_acc,length) > 1]) {
226+
Y <- paste(temp_acc[[X]],collapse=".")
227+
de_ds[[LIG]][[Y]] <- Reduce(intersect,nn_DE[[LIG]][temp_acc[[X]]])
228+
P_ds[[LIG]][[Y]] <- sum(DEbkgd[,as.character(length(temp_acc[[X]]))] >=
229+
length(de_ds[[LIG]][[Y]])) / nrow(DEbkgd)
230+
}
231+
for (X in names(temp_ct)[sapply(temp_ct,length) > 1]) {
232+
Y <- paste(temp_ct[[X]],collapse=".")
233+
de_ct[[LIG]][[Y]] <- Reduce(intersect,nn_DE[[LIG]][temp_ct[[X]]])
234+
P_ct[[LIG]][[Y]] <- sum(DEbkgd[,as.character(length(temp_ct[[X]]))] >=
235+
length(de_ct[[LIG]][[Y]])) / nrow(DEbkgd)
236+
}
237+
de_all[[LIG]] <- Reduce(intersect,nn_DE[[LIG]])
238+
P_all[[LIG]] <- sum(DEbkgd[,as.character(length(nn_DE[[LIG]]))] >=
239+
length(de_all[[LIG]])) / nrow(DEbkgd)
240+
241+
}
242+
243+
if (length(temp_nts) > 1) {
244+
temp_acc <- sapply(unique(DSinfo[[LIG]][temp_nts,"CtAcc"]),function(X)
245+
temp_nts[DSinfo[[LIG]][temp_nts,"CtAcc"] == X],
246+
simplify=F)
247+
temp_ct <- sapply(unique(DSinfo[[LIG]][temp_nts,"cell_type"]),function(X)
248+
temp_nts[DSinfo[[LIG]][temp_nts,"cell_type"] == X],
249+
simplify=F)
250+
temp_ct <- temp_ct[!sapply(temp_ct,function(CT)
251+
any(sapply(temp_acc,function(ACC) all(CT == ACC))))]
252+
253+
for (X in names(temp_acc)[sapply(temp_acc,length) > 1]) {
254+
Y <- paste(temp_acc[[X]],collapse=".")
255+
de_ds[[LIG]][[Y]] <- Reduce(intersect,nn_DE[[LIG]][temp_acc[[X]]])
256+
P_ds[[LIG]][[Y]] <- sum(DEbkgd[,as.character(length(temp_acc[[X]]))] >=
257+
length(de_ds[[LIG]][[Y]])) / nrow(DEbkgd)
258+
}
259+
for (X in names(temp_ct)[sapply(temp_ct,length) > 1]) {
260+
Y <- paste(temp_ct[[X]],collapse=".")
261+
de_ct[[LIG]][[Y]] <- Reduce(intersect,nn_DE[[LIG]][temp_ct[[X]]])
262+
P_ct[[LIG]][[Y]] <- sum(DEbkgd[,as.character(length(temp_ct[[X]]))] >=
263+
length(de_ct[[LIG]][[Y]])) / nrow(DEbkgd)
264+
}
265+
de_all[[LIG]] <- Reduce(intersect,nn_DE[[LIG]])
266+
P_all[[LIG]] <- sum(DEbkgd[,as.character(length(nn_DE[[LIG]]))] >=
267+
length(de_all[[LIG]])) / nrow(DEbkgd)
268+
}
269+
}
270+
271+
boxplot(list(ALL=-log10(unlist(P_all)),
272+
CT=-log10(unlist(P_ct)),
273+
DS=-log10(unlist(P_ds))),
274+
ylab="-log10 P #DE by chance")
275+
276+
277+
278+
279+
280+
281+
# old stuff ----
282+
30283
nn_lig_rep <- pbsapply(nn_ligands,function(LIG) {
31284
temp_gene <- Reduce(intersect,sapply(nn_lig_dsNames[[LIG]],
32285
function(X) nn_db[[X]]$diffexp$gene,
@@ -64,3 +317,23 @@ boxplot(nn_cor,las=3,ylab="Spearman's CC")
64317
# DE per NicheNet paper: lfc >= 1 & qval <= 0.1
65318

66319

320+
321+
322+
323+
# Prob DE overlap from Supplementary Table ----
324+
DEcounts <- read.table("~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_count.txt",
325+
sep="\t",header=T,row.names=1,as.is=T)
326+
DEratio <- read.table("~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_ratio.txt",
327+
sep="\t",header=T,row.names=1,as.is=T)
328+
numDE <- rowSums(DEcounts,na.rm=T)
329+
nGene <- c(length(Reduce(intersect,lapply(nn_db,function(X) X$diffexp$gene))),
330+
range(sapply(nn_db,function(X) nrow(X$diffexp))))
331+
332+
333+
334+
335+
336+
337+
338+
339+

0 commit comments

Comments
 (0)