@@ -3,30 +3,283 @@ library(pbapply)
3
3
pboptions(type = " timer" )
4
4
.PAR <- par(no.readonly = T )
5
5
6
- # Prob DE overlap from Supplementary Table ----
7
- DEcounts <- read.table(" ~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_count.txt" ,
8
- sep = " \t " ,header = T ,row.names = 1 ,as.is = T )
9
- DEratio <- read.table(" ~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_ratio.txt" ,
10
- sep = " \t " ,header = T ,row.names = 1 ,as.is = T )
11
- numDE <- rowSums(DEcounts ,na.rm = T )
12
- nGene <- c(length(Reduce(intersect ,lapply(nn_db ,function (X ) X $ diffexp $ gene ))),
13
- range(sapply(nn_db ,function (X ) nrow(X $ diffexp ))))
14
-
15
-
16
-
17
6
18
7
# Re-analysis of NicheNet dataset ----
19
8
20
- nn_model <- readRDS(url(" https://zenodo.org/record/3260758/files/ligand_target_matrix.rds" ))
9
+ # nn_model <- readRDS(url("https://zenodo.org/record/3260758/files/ligand_target_matrix.rds"))
21
10
nn_db <- readRDS(url(" https://zenodo.org/record/3260758/files/expression_settings.rds" ))
22
11
23
- nn_ligands <- unique(unlist(sapply(nn_db ," [[" ," from" )))
12
+ nn_db <- nn_db [sapply(sapply(nn_db ," [[" ," from" ),length ) == 1 ]
13
+
14
+ nn_ligands <- unique(sapply(nn_db ," [[" ," from" ))
24
15
nn_lig_dsNames <- sapply(nn_ligands ,function (LIG ) names(nn_db )[sapply(nn_db ,function (X ) LIG %in% X $ from )])
25
16
nn_ligands <- nn_ligands [sapply(nn_lig_dsNames ,length ) > 1 ]
26
17
nn_lig_dsNames <- nn_lig_dsNames [nn_ligands ]
27
18
28
19
29
20
21
+ # # Mapping datasets to GEO accession ----
22
+
23
+ nn_DEinfo <- read.table(" ~/Dropbox/GDB/CMapCorr/NicheNet_DBinfo_nDE.txt" ,
24
+ header = T ,sep = " \t " ,as.is = T )
25
+ rownames(nn_DEinfo ) <- nn_DEinfo $ setting_id
26
+ nn_DEinfo <- nn_DEinfo [,- 1 ]
27
+
28
+ DSinfo <- list ()
29
+ for (LIG in nn_ligands ) {
30
+ temp_DE <- sapply(nn_lig_dsNames [[LIG ]],function (X )
31
+ nn_db [[X ]]$ diffexp [nn_db [[X ]]$ diffexp $ lfc > = 1 & nn_db [[X ]]$ diffexp $ qval < = 0.1 ," gene" ])
32
+ temp_DSname <- sapply(sapply(temp_DE ,length ),function (N ) {
33
+ temp <- rownames(nn_DEinfo )[nn_DEinfo $ ligand == LIG ][nn_DEinfo [nn_DEinfo $ ligand == LIG ," nr_upgenes" ] %in% N ]
34
+ if (length(temp ) == 0 ) {
35
+ temp <- rownames(nn_DEinfo )[nn_DEinfo $ ligand == LIG ][nn_DEinfo [nn_DEinfo $ ligand == LIG ," nr_degenes" ] %in% N ]
36
+ }
37
+ return (temp )
38
+ })
39
+ if (is.list(temp_DSname )) { stop(LIG ) }
40
+ DSinfo [[LIG ]] <- nn_DEinfo [temp_DSname ,c(" time_series" ," cell_type" ," ligand" )]
41
+ DSinfo [[LIG ]]$ accession <- sub(paste0(" -" ,LIG ," .*$" )," " ,rownames(DSinfo [[LIG ]]))
42
+ rownames(DSinfo [[LIG ]]) <- names(temp_DSname )
43
+ rm(list = grep(" ^temp" ,ls(),value = T ))
44
+ }
45
+ DSinfo $ WNT1 $ cell_type <- paste(DSinfo $ WNT1 $ cell_type ,
46
+ sapply(strsplit(rownames(DSinfo $ WNT1 )," _" )," [[" ,3 ),
47
+ sep = " _" )
48
+ for (LIG in names(DSinfo )) {
49
+ DSinfo [[LIG ]]$ CtAcc <- paste(DSinfo [[LIG ]]$ accession ,DSinfo [[LIG ]]$ cell_type )
50
+ }
51
+
52
+
53
+ # Organizing replicate data ----
54
+
55
+ nn_lig_rep <- pbsapply(nn_ligands ,function (LIG ) {
56
+ temp_gene <- Reduce(intersect ,sapply(nn_lig_dsNames [[LIG ]],
57
+ function (X ) nn_db [[X ]]$ diffexp $ gene ,
58
+ simplify = F ))
59
+
60
+ temp_diffexp <- sapply(nn_lig_dsNames [[LIG ]],function (X ) {
61
+ temp <- nn_db [[X ]]$ diffexp
62
+ temp <- temp [temp $ gene %in% temp_gene ,]
63
+ if (any(duplicated(temp $ gene ))) {
64
+ temp_dup <- unique(temp $ gene [duplicated(temp $ gene )])
65
+ temp_rm <- unlist(lapply(temp_dup ,function (DUP ) {
66
+ temp_id <- rownames(temp [temp $ gene == DUP ,])
67
+ return (temp_id [- which.min(temp [temp_id ," qval" ])])
68
+ }))
69
+ temp <- temp [! rownames(temp ) %in% temp_rm ,]
70
+ }
71
+ rownames(temp ) <- temp $ gene
72
+ return (list (lfc = temp [temp_gene ," lfc" ],
73
+ qval = temp [temp_gene ," qval" ]))
74
+ },simplify = F )
75
+
76
+ temp_lfc <- sapply(temp_diffexp ," [[" ," lfc" )
77
+ temp_qval <- sapply(temp_diffexp ," [[" ," qval" )
78
+ rownames(temp_lfc ) <- rownames(temp_qval ) <- temp_gene
79
+
80
+ return (list (lfc = temp_lfc ,
81
+ qval = temp_qval ))
82
+ },simplify = F )
83
+
84
+
85
+
86
+ # Correlation ----
87
+
88
+ corr_all <- corr_ct <- corr_ds <- list ()
89
+ for (LIG in nn_ligands ) {
90
+ temp_ts <- rownames(DSinfo [[LIG ]])[DSinfo [[LIG ]]$ time_series ]
91
+ temp_nts <- rownames(DSinfo [[LIG ]])[! DSinfo [[LIG ]]$ time_series ]
92
+
93
+ if (length(temp_ts ) > 1 ) {
94
+ temp_acc <- sapply(unique(DSinfo [[LIG ]][temp_ts ," CtAcc" ]),function (X )
95
+ temp_ts [DSinfo [[LIG ]][temp_ts ," CtAcc" ] == X ],
96
+ simplify = F )
97
+ temp_ct <- sapply(unique(DSinfo [[LIG ]][temp_ts ," cell_type" ]),function (X )
98
+ temp_ts [DSinfo [[LIG ]][temp_ts ," cell_type" ] == X ],
99
+ simplify = F )
100
+
101
+ temp_cor2 <- cor(nn_lig_rep [[LIG ]]$ lfc [,temp_ts ],method = " spearman" )
102
+ temp_cor_names <- sapply(colnames(temp_cor2 ),function (X )
103
+ sapply(rownames(temp_cor2 ), function (Y ) paste(X ,Y ,sep = " ." )))
104
+ temp_cor <- temp_cor2 [lower.tri(temp_cor2 )]
105
+ names(temp_cor ) <- temp_cor_names [lower.tri(temp_cor_names )]
106
+ rm(temp_cor2 ,temp_cor_names )
107
+
108
+ for (Y in names(temp_acc )[sapply(temp_acc ,length ) > 1 ]) {
109
+ temp_hits <- sapply(strsplit(names(temp_cor )," ." ,fixed = T ),function (X ) all(X %in% temp_acc [[Y ]]))
110
+ corr_ds [[LIG ]] <- append(corr_ds [[LIG ]],temp_cor [temp_hits ])
111
+ temp_cor <- temp_cor [! temp_hits ]
112
+ rm(temp_hits )
113
+ }
114
+ if (length(temp_cor ) > 0 ) {
115
+ for (Z in names(temp_ct )[sapply(temp_ct ,length ) > 1 ]) {
116
+ temp_hits <- sapply(strsplit(names(temp_cor )," ." ,fixed = T ),function (X ) all(X %in% temp_ct [[Z ]]))
117
+ corr_ct [[LIG ]] <- append(corr_ct [[LIG ]],temp_cor [temp_hits ])
118
+ temp_cor <- temp_cor [! temp_hits ]
119
+ rm(temp_hits )
120
+ }
121
+ }
122
+ if (length(temp_cor ) > 0 ) {
123
+ corr_all [[LIG ]] <- append(corr_all [[LIG ]],temp_cor )
124
+ }
125
+ rm(Y ,Z ,temp_cor ,temp_acc ,temp_ct )
126
+ }
127
+
128
+ if (length(temp_nts ) > 1 ) {
129
+ temp_acc <- sapply(unique(DSinfo [[LIG ]][temp_nts ," CtAcc" ]),function (X )
130
+ temp_nts [DSinfo [[LIG ]][temp_nts ," CtAcc" ] == X ],
131
+ simplify = F )
132
+ temp_ct <- sapply(unique(DSinfo [[LIG ]][temp_nts ," cell_type" ]),function (X )
133
+ temp_nts [DSinfo [[LIG ]][temp_nts ," cell_type" ] == X ],
134
+ simplify = F )
135
+
136
+ temp_cor2 <- cor(nn_lig_rep [[LIG ]]$ lfc [,temp_nts ],method = " spearman" )
137
+ temp_cor_names <- sapply(colnames(temp_cor2 ),function (X )
138
+ sapply(rownames(temp_cor2 ), function (Y ) paste(X ,Y ,sep = " ." )))
139
+ temp_cor <- temp_cor2 [lower.tri(temp_cor2 )]
140
+ names(temp_cor ) <- temp_cor_names [lower.tri(temp_cor_names )]
141
+ rm(temp_cor2 ,temp_cor_names )
142
+
143
+ for (Y in names(temp_acc )[sapply(temp_acc ,length ) > 1 ]) {
144
+ temp_hits <- sapply(strsplit(names(temp_cor )," ." ,fixed = T ),function (X ) all(X %in% temp_acc [[Y ]]))
145
+ corr_ds [[LIG ]] <- append(corr_ds [[LIG ]],temp_cor [temp_hits ])
146
+ temp_cor <- temp_cor [! temp_hits ]
147
+ rm(temp_hits )
148
+ }
149
+ if (length(temp_cor ) > 0 ) {
150
+ for (Z in names(temp_ct )[sapply(temp_ct ,length ) > 1 ]) {
151
+ temp_hits <- sapply(strsplit(names(temp_cor )," ." ,fixed = T ),function (X ) all(X %in% temp_ct [[Z ]]))
152
+ corr_ct [[LIG ]] <- append(corr_ct [[LIG ]],temp_cor [temp_hits ])
153
+ temp_cor <- temp_cor [! temp_hits ]
154
+ rm(temp_hits )
155
+ }
156
+ }
157
+ if (length(temp_cor ) > 0 ) {
158
+ corr_all [[LIG ]] <- append(corr_all [[LIG ]],temp_cor )
159
+ }
160
+ rm(Y ,Z ,temp_cor ,temp_acc ,temp_ct )
161
+ }
162
+ rm(temp_nts ,temp_ts )
163
+ }
164
+
165
+
166
+ boxplot(unlist(sapply(nn_ligands ,function (X ) list (ALL = corr_all [[X ]],
167
+ CT = corr_ct [[X ]],
168
+ DS = corr_ds [[X ]]),
169
+ simplify = F ),recursive = F ),
170
+ las = 3 )
171
+
172
+ par(mar = c(3 ,3 ,1 ,1 ),mgp = 2 : 0 )
173
+ boxplot(list (ALL = unlist(corr_all ),CT = unlist(corr_ct ),DS = unlist(corr_ds )),
174
+ ylab = " Spearman correlation of logFC" )
175
+
176
+ corr_all_nts <- corr_all_ts <- list ()
177
+ for (LIG in names(corr_all )) {
178
+ temp_ts <- sapply(strsplit(names(corr_all [[LIG ]])," ." ,fixed = T ),
179
+ function (X ) all(DSinfo [[LIG ]][X ," time_series" ]))
180
+ corr_all_ts [[LIG ]] <- corr_all [[LIG ]][temp_ts ]
181
+ corr_all_nts [[LIG ]] <- corr_all [[LIG ]][! temp_ts ]
182
+ }
183
+ boxplot(list (ALL_nTS = unlist(corr_all_nts ),ALL_TS = unlist(corr_all_ts ),
184
+ CT = unlist(corr_ct ),DS = unlist(corr_ds )))
185
+ # not much diff
186
+
187
+
188
+
189
+
190
+ # Diff Exp ----
191
+
192
+ nn_DE <- list ()
193
+ for (LIG in nn_ligands ) {
194
+ nn_DE [[LIG ]] <- mapply(intersect ,
195
+ apply(nn_lig_rep [[LIG ]]$ lfc ,2 ,function (X ) names(which(X > = 1 ))),
196
+ apply(nn_lig_rep [[LIG ]]$ qval ,2 ,function (X ) names(which(X < = 0.1 ))))
197
+ }
198
+ hist(sapply(unlist(nn_DE ,recursive = F ),length ),
199
+ xlab = " # DE per DS" ,main = NA )
200
+
201
+ temp_DE <- unlist(nn_DE ,recursive = F )
202
+ DEbkgd <- pbsapply(min(sapply(DSinfo ,nrow )): max(sapply(DSinfo ,nrow )),function (X )
203
+ sapply(1 : 1e6 ,function (Y )
204
+ length(Reduce(intersect ,sample(temp_DE ,X )))
205
+ ),cl = 4 )
206
+ colnames(DEbkgd ) <- min(sapply(DSinfo ,nrow )): max(sapply(DSinfo ,nrow ))
207
+ rm(temp_DE )
208
+
209
+ de_all <- de_ct <- de_ds <- list ()
210
+ P_all <- P_ct <- P_ds <- list ()
211
+ for (LIG in nn_ligands ) {
212
+ temp_ts <- rownames(DSinfo [[LIG ]])[DSinfo [[LIG ]]$ time_series ]
213
+ temp_nts <- rownames(DSinfo [[LIG ]])[! DSinfo [[LIG ]]$ time_series ]
214
+
215
+ if (length(temp_ts ) > 1 ) {
216
+ temp_acc <- sapply(unique(DSinfo [[LIG ]][temp_ts ," CtAcc" ]),function (X )
217
+ temp_ts [DSinfo [[LIG ]][temp_ts ," CtAcc" ] == X ],
218
+ simplify = F )
219
+ temp_ct <- sapply(unique(DSinfo [[LIG ]][temp_ts ," cell_type" ]),function (X )
220
+ temp_ts [DSinfo [[LIG ]][temp_ts ," cell_type" ] == X ],
221
+ simplify = F )
222
+ temp_ct <- temp_ct [! sapply(temp_ct ,function (CT )
223
+ any(sapply(temp_acc ,function (ACC ) all(CT == ACC ))))]
224
+
225
+ for (X in names(temp_acc )[sapply(temp_acc ,length ) > 1 ]) {
226
+ Y <- paste(temp_acc [[X ]],collapse = " ." )
227
+ de_ds [[LIG ]][[Y ]] <- Reduce(intersect ,nn_DE [[LIG ]][temp_acc [[X ]]])
228
+ P_ds [[LIG ]][[Y ]] <- sum(DEbkgd [,as.character(length(temp_acc [[X ]]))] > =
229
+ length(de_ds [[LIG ]][[Y ]])) / nrow(DEbkgd )
230
+ }
231
+ for (X in names(temp_ct )[sapply(temp_ct ,length ) > 1 ]) {
232
+ Y <- paste(temp_ct [[X ]],collapse = " ." )
233
+ de_ct [[LIG ]][[Y ]] <- Reduce(intersect ,nn_DE [[LIG ]][temp_ct [[X ]]])
234
+ P_ct [[LIG ]][[Y ]] <- sum(DEbkgd [,as.character(length(temp_ct [[X ]]))] > =
235
+ length(de_ct [[LIG ]][[Y ]])) / nrow(DEbkgd )
236
+ }
237
+ de_all [[LIG ]] <- Reduce(intersect ,nn_DE [[LIG ]])
238
+ P_all [[LIG ]] <- sum(DEbkgd [,as.character(length(nn_DE [[LIG ]]))] > =
239
+ length(de_all [[LIG ]])) / nrow(DEbkgd )
240
+
241
+ }
242
+
243
+ if (length(temp_nts ) > 1 ) {
244
+ temp_acc <- sapply(unique(DSinfo [[LIG ]][temp_nts ," CtAcc" ]),function (X )
245
+ temp_nts [DSinfo [[LIG ]][temp_nts ," CtAcc" ] == X ],
246
+ simplify = F )
247
+ temp_ct <- sapply(unique(DSinfo [[LIG ]][temp_nts ," cell_type" ]),function (X )
248
+ temp_nts [DSinfo [[LIG ]][temp_nts ," cell_type" ] == X ],
249
+ simplify = F )
250
+ temp_ct <- temp_ct [! sapply(temp_ct ,function (CT )
251
+ any(sapply(temp_acc ,function (ACC ) all(CT == ACC ))))]
252
+
253
+ for (X in names(temp_acc )[sapply(temp_acc ,length ) > 1 ]) {
254
+ Y <- paste(temp_acc [[X ]],collapse = " ." )
255
+ de_ds [[LIG ]][[Y ]] <- Reduce(intersect ,nn_DE [[LIG ]][temp_acc [[X ]]])
256
+ P_ds [[LIG ]][[Y ]] <- sum(DEbkgd [,as.character(length(temp_acc [[X ]]))] > =
257
+ length(de_ds [[LIG ]][[Y ]])) / nrow(DEbkgd )
258
+ }
259
+ for (X in names(temp_ct )[sapply(temp_ct ,length ) > 1 ]) {
260
+ Y <- paste(temp_ct [[X ]],collapse = " ." )
261
+ de_ct [[LIG ]][[Y ]] <- Reduce(intersect ,nn_DE [[LIG ]][temp_ct [[X ]]])
262
+ P_ct [[LIG ]][[Y ]] <- sum(DEbkgd [,as.character(length(temp_ct [[X ]]))] > =
263
+ length(de_ct [[LIG ]][[Y ]])) / nrow(DEbkgd )
264
+ }
265
+ de_all [[LIG ]] <- Reduce(intersect ,nn_DE [[LIG ]])
266
+ P_all [[LIG ]] <- sum(DEbkgd [,as.character(length(nn_DE [[LIG ]]))] > =
267
+ length(de_all [[LIG ]])) / nrow(DEbkgd )
268
+ }
269
+ }
270
+
271
+ boxplot(list (ALL = - log10(unlist(P_all )),
272
+ CT = - log10(unlist(P_ct )),
273
+ DS = - log10(unlist(P_ds ))),
274
+ ylab = " -log10 P #DE by chance" )
275
+
276
+
277
+
278
+
279
+
280
+
281
+ # old stuff ----
282
+
30
283
nn_lig_rep <- pbsapply(nn_ligands ,function (LIG ) {
31
284
temp_gene <- Reduce(intersect ,sapply(nn_lig_dsNames [[LIG ]],
32
285
function (X ) nn_db [[X ]]$ diffexp $ gene ,
@@ -64,3 +317,23 @@ boxplot(nn_cor,las=3,ylab="Spearman's CC")
64
317
# DE per NicheNet paper: lfc >= 1 & qval <= 0.1
65
318
66
319
320
+
321
+
322
+
323
+ # Prob DE overlap from Supplementary Table ----
324
+ DEcounts <- read.table(" ~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_count.txt" ,
325
+ sep = " \t " ,header = T ,row.names = 1 ,as.is = T )
326
+ DEratio <- read.table(" ~/Dropbox/GDB/CMapCorr/NicheNet_LigDEoverlap_ratio.txt" ,
327
+ sep = " \t " ,header = T ,row.names = 1 ,as.is = T )
328
+ numDE <- rowSums(DEcounts ,na.rm = T )
329
+ nGene <- c(length(Reduce(intersect ,lapply(nn_db ,function (X ) X $ diffexp $ gene ))),
330
+ range(sapply(nn_db ,function (X ) nrow(X $ diffexp ))))
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+
339
+
0 commit comments