-
Notifications
You must be signed in to change notification settings - Fork 0
/
loadiSBCGBMIIsignature.R
55 lines (45 loc) · 2.46 KB
/
loadiSBCGBMIIsignature.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
loadiSBCGBMIIsignature = function(Y1.pre.train, Y2.pre.train,time, censoring){
#################################################################################################################################################################
############################ PRE - FILTERED FEATURE SELECTION ####################################################################################################
######## Prefiltering of the Genes ############################### ###########################
######### Using Univariate Cox's Model for Ranking of the Survival Power of the Genes ########
surv.obj <- Surv(exp(time),censoring)
coeff.sig <- c(0)
pvalue1.sig <- c(0)
pvalue2.sig <- c(0)
calcCox = function(x){
q1 <- unlist(summary(coxph(surv.obj ~ ., data = as.data.frame(x))))
return(q1$logtest.pvalue)
}
pvalue1.sig <- apply(Y1.pre.train,2,calcCox)
pvalue2.sig <- apply(Y2.pre.train,2,calcCox)
###### Adjusting p-values for Multiple Test Correction
pvalue1.sig.adj <- p.adjust(pvalue1.sig, method = "fdr")
pvalue2.sig.adj <- p.adjust(pvalue2.sig, method = "fdr")
##### GENE EXPRESSION #################
#### As the number of features are quite variable choose first a very loose cut-off
signature1.loose <- colnames(Y1.pre.train)[(pvalue1.sig < 0.2)]
### Combined the P-values
pvalue1.combined <- (pvalue1.sig.adj)
names(pvalue1.combined) <- colnames(Y1.pre.train)
## Sort it
pvalue1.combined.sort <- sort(pvalue1.combined)
## Only select those genes which are loosely in the signature
pvalue1.combined.adj <- pvalue1.combined.sort[names(pvalue1.combined.sort) %in% signature1.loose]
### Take the top 20 genes ####
probes1.signature <- names(pvalue1.combined.adj[1:20])
##### mi-RNA EXPRESSION #################
#### As the number of features are quite variable choose first a very loose cut-off
signature2.loose <- colnames(Y2.pre.train)[(pvalue2.sig < 0.2)]
### Combined the P-values
pvalue2.combined <- (pvalue2.sig.adj)
names(pvalue2.combined) <- colnames(Y2.pre.train)
## Sort it
pvalue2.combined.sort <- sort(pvalue2.combined)
## Only select those genes which are loosely in the signature
pvalue2.combined.adj <- pvalue2.combined.sort[names(pvalue2.combined.sort) %in% signature2.loose]
### Take the top 20 genes ####
probes2.signature <- names(pvalue2.combined.adj[1:20])
### Sending the signature
relev <- list('signature1.sbc'= probes1.signature,'signature2.sbc'= probes2.signature)
}