forked from Bowei-xiao/ADNI_code
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculateEwas.R
58 lines (45 loc) · 2.98 KB
/
calculateEwas.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
library('data.table')
library('car')
library('foreach')
library('doMC')
registerDoMC(22)
args=(commandArgs(TRUE))
chr_num=as.numeric(args[1])
pheno=read.csv('ADNI_covariate_moreCovs_540samples_withEpiage_1905obs.csv',stringsAsFactors=F)
probe_loc=fread('probe_locations.csv')
# load residuals of all individuals (note they have the same order as pheno)
# skip the first row, make the second row as header and then convert all entries into numbers
includeSex=T
if (includeSex){
name_suffix='withSex'
} else {
name_suffix='noSex'
}
residual_trans=fread(paste0('Transposed_standardized_residuals_top25percent_withRowNames_noColNames_',name_suffix,'_Chr',chr_num,'.txt'),header=F,stringsAsFactors=F)
runMultivar=function(i, pheno,residual_file,probe_locationFile){
probe_meth=as.vector(t(residual_file[i,2:ncol(residual_file)]))
probe_name=as.character(residual_file[i,1])
pheno$methy=probe_meth
# location use start position
probe_location=probe_locationFile$Start_hg38[probe_locationFile$Name==probe_name]
# run A/T/N separately, and run case/control separately
res11=lm(scale(log(ABETA),scale=F)~methy+DX_num+APOE4, data=pheno)
res12=lm(scale(log(PTAU),scale=F)~methy+DX_num+APOE4, data=pheno)
res13=lm(scale(log(TAU),scale=F)~methy+DX_num+APOE4, data=pheno)
df=data.frame(probe=probe_name,chr=chr_num,location=probe_location,
beta_a_probe=coef(summary(res11))['methy',1],se_a_probe=coef(summary(res11))['methy',2],p_a_probe=coef(summary(res11))['methy',4],
beta_a_dx=coef(summary(res11))['DX_num',1],se_a_dx=coef(summary(res11))['DX_num',2],p_a_dx=coef(summary(res11))['DX_num',4],
beta_a_ap=coef(summary(res11))['APOE4',1],se_a_ap=coef(summary(res11))['APOE4',2],p_a_ap=coef(summary(res11))['APOE4',4],
beta_t_probe=coef(summary(res12))['methy',1],se_t_probe=coef(summary(res12))['methy',2],p_t_probe=coef(summary(res12))['methy',4],
beta_t_dx=coef(summary(res12))['DX_num',1],se_t_dx=coef(summary(res12))['DX_num',2],p_t_dx=coef(summary(res12))['DX_num',4],
beta_t_ap=coef(summary(res12))['APOE4',1],se_t_ap=coef(summary(res12))['APOE4',2],p_t_ap=coef(summary(res12))['APOE4',4],
beta_n_probe=coef(summary(res13))['methy',1],se_n_probe=coef(summary(res13))['methy',2],p_n_probe=coef(summary(res13))['methy',4],
beta_n_dx=coef(summary(res13))['DX_num',1],se_n_dx=coef(summary(res13))['DX_num',2],p_n_dx=coef(summary(res13))['DX_num',4],
beta_n_ap=coef(summary(res13))['APOE4',1],se_n_ap=coef(summary(res13))['APOE4',2],p_n_ap=coef(summary(res13))['APOE4',4], stringsAsFactors=F)
return(df)
}
# run in parallel
res_list=foreach(i=1:nrow(residual_trans), .combine=rbind) %dopar% {
res=runMultivar(i, pheno=pheno,residual_file=residual_trans,probe_locationFile=probe_loc)
}
write.csv(res_list,paste0('Univariate_ewas_3phenos_top25percent_probes_',name_suffix,'_chr',chr_num,'.csv'),quote = F,row.names = F)