Skip to content

Commit 1da4bb8

Browse files
authored
Merge pull request #5 from h3abionet/master
Merge recent commits from JT
2 parents c9c2601 + 3738650 commit 1da4bb8

File tree

7 files changed

+320
-34
lines changed

7 files changed

+320
-34
lines changed

assoc/main.nf

+5
Original file line numberDiff line numberDiff line change
@@ -1294,6 +1294,11 @@ if (params.assoc+params.fisher+params.logistic+params.linear > 0) {
12941294
if (params.covariates) covariate = "--covar ${phenof} --covar-name ${params.covariates} "
12951295
out = pheno
12961296
}
1297+
if(params.sexinfo_available=='true'){
1298+
sexallow=""
1299+
}else{
1300+
sexallow="--allow-no-sex"
1301+
}
12971302
template "test.sh"
12981303
}
12991304

assoc/templates/test.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
hostname
44

55

6-
plink --bfile $base $covariate $pheno_cmd --threads $num_assoc_cores --${test} $perm $adjust --out $outfname
6+
plink --bfile $base $covariate $pheno_cmd --threads $num_assoc_cores --${test} $perm $adjust --out $outfname $sexallow

replication/gwascat/nextflow.config

+4-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11

22
rImage="quay.io/h3abionet_org/r"
3-
4-
53
py3Image = "quay.io/h3abionet_org/py3plink"
64
gemmaImage="quay.io/h3abionet_org/py3plink"
75
boltImage="quay.io/h3abionet_org/py3fastlmm"
@@ -232,9 +230,6 @@ process {
232230
withLabel: latex {
233231
container = latexImage
234232
}
235-
withLabel: gcta{
236-
container = gctaImage
237-
}
238233
withLabel: metaanalyse {
239234
container = MetaAnImage
240235
}
@@ -251,6 +246,10 @@ process {
251246
withLabel : fastlmm{
252247
container = fastlmmImage
253248
}
249+
withLabel: gcta{
250+
container = gctaImage
251+
}
252+
254253

255254
}
256255

utils/build_example_data/bin/format_simulated.r

+5-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ q(status=2)
3636

3737
allinfo$z.cat.new<-allinfo$z.cat
3838
allinfo$riskall<-sapply(strsplit(as.character(allinfo$risk_allele),split='-'),function(x)x[2])
39-
balise<-allinfo$riskall==bimfile$ref
39+
balise<-allinfo$riskall==allinfo$ref
4040
allinfo$z.cat.new[balise]<- -allinfo$z.cat.new[balise]
4141
allinfo$beta.cat[balise]<- -allinfo$beta.cat[balise]
4242
allinfo$A2<-allinfo$ref
@@ -62,6 +62,9 @@ names(dataformatplk)<-c("CHR", "SNP", "BP","A1", "A2", "FRQ", "BETA", "SE", "P")
6262
write.table(dataformatplk,file=paste(opt[['out']], ".assoc",sep=''),sep="\t", row.names=F,col.names=T,quote=F)
6363
system(paste("plink -bfile ",opt[['bfile']]," --clump ", opt[['out']], ".assoc", " --clump-p1 ", opt[['clump_p1']], " --clump-p2 ", opt[['clump_p2']]," --clump-r2 ", opt[['clump_r2']], " --clump-kb ", opt[['clump_kb']], ' -out ', opt[['out']], '_clump' ,sep=''))
6464
Dataclump<-read.table(paste(opt[['out']], '_clump.clumped',sep=''), header=T)
65+
6566
allinfo<-allinfo[allinfo$rs_bim %in% Dataclump$SNP ,]
66-
write.table(allinfo[,c('rs_bim', 'z.cat.new')], sep='\t', quote=F, file=opt[['out']], row.names=F, col.names=F)
67+
68+
rsbest<-aggregate(z.cat.new~rs_bim, allinfo,function(x)x[which.max(abs(x))])
69+
write.table(rsbest, sep='\t', quote=F, file=opt[['out']], row.names=F, col.names=F)
6770

utils/build_example_data/main.nf

100644100755
+71-23
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,18 @@ params.clump_p1=0.0001
6464
params.clump_p2=0.01
6565
params.clump_r2=0.50
6666
params.clump_kb=250
67-
params.dir_vcf="ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"
67+
//ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c.20130502.genotypes.vcf.gz
68+
params.dir_vcf="ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/"
6869

6970

7071
if(params.list_chro_pheno==""){
71-
params.list_chro_pheno=params.list_chro
72+
list_chro_pheno=params.list_chro
7273
}
7374
listchro=getlistchro(params.list_chro)
74-
listchro_pheno=getlistchro(params.list_chro_pheno)
75+
listchro_pheno=getlistchro(list_chro_pheno)
7576

7677
listchro_ch=Channel.from(listchro)
7778
listchro_ch2=Channel.from(listchro)
78-
//Pattern100G="ALL.chr${chro}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz"
7979

8080

8181
listchro_ch=listchro_ch.combine(Channel.fromPath(params.pos_allgeno, checkIfExists:true))
@@ -84,15 +84,16 @@ listchro_ch=listchro_ch.combine(Channel.fromPath(params.pos_allgeno, checkIfExis
8484

8585

8686
process Dl1000G{
87+
label 'Utils'
8788
cpus params.nb_cpus
8889
input :
8990
tuple val(chro), file(pos_geno) from listchro_ch
9091
output :
9192
tuple val(chro), file("${file1000G}") into file1000G
9293
script :
93-
file1000G= (chro=='X') ? "ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz" : "ALL.chr${chro}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz"
94-
//ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz
95-
file1000G= (chro=='Y') ? "ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz" : "$file1000G"
94+
//ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/ALL.chr17.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz
95+
file1000G= (chro=='X') ? "ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c.20130502.genotypes.vcf.gz" : "ALL.chr${chro}.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz"
96+
file1000G= (chro=='Y') ? "ALL.chrY.phase3_integrated_v2b.20130502.genotypes.vcf.gz" : "$file1000G"
9697
"""
9798
awk -v chro=$chro '{if(\$1==chro)print \$1"\\t"\$2-1"\\t"\$2"\\t"\$1":"\$2}' $pos_geno > pos.bed
9899
bcftools view --threads ${params.nb_cpus} -R pos.bed ${params.dir_vcf}/$file1000G|bgzip -c > $file1000G
@@ -125,7 +126,8 @@ process cleanPlinkFile{
125126
cp "$bim" "${bim}.save"
126127
awk '{if(\$2=="."){\$2=\$1":"\$4};print \$0}' "${bim}.save" > "$bim"
127128
awk '{if(length(\$5)==1 && length(\$6)==1)print \$2}' $bim > ${bim}.wellpos.pos
128-
plink -bfile $plk --keep-allele-order --extract ${bim}.wellpos.pos --make-bed -out $out --threads ${params.nb_cpus}
129+
awk '{print \$2}' $plk".bim" | sort | uniq -d > duplicated_snps.snplist
130+
plink -bfile $plk --keep-allele-order --extract ${bim}.wellpos.pos --make-bed -out $out --threads ${params.nb_cpus} --exclude duplicated_snps.snplist
129131
"""
130132
}
131133
plk_chro_flt=plk_chro_cl.collect()
@@ -159,6 +161,8 @@ process mergePlinkFile{
159161
}
160162

161163
process addSexFile{
164+
label 'R'
165+
162166
cpus params.nb_cpus
163167
input :
164168
tuple file(bed), file(bim), file(fam) from allplkres_ch_befsex
@@ -176,6 +180,7 @@ process addSexFile{
176180
"""
177181
}
178182
process GwasCatDl{
183+
label 'R'
179184
publishDir "${params.output_dir}/gwascat", overwrite:true, mode:'copy'
180185
output :
181186
file("${out}.bed") into gwascat_bed
@@ -190,17 +195,32 @@ process GwasCatDl{
190195
"""
191196
}
192197

193-
listchro_ch_gwascat=Channel.from(listchro_pheno)
194-
listchro_ch_gwascat=listchro_ch_gwascat.combine(gwascat_pos)
198+
process getchr_gc{
199+
input :
200+
file(bedfile) from gwascat_bed
201+
output :
202+
stdout into listchro_ch_gwascat
203+
script :
204+
"""
205+
awk '{print \$1}' $bedfile|sort|uniq
206+
"""
207+
}
208+
209+
//listchro_ch_gwascat=Channel.from(listchro_pheno)
210+
newlistchro_ch_gwascat=Channel.create()
211+
check = Channel.create()
212+
listchro_ch_gwascat.flatMap { list_str -> list_str.split() }.tap ( check) .set { newlistchro_ch_gwascat}
213+
newlistchro_ch_gwascat= newlistchro_ch_gwascat.combine(gwascat_pos)
195214

196215
process Dl1000G_GC{
216+
label 'Utils'
197217
input :
198-
tuple val(chro), file(pos_geno) from listchro_ch_gwascat
218+
tuple val(chro), file(pos_geno) from newlistchro_ch_gwascat
199219
output :
200220
tuple val(chro), file("${file1000G}") into file1000G_gwasc
201221
script :
202-
file1000G= (chro=='X') ? "ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz" : "ALL.chr${chro}.phase3_shapeit2_mvncall_integrated_v5a.20130502.genotypes.vcf.gz"
203-
file1000G= (chro=='Y' ) ? "ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz" : "$file1000G"
222+
file1000G= (chro=='X') ? "ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c.20130502.genotypes.vcf.gz" : "ALL.chr${chro}.phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz"
223+
file1000G= (chro=='Y') ? "ALL.chrY.phase3_integrated_v2b.20130502.genotypes.vcf.gz" : "$file1000G"
204224
"""
205225
tabix -fh ${params.dir_vcf}/$file1000G -R $pos_geno |bgzip -c > $file1000G
206226
"""
@@ -221,6 +241,7 @@ process transfvcfInBed1000G_GC{
221241
}
222242

223243
process cleanPlinkFile_GC{
244+
errorStrategy 'ignore'
224245
cpus params.nb_cpus
225246
input :
226247
tuple val(chro), file(bim), file(fam), file(bed) from plk_chro_gc
@@ -232,8 +253,10 @@ process cleanPlinkFile_GC{
232253
"""
233254
cp "$bim" "${bim}.save"
234255
awk '{if(\$2=="."){\$2=\$1":"\$4};print \$0}' "${bim}.save" > "$bim"
256+
awk '{print \$2}' $plk".bim" | sort | uniq -d > duplicated_snps.snplist
235257
awk '{if(length(\$5)==1 && length(\$6)==1)print \$2}' $bim > ${bim}.wellpos.pos
236-
plink -bfile $plk --keep-allele-order --extract ${bim}.wellpos.pos --make-bed -out $out --threads ${params.nb_cpus}
258+
plink -bfile $plk --keep-allele-order --extract ${bim}.wellpos.pos --make-bed -out $out --threads ${params.nb_cpus} --exclude duplicated_snps.snplist
259+
237260
"""
238261
}
239262

@@ -248,7 +271,6 @@ process mergePlinkFile_GC{
248271
output :
249272
tuple file("${out}.bed"), file("${out}.bim"), file("${out}.fam") into allplkres_ch_gc
250273
script :
251-
println allfile
252274
allfile2=allfile.toList().collect {it.toString().replaceFirst(~/\.[^\.]+$/, '')}
253275
allfile2=allfile2.unique()
254276
firstbed=allfile2[0]
@@ -269,6 +291,7 @@ process mergePlinkFile_GC{
269291
}
270292

271293
process format_simulated{
294+
label 'R'
272295
cpus params.nb_cpus
273296
input :
274297
tuple file(bed), file(bim), file(fam) from allplkres_ch_gc
@@ -286,36 +309,61 @@ process format_simulated{
286309
"""
287310
}
288311

289-
290312
process simulation_quantitatif{
313+
label 'gcta'
291314
cpus params.nb_cpus
292315
input :
293316
tuple file(bed), file(bim), file(fam), file(outeffect) from info_sim_qt
294-
publishDir "${params.output_dir}/simul_pheno/quant_pheno/", overwrite:true, mode:'copy'
295317
output :
296-
file("$out")
318+
file("sim.phen") into sim_ql
297319
script :
298-
out=params.output+"_qt.pheno"
299320
plk=bed.baseName
300321
"""
301322
${params.gcta_bin} --bfile $plk --simu-causal-loci $outeffect --simu-qt --simu-hsq ${params.simu_hsq} --out sim --simu-rep ${params.simu_rep} --simu-k ${params.simu_k}
302-
format_file_sim.r --file sim".phen" --out $out
303323
"""
304324
}
305325

326+
process format_sim_quantitatif{
327+
label 'R'
328+
input :
329+
file(file) from sim_ql
330+
publishDir "${params.output_dir}/simul_pheno/quant_pheno/", overwrite:true, mode:'copy'
331+
output :
332+
file("$out")
333+
script :
334+
out=params.output+"_qt.pheno"
335+
"""
336+
format_file_sim.r --file $file --out $out
337+
"""
338+
}
339+
306340
process simulation_qualitatif{
341+
label 'gcta'
307342
cpus params.nb_cpus
308343
input :
309344
tuple file(bed), file(bim), file(fam), file(outeffect) from info_sim_ql
310-
publishDir "${params.output_dir}/simul_pheno/qual_pheno/", overwrite:true, mode:'copy'
311345
output :
312-
file("$out")
346+
file("sim.phen") into sim_qt
313347
script :
314348
out=params.output+"_ql.pheno"
315349
plk=bed.baseName
316350
"""
317351
${params.gcta_bin} --bfile $plk --simu-causal-loci $outeffect --simu-hsq ${params.simu_hsq} --out sim --simu-rep ${params.simu_rep} --simu-k ${params.simu_k} --simu-cc `estimated_cc.py $fam ${params.simu_k}`
318-
format_file_sim.r --file sim".phen" --out $out
319352
"""
320353
}
321354

355+
356+
process format_sim_qualitatif{
357+
label 'R'
358+
input :
359+
file(file) from sim_qt
360+
publishDir "${params.output_dir}/simul_pheno/qual_pheno/", overwrite:true, mode:'copy'
361+
output :
362+
file("$out")
363+
script :
364+
out=params.output+"_ql.pheno"
365+
"""
366+
format_file_sim.r --file $file --out $out
367+
"""
368+
}
369+

utils/build_example_data/nextflow.config

+17-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,14 @@
1-
2-
31
py3Image = "quay.io/h3abionet_org/py3plink"
4-
gemmaImage="quay.io/h3abionet_org/py3plink"
52
latexImage="quay.io/h3abionet_org/h3agwas-texlive"
63
py2fastImage="quay.io/h3abionet_org/py2fastlmm"
74
MetaAnImage="quay.io/h3abionet_org/py3metagwas"
5+
py3rImage="quay.io/h3abionet_org/py3rproject"
6+
gemmaImage="quay.io/h3abionet_org/py3plink"
7+
gctaImage="quay.io/h3abionet_org/gcta"
8+
Utils="quay.io/h3abionet_org/py3utils"
9+
10+
11+
812
swarmPort = '2376'
913
queue = 'batch'
1014

@@ -243,6 +247,16 @@ process {
243247
withLabel: py2fast {
244248
container = py2fastImage
245249
}
250+
withLabel: R {
251+
container = py3rImage
252+
}
253+
withLabel: gcta{
254+
container = gctaImage
255+
}
256+
withLabel: Utils{
257+
container = Utils
258+
}
259+
246260

247261

248262
}

0 commit comments

Comments
 (0)