@@ -64,18 +64,18 @@ params.clump_p1=0.0001
64
64
params. clump_p2= 0.01
65
65
params. clump_r2= 0.50
66
66
params. clump_kb= 250
67
- params. dir_vcf= " ftp://ftp.1000genomes.ebi.ac.uk/vol1/ftp/release/20130502/"
67
+ // ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c.20130502.genotypes.vcf.gz
68
+ params. dir_vcf= " ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/"
68
69
69
70
70
71
// Chromosome specification for the phenotype / GWAS-catalogue branch.
// When params.list_chro_pheno is empty (or blank, or unset) the genotype
// specification params.list_chro is reused.
// The value is kept in a script variable instead of being written back into
// params, and it is assigned on BOTH paths: assigning it only inside an `if`
// leaves it undefined -- and getlistchro(list_chro_pheno) failing with
// "No such variable" -- whenever the user supplies a non-empty
// params.list_chro_pheno.
list_chro_pheno = params.list_chro_pheno?.toString()?.trim() ? params.list_chro_pheno : params.list_chro

// Expand both specifications with getlistchro (defined elsewhere in this file).
listchro       = getlistchro(params.list_chro)
listchro_pheno = getlistchro(list_chro_pheno)

// Two separate channels built from the same chromosome list.
listchro_ch  = Channel.from(listchro)
listchro_ch2 = Channel.from(listchro)

// Pair every chromosome with the params.pos_allgeno file; checkIfExists makes
// the run fail if that path does not exist.
listchro_ch = listchro_ch.combine(Channel.fromPath(params.pos_allgeno, checkIfExists: true))
@@ -84,15 +84,16 @@ listchro_ch=listchro_ch.combine(Channel.fromPath(params.pos_allgeno, checkIfExis
84
84
85
85
86
86
process Dl1000G{
87
+ label ' Utils'
87
88
cpus params. nb_cpus
88
89
input :
89
90
tuple val(chro), file(pos_geno) from listchro_ch
90
91
output :
91
92
tuple val(chro), file(" ${ file1000G} " ) into file1000G
92
93
script :
93
- file1000G = (chro == ' X ' ) ? " ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b.20130502.genotypes.vcf.gz " : " ALL.chr ${ chro } .phase3_shapeit2_mvncall_integrated_v5a .20130502.genotypes.vcf.gz"
94
- // ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b .20130502.genotypes.vcf.gz
95
- file1000G= (chro== ' Y' ) ? " ALL.chrY.phase3_integrated_v2a .20130502.genotypes.vcf.gz" : " $file1000G "
94
+ // ftp://ftp.1000genomes.ebi.ac.uk:21/vol1/ftp/release/20130502/ ALL.chr17.phase3_shapeit2_mvncall_integrated_v5b .20130502.genotypes.vcf.gz
95
+ file1000G = (chro == ' X ' ) ? " ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c .20130502.genotypes.vcf.gz" : " ALL.chr ${ chro } .phase3_shapeit2_mvncall_integrated_v5b.20130502.genotypes.vcf.gz "
96
+ file1000G= (chro== ' Y' ) ? " ALL.chrY.phase3_integrated_v2b .20130502.genotypes.vcf.gz" : " $file1000G "
96
97
"""
97
98
awk -v chro=$chro '{if(\$ 1==chro)print \$ 1"\\ t"\$ 2-1"\\ t"\$ 2"\\ t"\$ 1":"\$ 2}' $pos_geno > pos.bed
98
99
bcftools view --threads ${ params.nb_cpus} -R pos.bed ${ params.dir_vcf} /$file1000G |bgzip -c > $file1000G
@@ -125,7 +126,8 @@ process cleanPlinkFile{
125
126
cp "$bim " "${ bim} .save"
126
127
awk '{if(\$ 2=="."){\$ 2=\$ 1":"\$ 4};print \$ 0}' "${ bim} .save" > "$bim "
127
128
awk '{if(length(\$ 5)==1 && length(\$ 6)==1)print \$ 2}' $bim > ${ bim} .wellpos.pos
128
- plink -bfile $plk --keep-allele-order --extract ${ bim} .wellpos.pos --make-bed -out $out --threads ${ params.nb_cpus}
129
+ awk '{print \$ 2}' $plk ".bim" | sort | uniq -d > duplicated_snps.snplist
130
+ plink -bfile $plk --keep-allele-order --extract ${ bim} .wellpos.pos --make-bed -out $out --threads ${ params.nb_cpus} --exclude duplicated_snps.snplist
129
131
"""
130
132
}
131
133
plk_chro_flt= plk_chro_cl. collect()
@@ -159,6 +161,8 @@ process mergePlinkFile{
159
161
}
160
162
161
163
process addSexFile{
164
+ label ' R'
165
+
162
166
cpus params. nb_cpus
163
167
input :
164
168
tuple file(bed), file(bim), file(fam) from allplkres_ch_befsex
@@ -176,6 +180,7 @@ process addSexFile{
176
180
"""
177
181
}
178
182
process GwasCatDl{
183
+ label ' R'
179
184
publishDir " ${ params.output_dir} /gwascat" , overwrite:true , mode:' copy'
180
185
output :
181
186
file(" ${ out} .bed" ) into gwascat_bed
@@ -190,17 +195,32 @@ process GwasCatDl{
190
195
"""
191
196
}
192
197
193
- listchro_ch_gwascat= Channel . from(listchro_pheno)
194
- listchro_ch_gwascat= listchro_ch_gwascat. combine(gwascat_pos)
198
/*
 * getchr_gc
 * Prints the distinct values of the first whitespace-separated field of the
 * BED file received from gwascat_bed. The whole sorted list (one value per
 * line) is emitted as a single stdout item on listchro_ch_gwascat.
 */
process getchr_gc {
    input:
        file bedfile from gwascat_bed

    output:
        stdout into listchro_ch_gwascat

    script:
        """
        awk '{print \$1}' $bedfile |sort|uniq
        """
}
208
+
209
+ // listchro_ch_gwascat=Channel.from(listchro_pheno)
210
// listchro_ch_gwascat carries the stdout of getchr_gc as one multi-line
// string. Split it on whitespace so each chromosome becomes its own item,
// then pair every chromosome with gwascat_pos; Dl1000G_GC consumes the
// resulting (chro, pos_geno) tuples.
// Built as a single operator chain: no channel needs to be pre-created with
// Channel.create(), and no side `check` tap is kept since nothing reads it.
newlistchro_ch_gwascat = listchro_ch_gwascat
    .flatMap { chro_lines -> chro_lines.split() }
    .combine(gwascat_pos)
195
214
196
215
process Dl1000G_GC{
216
+ label ' Utils'
197
217
input :
198
- tuple val(chro), file(pos_geno) from listchro_ch_gwascat
218
+ tuple val(chro), file(pos_geno) from newlistchro_ch_gwascat
199
219
output :
200
220
tuple val(chro), file(" ${ file1000G} " ) into file1000G_gwasc
201
221
script :
202
- file1000G= (chro== ' X' ) ? " ALL.chrX.phase3_shapeit2_mvncall_integrated_v1b .20130502.genotypes.vcf.gz" : " ALL.chr${ chro} .phase3_shapeit2_mvncall_integrated_v5a .20130502.genotypes.vcf.gz"
203
- file1000G= (chro== ' Y' ) ? " ALL.chrY.phase3_integrated_v2a .20130502.genotypes.vcf.gz" : " $file1000G "
222
+ file1000G= (chro== ' X' ) ? " ALL.chrX.phase3_shapeit2_mvncall_integrated_v1c .20130502.genotypes.vcf.gz" : " ALL.chr${ chro} .phase3_shapeit2_mvncall_integrated_v5b .20130502.genotypes.vcf.gz"
223
+ file1000G= (chro== ' Y' ) ? " ALL.chrY.phase3_integrated_v2b .20130502.genotypes.vcf.gz" : " $file1000G "
204
224
"""
205
225
tabix -fh ${ params.dir_vcf} /$file1000G -R $pos_geno |bgzip -c > $file1000G
206
226
"""
@@ -221,6 +241,7 @@ process transfvcfInBed1000G_GC{
221
241
}
222
242
223
243
process cleanPlinkFile_GC{
244
+ errorStrategy ' ignore'
224
245
cpus params. nb_cpus
225
246
input :
226
247
tuple val(chro), file(bim), file(fam), file(bed) from plk_chro_gc
@@ -232,8 +253,10 @@ process cleanPlinkFile_GC{
232
253
"""
233
254
cp "$bim " "${ bim} .save"
234
255
awk '{if(\$ 2=="."){\$ 2=\$ 1":"\$ 4};print \$ 0}' "${ bim} .save" > "$bim "
256
+ awk '{print \$ 2}' $plk ".bim" | sort | uniq -d > duplicated_snps.snplist
235
257
awk '{if(length(\$ 5)==1 && length(\$ 6)==1)print \$ 2}' $bim > ${ bim} .wellpos.pos
236
- plink -bfile $plk --keep-allele-order --extract ${ bim} .wellpos.pos --make-bed -out $out --threads ${ params.nb_cpus}
258
+ plink -bfile $plk --keep-allele-order --extract ${ bim} .wellpos.pos --make-bed -out $out --threads ${ params.nb_cpus} --exclude duplicated_snps.snplist
259
+
237
260
"""
238
261
}
239
262
@@ -248,7 +271,6 @@ process mergePlinkFile_GC{
248
271
output :
249
272
tuple file(" ${ out} .bed" ), file(" ${ out} .bim" ), file(" ${ out} .fam" ) into allplkres_ch_gc
250
273
script :
251
- println allfile
252
274
allfile2= allfile. toList(). collect {it. toString(). replaceFirst(~/ \. [^\. ]+$/ , ' ' )}
253
275
allfile2= allfile2. unique()
254
276
firstbed= allfile2[0 ]
@@ -269,6 +291,7 @@ process mergePlinkFile_GC{
269
291
}
270
292
271
293
process format_simulated{
294
+ label ' R'
272
295
cpus params. nb_cpus
273
296
input :
274
297
tuple file(bed), file(bim), file(fam) from allplkres_ch_gc
@@ -286,36 +309,61 @@ process format_simulated{
286
309
"""
287
310
}
288
311
289
-
290
312
/*
 * simulation_quantitatif
 * Runs the GCTA binary (params.gcta_bin) with --simu-qt on the PLINK file set
 * received from info_sim_qt, using `outeffect` as the causal-loci file.
 * GCTA is told to write with prefix "sim"; the resulting sim.phen is passed
 * on unformatted.
 */
process simulation_quantitatif {
    label 'gcta'
    cpus params.nb_cpus

    input:
        tuple file(bed), file(bim), file(fam), file(outeffect) from info_sim_qt

    output:
        // The channel is called sim_ql even though it carries the --simu-qt
        // result: format_sim_quantitatif reads from this exact name.
        file("sim.phen") into sim_ql

    script:
        // bed.baseName is the PLINK prefix shared by the staged bed/bim/fam files.
        """
        ${params.gcta_bin} --bfile ${bed.baseName} --simu-causal-loci $outeffect --simu-qt --simu-hsq ${params.simu_hsq} --out sim --simu-rep ${params.simu_rep} --simu-k ${params.simu_k}
        """
}
305
325
326
/*
 * format_sim_quantitatif
 * Reformats the raw phenotype file received from sim_ql with
 * format_file_sim.r and publishes the result, named
 * "<params.output>_qt.pheno", under
 * <params.output_dir>/simul_pheno/quant_pheno/.
 */
process format_sim_quantitatif {
    label 'R'
    // Directives are declared before the input/output blocks, as the
    // Nextflow documentation requires.
    publishDir "${params.output_dir}/simul_pheno/quant_pheno/", overwrite: true, mode: 'copy'

    input:
        // Named simphen rather than `file` so the variable does not shadow
        // the `file` qualifier / helper.
        file(simphen) from sim_ql

    output:
        file("$out")

    script:
        // No `def` here: the output declaration above must be able to resolve `out`.
        out = params.output + "_qt.pheno"
        """
        format_file_sim.r --file $simphen --out $out
        """
}
339
+
306
340
/*
 * simulation_qualitatif
 * Runs the GCTA binary (params.gcta_bin) in case/control mode on the PLINK
 * file set received from info_sim_ql, using `outeffect` as the causal-loci
 * file. The arguments of --simu-cc are whatever estimated_cc.py prints for
 * the fam file and params.simu_k. GCTA writes with prefix "sim"; the
 * resulting sim.phen is passed on unformatted.
 */
process simulation_qualitatif {
    label 'gcta'
    cpus params.nb_cpus

    input:
        tuple file(bed), file(bim), file(fam), file(outeffect) from info_sim_ql

    output:
        // The channel is called sim_qt even though it carries the
        // case/control result: format_sim_qualitatif reads from this exact name.
        file("sim.phen") into sim_qt

    script:
        // The final "<params.output>_ql.pheno" name is computed in
        // format_sim_qualitatif, so no `out` variable is needed here.
        def plk = bed.baseName
        """
        ${params.gcta_bin} --bfile $plk --simu-causal-loci $outeffect --simu-hsq ${params.simu_hsq} --out sim --simu-rep ${params.simu_rep} --simu-k ${params.simu_k} --simu-cc `estimated_cc.py $fam ${params.simu_k}`
        """
}
321
354
355
+
356
/*
 * format_sim_qualitatif
 * Reformats the raw phenotype file received from sim_qt with
 * format_file_sim.r and publishes the result, named
 * "<params.output>_ql.pheno", under
 * <params.output_dir>/simul_pheno/qual_pheno/.
 */
process format_sim_qualitatif {
    label 'R'
    // Directives are declared before the input/output blocks, as the
    // Nextflow documentation requires.
    publishDir "${params.output_dir}/simul_pheno/qual_pheno/", overwrite: true, mode: 'copy'

    input:
        // Named simphen rather than `file` so the variable does not shadow
        // the `file` qualifier / helper.
        file(simphen) from sim_qt

    output:
        file("$out")

    script:
        // No `def` here: the output declaration above must be able to resolve `out`.
        out = params.output + "_ql.pheno"
        """
        format_file_sim.r --file $simphen --out $out
        """
}
369
+
0 commit comments