Skip to content

Commit 1da61a9

Browse files
author
Dmitry Pushkarev
committed
fully working exome pipeline
1 parent 0e119f4 commit 1da61a9

File tree

5 files changed

+126
-4
lines changed

5 files changed

+126
-4
lines changed

apply_bed_filter/apply_bed_filter.pl

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#!/usr/bin/perl
2+
#
3+
# Applies BED filter to a VCF file
4+
$filter=$ARGV[0];
5+
die "Filter file is not specified" unless -e $filter;
6+
die "Incorrect filter format" unless $filter =~/\.(bed|BED)/;
7+
8+
9+
#hash based coarse filter
10+
open(FL,"$filter")||die;
11+
while (<FL>)
12+
{
13+
@a=split("\t");
14+
$a[1]=int($a[1]/10000);
15+
$a[2]=int($a[2]/10000);
16+
17+
for ($i=$a[1]-1;$i<=$a[2]+1;$i++)
18+
{
19+
$OK{"$a[0]-$i"}=1;
20+
}
21+
}
22+
$tmp=int(rand()*1000000)."bed";
23+
$tmp="out.bed";
24+
25+
open(OUT,">$tmp");
26+
27+
while (<STDIN>)
28+
{
29+
if (/^\#/)
30+
{$header.=$_;next;}
31+
@a=split("\t");
32+
33+
$red=int($a[1]/10000);
34+
35+
next unless $OK{"$a[0]-$red"};
36+
print OUT "$a[0]\t$a[1]\t$a[1]\t$_";
37+
}
38+
39+
close OUT;
40+
41+
print $header;
42+
open(FL,"bedtools intersect -u -a $tmp -b $filter|") ||die;
43+
while (<FL>)
44+
{
45+
@a=split("\t");
46+
for ($i=3;$i<scalar(@a);$i++)
47+
{
48+
print "\t" unless $i==3;
49+
print $a[$i];
50+
}
51+
52+
}

apply_bed_filter/apply_bed_filter.sh

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/bin/bash
2+
3+
set -e -x
4+
5+
6+
7+
8+
9+
10+
11+
12+
13+
14+
15+
16+
17+
18+
19+
exit
20+
#!/usr/bin/perl
21+
#
22+
# Applies BED filter to a VCF file
23+
$filter=$ARGV[0];
24+
die "Filter file is not specified" unless -e $filter;
25+
die "Incorrect filter format" unless $filter =~/\.(bed|BED)/;
26+
27+
28+
#hash based coarse filter
29+
open(FL,"$filter")||die;
30+
while (<FL>)
31+
{
32+
@a=split("\t");
33+
$a[1]=int($a[1]/10000);
34+
$a[2]=int($a[2]/10000);
35+
36+
for ($i=$a[1]-1;$i<=$a[2]+1;$i++)
37+
{
38+
$OK{"$a[0]-$i"}=1;
39+
}
40+
}
41+
$tmp=int(rand()*1000000)."bed";
42+
$tmp="out.bed";
43+
44+
open(OUT,">$tmp");
45+
46+
while (<STDIN>)
47+
{
48+
if (/^\#/)
49+
{$header.=$_;next;}
50+
@a=split("\t");
51+
52+
$red=int($a[1]/10000);
53+
54+
next unless $OK{"$a[0]-$red"};
55+
print OUT "$a[0]\t$a[1]\t$a[1]\t$_";
56+
}
57+
58+
close OUT;
59+
60+
print $header;
61+
open(FL,"bedtools intersect -u -a $tmp -b $filter|") ||die;
62+
while (<FL>)
63+
{
64+
@a=split("\t");
65+
for ($i=3;$i<scalar(@a);$i++)
66+
{
67+
print "\t" unless $i==3;
68+
print $a[$i];
69+
}
70+
71+
}

download_reference.sh

+1
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ if [ ! -e /tmp/gcat_set_025_2.fastq.gz ]
2121
then
2222
wget -O /tmp/gcat_set_025_1.fastq.gz http://gapp-west.s3.amazonaws.com/sample_exome/025_Bioplanet_GCAT_30x/gcat_set_025_1.fastq.gz
2323
wget -O /tmp/gcat_set_025_2.fastq.gz http://gapp-west.s3.amazonaws.com/sample_exome/025_Bioplanet_GCAT_30x/gcat_set_025_2.fastq.gz
24+
wget -O /tmp/gcat_set_025.bed.gz http://gapp-west.s3.amazonaws.com/sample_exome/025_Bioplanet_GCAT_30x/gcat_set_025.bed.gz
2425

2526
fi

gcat_set_025.bed.gz

1.43 MB
Binary file not shown.

vqsr/vqsr.sh

+2-4
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ do
127127
-recalFile indels.recal.tmp \
128128
-tranchesFile indels.tranches \
129129
-rscriptFile indels.plots.R \
130-
-nt $cpu_cores
131-
132-
mv indels.recal.tmp indels.recal
130+
-nt $cpu_cores && mv indels.recal.tmp indels.recal
133131
fi
134132

135133
done
@@ -154,7 +152,7 @@ java -Xmx7g -jar $gatk_jar \
154152
-T CombineVariants \
155153
-R $gatk_data/hg19/ucsc.hg19.fasta \
156154
--variant snps.recalibrated.filtered.vcf \
157-
--variant indels.recalibrated.filtered.vcf \
155+
--variant indels.recalibrated.filtered.vcf \
158156
-o recalibrated.filtered.vcf
159157

160158
bgzip recalibrated.filtered.vcf

0 commit comments

Comments
 (0)