forked from JaneliaSciComp/msg
-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsummary_mismatch.pl
executable file
·75 lines (66 loc) · 2.59 KB
/
summary_mismatch.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!perl
use File::Basename;
$src = dirname $0;
# generates a summary of *-matchMismatch.csv files generated by MSG
# uses the barcode file
# usage: perl summary_mismatch.pl barcodefile cutoff (optional minimum cutoff to assess error, default=0)
# perl summary_mismatch.pl yellow_barcode_all_nodups
open OUT, ">error_gamma";
print OUT "barcode\tgam_par1\tgam_par2\n";
# read in the barcodes
my $barcodefile = shift(@ARGV); chomp $barcodefile;
my $cutoff = shift(@ARGV); chomp $cutoff;
open BC, $barcodefile or die "wrong format for barcodefile\n";
#my $junk = <BC>; $junk = <BC>;
my @barcodes = ();
while (my $line = <BC>) {
chomp $line; #Removes trailing newline
$line =~ s/\r*//; #Removes any carriage returns, e.g. from Windows or Mac newlines
my @rows = split('\s+', $line); #Split the barcodes file on one or more whitespace characters (e.g. \t)
my $barcodename = "indiv".$rows[1]."_".$rows[0];
push(@barcodes, $barcodename);
}
# go through each barcode folder and generate an estimate of the error
for my $i (0..(scalar(@barcodes)-1)){
my $correct_par1_total = my $correct_par2_total = my $error_par1_total = my $error_par2_total =0;
my $csvfile = "hmm_fit/".$barcodes[$i]."/".$barcodes[$i]."-matchMismatch.csv";
unless (-e $csvfile) {
warn "$csvfile not present. skipping\n";
next;
}
system(qq{perl -p -i -e 's/\015/\n/go' $csvfile});
system(qq{perl -p -i -e 's/\015\012/\n/go' $csvfile});
open CSV, $csvfile or die "wrong path to csv? $csvfile \n";
my $junk2 =<CSV>;
my $j =0;
while (my $line2 = <CSV>){
$j++;
my @rows2 = split(',', $line2);
if ($rows2[0] =~/par1/){
$correct_par1_total += $rows2[5];
$error_par1_total += $rows2[6];
if ($correct_par1_total > $cutoff){
print "$barcodes[$i]\t$j\tcorr_par1:\t$correct_par1_total\terror_par1:\t$error_par1_total\n";
}
}
elsif ($rows2[0] =~/par2/){
$correct_par2_total += $rows2[6];
$error_par2_total += $rows2[5];
if ($correct_par2_total > $cutoff){
print "$barcodes[$i]\t$j\tcorr_par2:\t$correct_par2_total\terror_par2:\t$error_par2_total\n";
}
}
else {print "neither par1 nor par2 ", $barcodes[$i], " row $j\n"; die;}
}
if ($correct_par1_total > $cutoff){
my $error_par1 = $error_par1_total/($error_par1_total+$correct_par1_total);
print OUT $barcodes[$i], "\t", $error_par1, "\t";
}
else {print OUT $barcodes[$i], "\tNA\t";}
if ($correct_par2_total > $cutoff){
my $error_par2 = $error_par2_total/($error_par2_total+$correct_par2_total);
print OUT "\t$error_par2\n";
}
else {print OUT "\tNA\n";}
}
system("R --no-save < $src/plot_error.R");