-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathUtils.pm
executable file
·152 lines (134 loc) · 5.43 KB
/
Utils.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Helper functions used project-wide
package Utils;
use POSIX;
eval {
require IO::Uncompress::Gunzip;
IO::Uncompress::Gunzip->import( qw/gunzip $GunzipError/ ) ;
};
if ($@) {
die "Error: Perl Module IO::Uncompress::Gunzip not installed";
}
sub strip {
    # Return a copy of the argument with whitespace trimmed from both ends.
    # Interior whitespace is left alone; an undefined argument is returned
    # unchanged.
    my ($text) = @_;
    for ($text) {
        s/^\s+//;    # leading run
        s/\s+$//;    # trailing run
    }
    return $text;
}
sub system_call {
    # Run a command through system(), printing a timestamped "started" line,
    # the command itself, and a timestamped "ended" line around it.
    # All arguments are joined with spaces into one command string, so
    # pipelines and redirections given by the caller are passed along as-is.
    # Dies with the raw $? value if the command does not return 0.
    my $command      = "@_";
    my $stamp_format = "%m/%d/%Y %H:%M:%S\n";

    print "\nstarted " . POSIX::strftime($stamp_format, localtime);
    print " $command\n";

    my $status = system($command);
    die "Error in $command: $?" if $status != 0;

    print 'ended ' . POSIX::strftime($stamp_format, localtime);
}
sub test_dependencies {
    # Check that the required external dependencies are installed.
    # First runs msg/print_dependencies.sh from the current directory (output
    # is also copied to dependencies.out), then runs test_dependencies.sh from
    # inside msg/ and finally returns to the directory it started in.
    # Dies if either chdir fails; system_call dies if a script fails.
    system_call("bash msg/print_dependencies.sh | tee dependencies.out");

    my $start_dir = getcwd();
    chdir 'msg' or die "$!";
    #system_call("chmod 755 test_dependencies.sh");
    system_call("bash test_dependencies.sh");
    chdir $start_dir or die "$!";
}
sub parse_config {
    # Read msg.cfg (or another config file) and overlay it on the defaults.
    #
    # Arguments:
    #   $cfg_path       - path to a file of "key=value" lines; lines starting
    #                     with '#' and blank lines are ignored
    #   $default_params - hash ref of default settings (copied, not modified)
    #
    # Returns a reference to a new hash holding the merged settings, with
    # 'chroms2plot' defaulted from 'chroms' and the three qsub option strings
    # normalised to either '' or a value ending in a single space so they can
    # be spliced straight into a command line.
    #
    # Dies if the file does not exist or cannot be opened.
    my ($cfg_path, $default_params) = @_;
    my %params = %$default_params;

    die "ERROR: Can't locate $cfg_path.\n" unless (-e $cfg_path);
    # Three-argument open with a lexical handle: the path is never
    # interpreted as a mode or pipe, and no global handle is clobbered.
    open(my $cfg_fh, '<', $cfg_path) or die "ERROR: Can't open $cfg_path: $!\n";
    while (my $line = <$cfg_fh>) {
        chomp $line;
        next if ($line =~ /^\#/);
        # Skip empty and whitespace-only lines (e.g. a bare "\r" from a CRLF
        # file) so they do not create an empty-string key.
        next if (!$line || $line !~ /\S/);
        my ($key, $val) = split(/=/, $line, 2);
        $params{strip($key)} = strip($val);
    }
    close $cfg_fh;

    ### Configure some parameters ###
    if (defined $params{'chroms'} && !defined $params{'chroms2plot'}) {
        $params{'chroms2plot'} = $params{'chroms'};
    }

    # Number of qsub slots when running the pe option. Declared
    # unconditionally: "my $x = ... if COND" has undefined behaviour in Perl.
    my $update_nthreads = defined $params{'threads'} ? $params{'threads'} : '';

    # Add a space after qsub options so we can insert them into commands.
    # Example: user-entered "-l excl=true" becomes "-l excl=true ".
    # Each option is normalised independently; an empty custom option must
    # not reset the exclusive-node option.
    foreach my $option ('addl_qsub_option_for_exclusive_node',
                        'custom_qsub_options_for_all_cmds') {
        $params{$option} = $params{$option} ? $params{$option} . ' ' : '';
    }

    # Update qsub for the PE option with the number of threads requested.
    # Example: user-entered "-pe batch" becomes "-pe batch 8 ".
    $params{'addl_qsub_option_for_pe'} = $params{'addl_qsub_option_for_pe'}
        ? $params{'addl_qsub_option_for_pe'} . " $update_nthreads "
        : '';

    return \%params;
}
sub validate_config {
    # Sanity-check a parsed configuration and print it.
    #
    # Arguments:
    #   $params              - hash ref of settings
    #   @required_file_paths - names of settings that must be present and
    #                          must point at an existing file
    #
    # Dies when a required setting is absent, when its file does not exist,
    # or when any setting still holds the placeholder value 'NULL'.
    # Otherwise prints every setting as "key:<TAB>value", sorted by key.
    my ($params, @required_file_paths) = @_;

    ### every required setting must be present and name an existing file
    for my $name (@required_file_paths) {
        die "Parameter $name is required in the config file.\n"
            unless exists $params->{$name};
        my $path = $params->{$name};
        die "Exiting from msgCluster: Missing file $path.\n" unless -e $path;
    }

    print "\nParameters:\n\n";

    ### no setting may be left at its 'NULL' placeholder
    for my $key (sort keys %$params) {
        my $value = $params->{$key};
        if ($value eq 'NULL') {
            die "ERROR (msgCluster): undefined parameter ($key) in config file.\n";
        }
        print "$key:\t$value\n";
    }
    print "\n";
}
sub readFasta {
    # Parse a FASTA file, plain or gzipped (name ending in .gz or .gzip).
    #
    # Arguments:
    #   $file        - path to the FASTA file
    #   $store_count - true:  store the length of each sequence
    #                  false: store the sequence itself
    #
    # Returns a hash (as a list) keyed by record id, i.e. the first
    # whitespace-delimited word after '>'. Wrapped sequence lines are joined.
    # A record with no sequence lines is stored only if it is the last one
    # in the file.
    #
    # Dies if the file cannot be opened or if sequence data appears before
    # the first header line.
    my ($file, $store_count) = @_;
    my %reads;
    my ($header, $seq);

    my $file_handle;
    if ($file =~ /\.gz$/ || $file =~ /\.gzip$/) {
        # Fully qualified error variable: does not depend on what the
        # enclosing package imported.
        $file_handle = IO::Uncompress::Gunzip->new($file)
            or die "gunzip failed: $IO::Uncompress::Gunzip::GunzipError\n";
    }
    else {
        # 'or', not '||': '||' would bind to $file and the failure of open()
        # would never be noticed.
        open($file_handle, "<", $file) or die "ERROR : Can't open $file: $!\n";
    }

    # Store the record currently held in $header/$seq.
    my $save_record = sub {
        $reads{$header} = $store_count ? length($seq) : $seq;
    };

    while (my $line = <$file_handle>) {
        chomp $line;
        $line =~ s/\r$//;    # tolerate CRLF line endings
        if ($line =~ /^>(\S+)/) {
            my $id = $1;     # the contig/scaffold ID
            my $have_seq = (defined($seq) && length($seq));
            # Sequence data with no header in front of it: malformed input.
            if ($have_seq and !defined($header)) { die "Invalid FASTA file: @_"; }
            $save_record->() if $have_seq;
            $header = $id;
            $seq    = '';
        }
        else {
            # Accumulate the sequence, ignoring line wrapping.
            $seq .= $line;
        }
    }
    close $file_handle;

    # Flush the final record; test definedness so an id of "0" is kept.
    $save_record->() if defined($header);

    return %reads;
}
1; #Perl requires this for importing a module