#!/usr/bin/perl
# pre-drgap.pl
use strict;
use warnings;
use autodie;
use FileHandle;
use Getopt::Long;
# Command-line options. They are package globals because the rest of the
# script reads them directly.
#   -i | --input_file    tab-separated input file; its 2nd column is looked up
#                        in the gene-class table
#   -n | --class_number  number of subclass output files to create (1..N)
#   -d | --date          prefix of every *.tmp file the script reads or writes
our ($input_file, $class_number);
our $date;
my $optionOK = GetOptions(
    'i|input_file=s'   => \$input_file,
    'n|class_number=s' => \$class_number,
    'd|date=s'         => \$date,
);

# Fail early with a readable message instead of continuing with undefined
# values (which would otherwise surface later as confusing open/print errors).
my $usage = "Usage: $0 -i <input_file> -n <class_number> -d <date>\n";
my @problems;
push @problems, 'missing -i/--input_file' unless defined $input_file;
push @problems, 'missing -d/--date'       unless defined $date;
if (!defined $class_number) {
    push @problems, 'missing -n/--class_number';
}
elsif ($class_number !~ /\A[0-9]+\z/ || $class_number < 1) {
    push @problems, "-n/--class_number must be a positive integer (got '$class_number')";
}
if (!$optionOK || @problems) {
    die join('', map { "$_\n" } @problems), $usage;
}
#------------------------------------------------------------
# Load the gene -> class table from "${date}_gene-class.tmp" into %gene_class.
# The first line of the file is treated as a header and skipped. Every other
# line must start with "<digits> <gene> <digits>" (whitespace-separated); the
# gene becomes the key and the trailing number its class. Lines that do not
# match are reported on STDOUT and ignored.
our %gene_class;
open my $class_fh, '<', "${date}_gene-class.tmp";    # autodie reports failures
my $header = <$class_fh>;                            # discard the header line
our $line = 0;    # counts data lines only; the header is not included
while (my $record = <$class_fh>) {
    $line += 1;
    chomp $record;
    # A gene name starts with a word character and may then contain word
    # characters, hyphens and dots in any combination (e.g. "RP11-34P13.7").
    if ($record =~ /^\d+\s+(\w[\w.\-]*)\s+(\d+)/) {
        $gene_class{$1} = $2;
    }
    else {
        print "invalid format of gene class in line $line\n";
    }
}
close $class_fh;
# Split the input file into one output file per gene class.
#
# Reads:  $input_file (tab-separated; column 2 is looked up in %gene_class),
#         plus the globals %gene_class, $class_number and $date.
# Writes: "${date}_subclass-<i>.tmp" for i in 1..$class_number, receiving the
#         unmodified input lines whose gene maps to class <i>;
#         "${date}_genes-which-are-in-input-file-but-not-in-the-chara-file.tmp",
#         receiving the first three columns of every line whose gene is unknown.
# Prints the number of unknown-gene lines to STDOUT when finished.
my %fh;
foreach my $i (1 .. $class_number) {
    # Three-argument open keeps the mode separate from the (interpolated) name.
    open $fh{$i}, '>', "${date}_subclass-$i.tmp";
}
open my $input_fh,   '<', $input_file;
open my $missing_fh, '>', "${date}_genes-which-are-in-input-file-but-not-in-the-chara-file.tmp";
our $n = 0;    # number of input lines whose gene is not in the characteristic file
while (my $record = <$input_fh>) {
    chomp $record;
    my @element = split /\t/, $record;
    # A line without a second column is handled like an unknown gene.
    my $gene = defined $element[1] ? $element[1] : '';
    if (exists $gene_class{$gene}) {
        my $class = $gene_class{$gene};
        # Only classes 1..$class_number have an output handle; stop with a
        # clear message rather than calling print on an undefined handle.
        die "gene '$gene' has class '$class', but only classes 1..$class_number "
          . "have output files (line $. of $input_file)\n"
          unless exists $fh{$class};
        print { $fh{$class} } "$record\n";
    }
    else {
        # Short lines are padded with empty fields so three columns are
        # always written.
        print {$missing_fh}
          join("\t", map { defined $_ ? $_ : '' } @element[0 .. 2]), "\n";
        $n += 1;
    }
}
print("The number of genes which are in the input file but not in the characteristic file is $n\n");
close $input_fh;
foreach my $i (1 .. $class_number) {
    close $fh{$i};    # autodie surfaces buffered write errors here
}
close $missing_fh;