-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathget_mapping_stats.py
64 lines (51 loc) · 1.95 KB
/
get_mapping_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
### Boas Pucker ###
### [email protected] ###
### v0.1 ###
# Help text handed to sys.exit() when '--in' or '--out' is missing from the command line.
__usage__ = """
python get_mapping_stats.py
--in <FULL_PATH_TO_INPUT_DIRECTORY>
--out <FULL_PATH_TO_OUTPUT_FILE>
"""
import glob
import os
import re
import sys
# --- end of imports --- #
def get_stats_from_file( filename ):
    """! @brief get all relevant stats from given mapping report file

    The file is scanned line by line for three labels; the first integer
    found on a matching line is stored as the value of that label.

    @param filename path to the mapping report file
    @return dictionary mapping 'total', 'unique' and 'multi' to integer read
            counts; a key is only present if its line was found in the file
    """

    # label text in the report -> key in the returned dictionary
    # (checked in this order, the first label found in a line wins)
    labels = (
        ( 'Number of input reads', 'total' ),
        ( 'Uniquely mapped reads number', 'unique' ),
        ( 'Number of reads mapped to multiple loci', 'multi' ),
    )
    # raw string needed: "\d" in a normal string literal is an invalid escape sequence
    number = re.compile( r"\d+" )

    stats = {}
    with open( filename, "r" ) as f:
        for line in f:
            for label, key in labels:
                if label in line:
                    match = number.search( line )
                    if match:  # a label without any number carries no value
                        stats[ key ] = int( match.group( 0 ) )
                    break
    return stats
def _format_percentage( part, total ):
    """! @brief express part as percentage of total with two decimals; 'NA' if not computable """
    if part is None or not total:
        return "NA"
    return "%.2f%%" % ( 100.0 * part / total )


def _format_count( value ):
    """! @brief convert a read count to text; 'NA' if the count is missing """
    return "NA" if value is None else str( value )


def main( arguments ):
    """! @brief runs everything

    Collects the stats of all '*.logfile' files located directly in the
    input directory and writes a tab-separated table with one line per file
    (sorted by file name) to the output file. Values which are missing in a
    report, and percentages of a total of 0, are written as 'NA'.

    @param arguments list of command line arguments; '--in' needs to be
           followed by the input directory and '--out' by the output file
    """

    input_dir = arguments[ arguments.index( '--in' )+1 ]
    output_file = arguments[ arguments.index( '--out' )+1 ]

    # joining path and pattern works with and without trailing slash and does not
    # match siblings of the input directory like '<input_dir>_old.logfile'
    all_files = glob.glob( os.path.join( input_dir, "*.logfile" ) )

    all_stats = {}
    for filename in all_files:
        sample = os.path.basename( filename )
        all_stats[ sample ] = get_stats_from_file( filename )

    with open( output_file, "w" ) as out:
        out.write( 'Sample\tTotalReads\tUniquelyMappedReads\tUniquelyMappedReads%\tMultiMappedReads\tMultiMappedReads%\n' )
        for sample in sorted( all_stats ):
            stats = all_stats[ sample ]
            # .get() since a report might lack some of the lines
            total = stats.get( 'total' )
            unique = stats.get( 'unique' )
            multi = stats.get( 'multi' )
            out.write( "\t".join( [ sample,
                                    _format_count( total ),
                                    _format_count( unique ),
                                    _format_percentage( unique, total ),
                                    _format_count( multi ),
                                    _format_percentage( multi, total ) ] ) + '\n' )
if __name__ == '__main__':
    # both flags are mandatory; show the usage text and stop if one is missing
    if '--in' not in sys.argv or '--out' not in sys.argv:
        sys.exit( __usage__ )
    main( sys.argv )