@@ -56,6 +56,18 @@ def expand_counts(count_array: list, scores: list):
56
56
return np .array (ret_list )
57
57
58
58
59
+ def _make_gen (reader ):
60
+ """
61
+ solution from stack overflow to quickly count lines in a file.
62
+ https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python
63
+
64
+ """
65
+ b = reader (1024 * 1024 )
66
+ while b :
67
+ yield b
68
+ b = reader (1024 * 1024 )
69
+
70
+
59
71
def parse_file (input_file : str , quality_scores : list , max_reads : int , qual_offset : int , readlen : int ):
60
72
"""
61
73
Parses an individual file for statistics
@@ -84,6 +96,13 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
84
96
line = fq_in .readline ().strip ()
85
97
readlens .append (len (line ))
86
98
99
+ # solution from stack overflow to quickly count lines in a file.
100
+ # https://stackoverflow.com/questions/19001402/how-to-count-the-total-number-of-lines-in-a-text-file-using-python
101
+ if max_reads == np .inf :
102
+ f = open (input_file , 'rb' )
103
+ f_gen = _make_gen (f .raw .read )
104
+ max_reads = sum (buf .count (b'\n ' ) for buf in f_gen )
105
+
87
106
readlens = np .array (readlens )
88
107
89
108
# Using the statistical mode seems like the right approach here. We expect the readlens to be roughly the same.
@@ -153,8 +172,12 @@ def parse_file(input_file: str, quality_scores: list, max_reads: int, qual_offse
153
172
for i in range (read_length ):
154
173
this_counts = q_count_by_pos [i ]
155
174
expanded_counts = expand_counts (this_counts , quality_scores )
156
- average_q = np .average (expanded_counts )
157
- st_d_q = np .std (expanded_counts )
175
+ if not expanded_counts :
176
+ average_q = 0
177
+ st_d_q = 0
178
+ else :
179
+ average_q = np .average (expanded_counts )
180
+ st_d_q = np .std (expanded_counts )
158
181
avg_std_by_pos .append ((average_q , st_d_q ))
159
182
160
183
# TODO In progress, working on ensuring the error model produces the right shape
@@ -191,7 +214,7 @@ def plot_stuff(init_q, real_q, q_range, prob_q, actual_readlen, plot_path):
191
214
plt .figure (1 )
192
215
z = np .array (init_q ).T
193
216
x , y = np .meshgrid (range (0 , len (z [0 ]) + 1 ), range (0 , len (z ) + 1 ))
194
- plt .pcolormesh (x , Y , z , vmin = 0. , vmax = 0.25 )
217
+ plt .pcolormesh (x , y , z , vmin = 0. , vmax = 0.25 )
195
218
plt .axis ([0 , len (z [0 ]), 0 , len (z )])
196
219
plt .yticks (range (0 , len (z ), 10 ), range (0 , len (z ), 10 ))
197
220
plt .xticks (range (0 , len (z [0 ]), 10 ), range (0 , len (z [0 ]), 10 ))
0 commit comments