@@ -113,7 +113,7 @@ def to_model(bam_path, output):
113
113
"""
114
114
logger = logging .getLogger (__name__ )
115
115
116
- insert_size_dist = []
116
+ template_length_dist = []
117
117
qualities_forward = []
118
118
qualities_reverse = []
119
119
subst_matrix_f = np .zeros ([301 , 16 ]) # we don't know the length of the reads
@@ -124,10 +124,10 @@ def to_model(bam_path, output):
124
124
# read the bam file and extract info needed for modelling
125
125
for read in read_bam (bam_path ):
126
126
# collect template lengths for the insert size distribution
127
- if read .is_proper_pair :
127
+ if read .is_paired :
128
128
template_length = abs (read .template_length )
129
- i_size = template_length - (2 * len (read .seq ))
130
- insert_size_dist .append (i_size )
129
+ # i_size = template_length - (2 * len(read.seq))
130
+ template_length_dist .append (template_length )
131
131
132
132
# get qualities
133
133
if read .is_read1 :
@@ -167,10 +167,6 @@ def to_model(bam_path, output):
167
167
elif read .is_read2 :
168
168
indel_matrix_r [pos , indel ] += 1
169
169
170
- logger .info ("Calculating insert size distribution" )
171
- # insert_size = int(np.mean(insert_size_dist))
172
- hist_insert_size = modeller .insert_size (insert_size_dist )
173
-
174
170
logger .info ("Calculating mean and base quality distribution" )
175
171
quality_bins_f = modeller .divide_qualities_into_bins (qualities_forward )
176
172
quality_bins_r = modeller .divide_qualities_into_bins (qualities_reverse )
@@ -209,6 +205,10 @@ def to_model(bam_path, output):
209
205
ins_f , del_f = modeller .indel_matrix_to_choices (indel_matrix_f , read_length )
210
206
ins_r , del_r = modeller .indel_matrix_to_choices (indel_matrix_r , read_length )
211
207
208
+ logger .info ("Calculating insert size distribution" )
209
+ # insert_size = int(np.mean(insert_size_dist))
210
+ hist_insert_size = modeller .insert_size (template_length_dist , read_length )
211
+
212
212
write_to_file (
213
213
"kde" ,
214
214
read_length ,
0 commit comments