18
18
from ..common import TRINUC_IND , ALLOWED_NUCL , NUC_IND , DINUC_IND
19
19
from .default_mutation_model import *
20
20
from .default_sequencing_error_model import *
21
- from .utils import bin_scores , take_closest
22
21
23
22
__all__ = [
24
23
"MutationModel" ,
@@ -60,7 +59,9 @@ class InsertionModel(VariantModel):
60
59
def __init__ (self ,
61
60
insert_len_model : dict [int : float , ...],
62
61
rng : Generator = None ):
63
- self .insert_len_model = insert_len_model
62
+ # Normalize the weights so they sum to 1 and can be used as probabilities
63
+ tot = sum (insert_len_model .values ())
64
+ self .insertion_len_model = {key : val / tot for key , val in insert_len_model .items ()}
64
65
self .rng = rng
65
66
66
67
def get_insertion_length (self , size : int = None ) -> int | list [int , ...]:
@@ -71,8 +72,8 @@ def get_insertion_length(self, size: int = None) -> int | list[int, ...]:
71
72
Greater than 1 returns a list of ints.
72
73
:return: int or list of ints.
73
74
"""
74
- return self .rng .choice (a = list (self .insert_len_model ),
75
- p = [* self .insert_len_model .values ()],
75
+ return self .rng .choice (a = list (self .insertion_len_model ),
76
+ p = [* self .insertion_len_model .values ()],
76
77
size = size , shuffle = False )
77
78
78
79
@@ -91,7 +92,9 @@ class DeletionModel(VariantModel):
91
92
def __init__ (self ,
92
93
deletion_len_model : dict [int : float , ...],
93
94
rng : Generator = None ):
94
- self .deletion_len_model = deletion_len_model
95
+ # Normalize the weights so they sum to 1 and can be used as probabilities
96
+ tot = sum (deletion_len_model .values ())
97
+ self .deletion_len_model = {key : val / tot for key , val in deletion_len_model .items ()}
95
98
self .rng = rng
96
99
97
100
def get_deletion_length (self , size : int = None ) -> int | list [int , ...]:
@@ -281,6 +284,9 @@ def generate_snv(self, trinucleotide: Seq, reference_location: int) -> SingleNuc
281
284
transition_matrix = self .trinuc_trans_matrices [DINUC_IND [trinucleotide [0 ] + "_" + trinucleotide [2 ]]]
282
285
# then determine the trans probs based on the middle nucleotide
283
286
transition_probs = transition_matrix [NUC_IND [trinucleotide [1 ]]]
287
+ # Normalize the transition weights so they sum to 1 and can be used as probabilities
288
+ transition_sum = sum (transition_probs )
289
+ transition_probs = [x / transition_sum for x in transition_probs ]
284
290
# Now pick a random alternate, weighted by the probabilities
285
291
alt = self .rng .choice (ALLOWED_NUCL , p = transition_probs )
286
292
temp_snv = SingleNucleotideVariant (reference_location , alt = alt )
@@ -376,11 +382,8 @@ def __init__(self,
376
382
self .insertion_model = insertion_model
377
383
self .uniform_quality_score = None
378
384
if self .is_uniform :
379
- # bin scores returns a list, so we need the first (only) element of the list
380
- converted_avg_err = bin_scores (self .quality_scores ,
381
- [int (- 10. * np .log10 (self .average_error ))])[0 ]
382
- # Set score to the lowest of the max of the quality scores and the bin closest to the input avg error.
383
- self .uniform_quality_score = min ([max (self .quality_scores ), converted_avg_err ])
385
+ # Set score to the lower of the highest available quality score and the input avg error converted to a quality score (-10 * log10, rounded to the nearest int).
386
+ self .uniform_quality_score = min ([max (self .quality_scores ), int (- 10. * np .log10 (self .average_error ) + 0.5 )])
384
387
self .rng = rng
385
388
386
389
def get_sequencing_errors (self ,
@@ -498,7 +501,13 @@ def get_quality_scores(self,
498
501
for i in quality_index_map :
499
502
score = self .rng .normal (self .quality_score_probabilities [i ][0 ],
500
503
scale = self .quality_score_probabilities [i ][1 ])
501
- score = take_closest (self .quality_scores , score )
504
+ # Round the score to an int and clamp it to the range [1, 42]
505
+ score = round (score )
506
+ if score > 42 :
507
+ score = 42
508
+ if score < 1 :
509
+ score = 1
510
+
502
511
temp_qual_array .append (score )
503
512
504
513
if self .rescale_qualities :
@@ -509,9 +518,9 @@ def get_quality_scores(self,
509
518
self .quality_score_error_rate [n ]) + 0.5 )])
510
519
for n in temp_qual_array ]
511
520
# Convert the rescaled quality scores to a numpy array (they are not rebinned).
512
- temp_qual_array = np .array (bin_scores ( self . quality_scores , rescaled_quals ) )
521
+ temp_qual_array = np .array (rescaled_quals )
513
522
else :
514
- temp_qual_array = np .array (bin_scores ( self . quality_scores , temp_qual_array ) )
523
+ temp_qual_array = np .array (temp_qual_array )
515
524
516
525
return temp_qual_array [:input_read_length ]
517
526
0 commit comments