Skip to content

Commit d297910

Browse files
committed
Fixing a bug that occurred when the requested read length is different from the model's read length
1 parent 83c2d3b commit d297910

File tree

3 files changed

+9
-10
lines changed

3 files changed

+9
-10
lines changed

neat/models/default_quality_score_model.py

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
# All numbers from 1-42
1414
default_quality_scores = np.arange(1, 43)
1515

16-
1716
default_qual_score_probs = np.array([
1817
[35.20551064, 5.39726466],
1918
[35.05310236, 5.63622973],

neat/models/error_models.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -67,27 +67,27 @@ def __init__(
6767

6868
def get_quality_scores(
6969
self,
70-
run_read_length: int,
7170
model_read_length: int,
71+
length: int,
7272
rng
7373
) -> np.ndarray:
7474
"""
75-
Takes a read_length and rng and returns an array of quality scores
75+
Takes a length and rng and returns an array of quality scores
7676
77-
:param run_read_length: The desired length of the quality score array
7877
:param model_read_length: the original read length for the model
78+
:param length: The desired length of the quality score array
7979
:param rng: random number generator.
8080
:return: An array of quality scores.
8181
"""
8282
if self.uniform_quality_score:
83-
return np.array([self.uniform_quality_score] * run_read_length)
83+
return np.array([self.uniform_quality_score] * length)
8484
else:
85-
if run_read_length == model_read_length:
85+
if length == model_read_length:
8686
quality_index_map = np.arange(model_read_length)
8787
else:
8888
# Map each position in the requested read onto a position in the model, spreading the model's distribution evenly across the number of bases in the read
8989
quality_index_map = np.array(
90-
[max([0, model_read_length * n // run_read_length]) for n in range(run_read_length)]
90+
[max([0, model_read_length * n // length]) for n in range(length)]
9191
)
9292

9393
temp_qual_array = []
@@ -206,7 +206,7 @@ def get_sequencing_errors(
206206
# This is to prevent deletion error collisions and to avoid introducing too many indel errors.
207207
if 0 < index < self.read_length - max(
208208
self.deletion_len_model) and total_indel_length > self.read_length // 4:
209-
error_type = self.rng.choice(a=list(self.variant_probs), p=list(self.variant_probs.values()))
209+
error_type = rng.choice(a=list(self.variant_probs), p=list(self.variant_probs.values()))
210210

211211
# Deletion error
212212
if error_type == Deletion:
@@ -227,7 +227,7 @@ def get_sequencing_errors(
227227
elif error_type == Insertion:
228228
insertion_length = self.get_insertion_length()
229229
insertion_reference = reference_segment[index]
230-
insert_string = ''.join(self.rng.choice(ALLOWED_NUCL, size=insertion_length))
230+
insert_string = ''.join(rng.choice(ALLOWED_NUCL, size=insertion_length))
231231
insertion_alternate = insertion_reference + insert_string
232232
introduced_errors.append(
233233
ErrorContainer(Insertion, index, insertion_length, insertion_reference, insertion_alternate)

neat/read_simulator/utils/read.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ def finalize_read_and_write(
321321
"""
322322

323323
# Generate quality scores for the read
324-
self.quality_array = qual_model.get_quality_scores(err_model.read_length, self.run_read_length, rng)
324+
self.quality_array = qual_model.get_quality_scores(err_model.read_length, len(self.reference_segment), rng)
325325

326326
# This replaces either hard or soft-masked reference segment with upper case or a standard repeat
327327
# It updates the quality array and reference segment in place, including reversing them, if appropriate

0 commit comments

Comments
 (0)