Skip to content

Commit 7108eab

Browse files
committed
Add indel storing and fix cleanup
1 parent 823fd75 commit 7108eab

File tree

4 files changed

+32
-8
lines changed

4 files changed

+32
-8
lines changed

Diff for: iss/app.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def generate_reads(args):
131131
args.output + ".vcf",
132132
"##fileformat=VCFv4.1\n" + "\t".join(["#CHROM", "POS", "ID", "REF", "ALT", "QUAL", "FILTER", "INFO"]),
133133
)
134-
full_tmp_list = temp_R1 + temp_R2
134+
full_tmp_list = temp_R1 + temp_R2 + temp_mut
135135
full_tmp_list.append(genome_file)
136136
if os.path.exists("%s.memmap" % args.output):
137137
full_tmp_list.append("%s.memmap" % args.output)

Diff for: iss/error_models/__init__.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def introduce_indels(self, record, orientation, full_seq, bounds):
171171
bounds (tuple): the position of the read in the full_sequence
172172
173173
Returns:
174-
Seq: a sequence with (eventually) indels
174+
SeqRecord: a sequence record with indel errors
175175
"""
176176

177177
# get the right indel arrays
@@ -194,11 +194,35 @@ def introduce_indels(self, record, orientation, full_seq, bounds):
194194
if random.random() < prob:
195195
# we want to insert after the base read
196196
mutable_seq.insert(position + 1, str(nucl_to_insert))
197+
if self.store_mutations:
198+
record.annotations["mutations"].append(
199+
{
200+
"id": record.id,
201+
"position": position,
202+
"ref": mutable_seq[position],
203+
"alt": mutable_seq[position] + nucl_to_insert,
204+
"quality": ".",
205+
"type": "ins",
206+
}
207+
)
208+
197209
if random.random() < deletions[position][mutable_seq[nucl].upper()]:
198210
mutable_seq.pop(position)
211+
if self.store_mutations:
212+
record.annotations["mutations"].append(
213+
{
214+
"id": record.id,
215+
"position": position,
216+
"ref": mutable_seq[position],
217+
"alt": ".",
218+
"quality": ".",
219+
"type": "del",
220+
}
221+
)
199222
position += 1
200223
except IndexError:
201224
continue
202225

203226
seq = self.adjust_seq_length(mutable_seq, orientation, full_seq, bounds)
204-
return seq
227+
record.seq = seq
228+
return record

Diff for: iss/generator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def simulate_read(record, error_model, i, cpu_number, sequence_type):
153153
forward.annotations["original"] = str(forward.seq)
154154

155155
# add the indels, the qual scores and modify the record accordingly
156-
forward.seq = error_model.introduce_indels(forward, "forward", sequence, bounds)
156+
forward = error_model.introduce_indels(forward, "forward", sequence, bounds)
157157
forward = error_model.introduce_error_scores(forward, "forward")
158158
forward = error_model.mut_sequence(forward, "forward")
159159

@@ -185,7 +185,7 @@ def simulate_read(record, error_model, i, cpu_number, sequence_type):
185185
reverse.annotations["original"] = str(reverse.seq)
186186

187187
# add the indels, the qual scores and modify the record accordingly
188-
reverse.seq = error_model.introduce_indels(reverse, "reverse", sequence, bounds)
188+
reverse = error_model.introduce_indels(reverse, "reverse", sequence, bounds)
189189
reverse = error_model.introduce_error_scores(reverse, "reverse")
190190
reverse = error_model.mut_sequence(reverse, "reverse")
191191

Diff for: iss/test/test_error_model.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def test_introduce_indels():
6666
bounds = (5, 130)
6767
read = SeqRecord(Seq(str("ATATA" * 25)), id="read_1", description="test read")
6868
ref_genome = SeqRecord(Seq(str("ATATA" * 100)), id="ref_genome", description="test reference")
69-
read.seq = err_mod.introduce_indels(read, "forward", ref_genome, bounds)
69+
read = err_mod.introduce_indels(read, "forward", ref_genome, bounds)
7070
assert len(read.seq) == 125
7171
assert read.seq[:10] == "ATGATAATAT"
7272

@@ -81,7 +81,7 @@ def test_adjust_seq_length_extend():
8181
bounds = (480, 500)
8282
read = SeqRecord(Seq(str("ATTTA" * 4)), id="read_1", description="test read")
8383
ref_genome = SeqRecord(Seq(str("ATTTA" * 100)), id="ref_genome", description="test reference")
84-
read.seq = err_mod.introduce_indels(read, "forward", ref_genome, bounds)
84+
read = err_mod.introduce_indels(read, "forward", ref_genome, bounds)
8585
assert len(read.seq) == 20
8686
assert read.seq[:10] == "TTAATTTAAT"
8787
assert read.seq[10:] == "TTAATTTAAA"
@@ -100,7 +100,7 @@ def test_introduce_indels_rev():
100100

101101
ref_genome = SeqRecord(Seq("GG" + str("GTACC" * 100) + "GG"), id="ref_genome", description="test reference")
102102
read = SeqRecord(Seq(rev_comp(str(ref_genome.seq[484:504]))), id="read_1", description="test read")
103-
read.seq = err_mod.introduce_indels(read, "reverse", ref_genome, bounds)
103+
read = err_mod.introduce_indels(read, "reverse", ref_genome, bounds)
104104
assert len(read.seq) == 20
105105
assert read.seq == "CGTACGGTACGGTACGGTAC"
106106

0 commit comments

Comments
 (0)