Skip to content

Commit

Permalink
handling leading D post softmask
Browse files Browse the repository at this point in the history
  • Loading branch information
will-rowe committed Apr 2, 2020
1 parent 88bb34b commit 89c84b2
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 60 deletions.
53 changes: 25 additions & 28 deletions artic/align_trim.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,6 @@
import sys
from .vcftagprimersites import read_bed_file

# LeadingDeletion is an exception created when a soft mask results in a leading deletion (e.g. 10S1D10M)


class LeadingDeletion(Exception):
pass


# consumesReference lookup for if a CIGAR operation consumes the reference sequence
consumesReference = [True, False, True, True, False, False, False, True]

Expand Down Expand Up @@ -112,37 +105,43 @@ def trim(segment, primer_pos, end, debug):
if debug:
print("extra %s" % (extra), file=sys.stderr)
if extra:
if flag == 0:
if debug:
print("Inserted a %s, %s" % (0, extra), file=sys.stderr)

if end:
cigar.append((0, extra))
else:
cigar.insert(0, (0, extra))
eaten -= extra
if debug:
print("Inserted a %s, %s" % (0, extra), file=sys.stderr)
if end:
cigar.append((0, extra))
else:
cigar.insert(0, (0, extra))
eaten -= extra

# if softmasking the left primer, update the position of the leftmost mapping base
# softmask the left primer
if not end:

# update the position of the leftmost mappinng base
segment.pos = pos - extra
if debug:
print("New pos: %s" % (segment.pos), file=sys.stderr)

# if softmasking will result in a leading deletion operation in the final CIGAR, eat the deletions and increase the softmask
if not end and cigar[0][0] == 2:
raise LeadingDeletion(
"softmask created a leading deletion in the CIGAR")
# if proposed softmask leads straight into a deletion, shuffle leftmost mapping base along and ignore the deletion
if cigar[0][0] == 2:
if debug:
print(
"softmask created a leading deletion in the CIGAR, shuffling the alignment", file=sys.stderr)
while 1:
if cigar[0][0] != 2:
break
_, length = cigar.pop(0)
segment.pos += length

# now add the leading softmask
cigar.insert(0, (4, eaten))

# add the eaten CIGAR operations as a softmask to the start/end of the CIGAR
if end:
cigar.append((4, eaten))
# softmask the right primer
else:
cigar.insert(0, (4, eaten))
cigar.append((4, eaten))

# check the new CIGAR and replace the old one
if cigar[0][1] <= 0 or cigar[-1][1] <= 0:
raise ("invalid cigar operation created - possibly due to INDEL in primer")

segment.cigartuples = cigar
return

Expand Down Expand Up @@ -233,8 +232,6 @@ def go(args):
if args.verbose:
print("ref start %s >= primer_position %s" %
(segment.reference_start, p1_position), file=sys.stderr)
except LeadingDeletion:
continue
except Exception as e:
print("problem soft masking left primer in {} (error: {}), skipping" .format(
segment.query_name, e), file=sys.stderr)
Expand Down
71 changes: 39 additions & 32 deletions artic/align_trim_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,36 @@

# dummy primers (using min required fields)
p1 = {
"start": 0,
"end": 10,
"direction": "+",
"primerID": "primer1_LEFT"
"start": 0,
"end": 10,
"direction": "+",
"primerID": "primer1_LEFT"
}
p2 = {
"start": 30,
"end": 40,
"direction": "-",
"primerID": "primer1_RIGHT"
"start": 30,
"end": 40,
"direction": "-",
"primerID": "primer1_RIGHT"
}
p3 = {
"start": 10,
"end": 20,
"direction": "+",
"primerID": "primer2_LEFT"
"start": 10,
"end": 20,
"direction": "+",
"primerID": "primer2_LEFT"
}
p4 = {
"start": 40,
"end": 50,
"direction": "-",
"primerID": "primer2_RIGHT"
"start": 40,
"end": 50,
"direction": "-",
"primerID": "primer2_RIGHT"
}

# primer scheme to hold dummy primers
dummyPrimerScheme = [p1, p2, p3, p4]

# actual the primer scheme for nCov
primerScheme = vcftagprimersites.read_bed_file(
TEST_DIR + "/../test-data/primer-schemes/nCoV-2019/V1/nCoV-2019.scheme.bed")
TEST_DIR + "/../test-data/primer-schemes/nCoV-2019/V1/nCoV-2019.scheme.bed")


# nCov alignment segment (derived from a real nCov read)
Expand Down Expand Up @@ -68,10 +68,12 @@

# expected softmasked CIGARs
seg1expectedCIGAR = "64S48M1D4M2I12M1I14M1D101M1D53M2D78M1I38M74S"
seg2expectedCIGAR = "67S1D69M5D1M1D40M2I12M1D41M1D117M1I3M1D4M2D11M2I2M1D18M1D18M1I3M78S"
seg2expectedCIGAR = "67S69M5D1M1D40M2I12M1D41M1D117M1I3M1D4M2D11M2I2M1D18M1D18M1I3M78S"


# test for the find_primer function
def test_find_primer():
"""test for the find primer function
"""

# test the primer finder on the primers themselves
for primer in dummyPrimerScheme:
Expand All @@ -91,8 +93,10 @@ def test_find_primer():
dummyPrimerScheme, 25, "-")
assert result[2]["primerID"] == "primer1_RIGHT", "find_primer returned incorrect primer"

# test trim

def test_trim():
"""test for the trim function
"""

def testRunner(seg, expectedCIGAR):

Expand All @@ -105,8 +109,8 @@ def testRunner(seg, expectedCIGAR):
p2_position = p2[2]['start']

# this segment should need forward and reverse softmasking
assert seg.reference_start < p1_position, "missed a forward soft masking opportunity"
assert seg.reference_end > p2_position, "missed a reverse soft masking opportunity"
assert seg.reference_start < p1_position, "missed a forward soft masking opportunity (read: %s)" % seg.query_name
assert seg.reference_end > p2_position, "missed a reverse soft masking opportunity (read: %s)" % seg.query_name

# before masking, get the query_alignment_length and the CIGAR to use for testing later
originalCigar = seg.cigarstring
Expand All @@ -116,28 +120,31 @@ def testRunner(seg, expectedCIGAR):
try:
align_trim.trim(seg, p1_position, False, False)
except Exception as e:
raise Exception("problem soft masking left primer in {} (error: {})" .format(seg.query_name, e))
raise Exception(
"problem soft masking left primer in {} (error: {})" .format(seg.query_name, e))

# check the CIGAR and query alignment length is updated
assert seg.cigarstring != originalCigar, "cigar was not updated with a softmask"
assert seg.query_alignment_length != originalQueryAlnLength, "query alignment was not updated after softmask"
assert seg.cigarstring != originalCigar, "cigar was not updated with a softmask (read: %s)" % seg.query_name
assert seg.query_alignment_length != originalQueryAlnLength, "query alignment was not updated after softmask (read: %s)" % seg.query_name

# trim the reverse primer
try:
align_trim.trim(seg, p2_position, True, False)
except Exception as e:
raise Exception("problem soft masking right primer in {} (error: {})" .format(seg.query_name, e))
raise Exception("problem soft masking right primer in {} (error: {})" .format(
seg.query_name, e))

# check the CIGAR and query alignment length is updated
assert seg.cigarstring != originalCigar, "cigar was not updated with a softmask"
assert seg.query_alignment_length != originalQueryAlnLength, "query alignment was not updated after softmask"
assert seg.cigarstring != originalCigar, "cigar was not updated with a softmask (read: %s)" % seg.query_name
assert seg.query_alignment_length != originalQueryAlnLength, "query alignment was not updated after softmask (read: %s)" % seg.query_name

# check we have the right CIGAR
assert expectedCIGAR == seg.cigarstring, "cigar does not match expected cigar string"
assert seg.cigarstring == expectedCIGAR, "cigar does not match expected cigar string (read: %s)" % seg.query_name

# check the query alignment now matches the expected primer product
assert seg.reference_start == p1_position, "left primer not masked corrrectly"
assert seg.reference_end == p2_position, "right primer not masked correctly"
assert seg.reference_start >= p1_position, "left primer not masked corrrectly (read: %s)" % seg.query_name
assert seg.reference_end <= p2_position, "right primer not masked correctly (read: %s)" % seg.query_name

# run the test with the first alignment segment
testRunner(seg1, seg1expectedCIGAR)
testRunner(seg1, seg1expectedCIGAR)
testRunner(seg2, seg2expectedCIGAR)

0 comments on commit 89c84b2

Please sign in to comment.