Skip to content

Commit

Permalink
Make the pileup image code more robust by skipping any incomplete ref…
Browse files Browse the repository at this point in the history
…bases, which can happen at the edge of a contig.

PiperOrigin-RevId: 326119735
  • Loading branch information
pichuan authored and copybara-github committed Aug 11, 2020
1 parent fd41ba8 commit 698f96a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 7 deletions.
19 changes: 17 additions & 2 deletions deepvariant/pileup_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import itertools


from absl import logging
import numpy as np

from third_party.nucleus.protos import reads_pb2
Expand Down Expand Up @@ -470,6 +471,12 @@ def _pileup_for_pair_of_alts(alt_alleles):
else:
alt_images = []
for alt in alt_alleles:
if len(haplotype_sequences[alt]) != self.width:
logging.warning(
'haplotype_sequences[alt] is %d long but pileup '
'image width is %d. Giving up on this image',
len(haplotype_sequences[alt]), self.width)
return None
alt_image = self.build_pileup(
dv_call=dv_call,
refbases=haplotype_sequences[alt],
Expand All @@ -485,5 +492,13 @@ def _pileup_for_pair_of_alts(alt_alleles):
else:
return ref_image

return [(alts, _pileup_for_pair_of_alts(alts))
for alts in self._alt_allele_combinations(variant)]
retval = []
for alts in self._alt_allele_combinations(variant):
pileup = _pileup_for_pair_of_alts(alts)
# If the pileup is None, this can mean that we're near the edge of the
# contig, so one pileup width is invalid.
# Return None to indicate we couldn't process this variant.
if pileup is None:
return None
retval.append((alts, pileup))
return retval
28 changes: 23 additions & 5 deletions deepvariant/pileup_image_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1213,7 +1213,9 @@ def _expected_call(alts):

def test_create_pileup_images_with_alt_align(self):
self.dv_call.variant.alternate_bases[:] = ['C', 'T']
haplotype_sequences = {'C': 'seq for C', 'T': 'seq for T'}
seq_for_c = 'C' * self.pic.width
seq_for_t = 'T' * self.pic.width
haplotype_sequences = {'C': seq_for_c, 'T': seq_for_t}
haplotype_alignments = {'C': 'reads for C', 'T': 'reads for T'}

with mock.patch.object(
Expand Down Expand Up @@ -1256,17 +1258,33 @@ def _expected_alt_based_call(alts, refbases, reads):
[
# Pileup for 'C':
_expected_ref_based_call(['C']),
_expected_alt_based_call(['C'], 'seq for C', 'reads for C'),
_expected_alt_based_call(['C'], seq_for_c, 'reads for C'),
# Pileup for 'T':
_expected_ref_based_call(['T']),
_expected_alt_based_call(['T'], 'seq for T', 'reads for T'),
_expected_alt_based_call(['T'], seq_for_t, 'reads for T'),
# Pileup for 'C/T':
_expected_ref_based_call(['C', 'T']),
_expected_alt_based_call(['C', 'T'], 'seq for C', 'reads for C'),
_expected_alt_based_call(['C', 'T'], 'seq for T', 'reads for T'),
_expected_alt_based_call(['C', 'T'], seq_for_c, 'reads for C'),
_expected_alt_based_call(['C', 'T'], seq_for_t, 'reads for T'),
],
any_order=True)

def test_create_pileup_images_with_mismatched_alt_ref(self):
# Verifies that create_pileup_images() returns None, rather than a list of
# images, when the haplotype sequence supplied for an alt allele does not
# have the same length as the pileup image width.
self.dv_call.variant.alternate_bases[:] = ['T']
# Deliberately make the length different from self.pic.width.
haplotype_sequences = {'T': 'T' * (self.pic.width + 1)}
haplotype_alignments = {'T': 'reads for T'}
# Replace build_pileup with a mock so the test can count how many times it
# is invoked.
with mock.patch.object(
self.pic, 'build_pileup', autospec=True) as mock_encoder:
# Request alt-aligned pileups using the 'rows' layout.
self.pic._options.alt_aligned_pileup = 'rows'
output = self.pic.create_pileup_images(
dv_call=self.dv_call,
reads_for_samples=self.reads_for_samples,
haplotype_alignments_for_samples=[haplotype_alignments],
haplotype_sequences=haplotype_sequences)
# The whole variant is reported as unprocessable.
self.assertIsNone(output)
# Exactly one build_pileup call is expected.
# NOTE(review): presumably this is the reference-based pileup, built before
# the length check rejects the alt haplotype -- confirm against
# create_pileup_images.
self.assertEqual(mock_encoder.call_count, 1)

@parameterized.parameters(
((100, 221, 6), 'rows', (300, 221, 6)),
((100, 221, 6), 'base_channels', (100, 221, 8)),
Expand Down

0 comments on commit 698f96a

Please sign in to comment.