diff --git a/deepvariant/pileup_image.py b/deepvariant/pileup_image.py index cee82a07..d36ad2ec 100644 --- a/deepvariant/pileup_image.py +++ b/deepvariant/pileup_image.py @@ -35,6 +35,7 @@ import itertools +from absl import logging import numpy as np from third_party.nucleus.protos import reads_pb2 @@ -470,6 +471,12 @@ def _pileup_for_pair_of_alts(alt_alleles): else: alt_images = [] for alt in alt_alleles: + if len(haplotype_sequences[alt]) != self.width: + logging.warning( + 'haplotype_sequences[alt] is %d long but pileup ' + 'image width is %d. Giving up on this image', + len(haplotype_sequences[alt]), self.width) + return None alt_image = self.build_pileup( dv_call=dv_call, refbases=haplotype_sequences[alt], @@ -485,5 +492,13 @@ def _pileup_for_pair_of_alts(alt_alleles): else: return ref_image - return [(alts, _pileup_for_pair_of_alts(alts)) - for alts in self._alt_allele_combinations(variant)] + retval = [] + for alts in self._alt_allele_combinations(variant): + pileup = _pileup_for_pair_of_alts(alts) + # If the pileup is None, this can mean that we're near the edge of the + # contig, so one pileup width is invalid. + # Return None to indicate we couldn't process this variant. 
+ if pileup is None: + return None + retval.append((alts, pileup)) + return retval diff --git a/deepvariant/pileup_image_test.py b/deepvariant/pileup_image_test.py index 9b534fc5..cae2ccfa 100644 --- a/deepvariant/pileup_image_test.py +++ b/deepvariant/pileup_image_test.py @@ -1213,7 +1213,9 @@ def _expected_call(alts): def test_create_pileup_images_with_alt_align(self): self.dv_call.variant.alternate_bases[:] = ['C', 'T'] - haplotype_sequences = {'C': 'seq for C', 'T': 'seq for T'} + seq_for_c = 'C' * self.pic.width + seq_for_t = 'T' * self.pic.width + haplotype_sequences = {'C': seq_for_c, 'T': seq_for_t} haplotype_alignments = {'C': 'reads for C', 'T': 'reads for T'} with mock.patch.object( @@ -1256,17 +1258,33 @@ def _expected_alt_based_call(alts, refbases, reads): [ # Pileup for 'C': _expected_ref_based_call(['C']), - _expected_alt_based_call(['C'], 'seq for C', 'reads for C'), + _expected_alt_based_call(['C'], seq_for_c, 'reads for C'), # Pileup for 'T': _expected_ref_based_call(['T']), - _expected_alt_based_call(['T'], 'seq for T', 'reads for T'), + _expected_alt_based_call(['T'], seq_for_t, 'reads for T'), # Pileup for 'C/T': _expected_ref_based_call(['C', 'T']), - _expected_alt_based_call(['C', 'T'], 'seq for C', 'reads for C'), - _expected_alt_based_call(['C', 'T'], 'seq for T', 'reads for T'), + _expected_alt_based_call(['C', 'T'], seq_for_c, 'reads for C'), + _expected_alt_based_call(['C', 'T'], seq_for_t, 'reads for T'), ], any_order=True) + def test_create_pileup_images_with_mismatched_alt_ref(self): + self.dv_call.variant.alternate_bases[:] = ['T'] + # Deliberately make the length different from self.pic.width. 
+ haplotype_sequences = {'T': 'T' * (self.pic.width + 1)} + haplotype_alignments = {'T': 'reads for T'} + with mock.patch.object( + self.pic, 'build_pileup', autospec=True) as mock_encoder: + self.pic._options.alt_aligned_pileup = 'rows' + output = self.pic.create_pileup_images( + dv_call=self.dv_call, + reads_for_samples=self.reads_for_samples, + haplotype_alignments_for_samples=[haplotype_alignments], + haplotype_sequences=haplotype_sequences) + self.assertIsNone(output) + self.assertEqual(mock_encoder.call_count, 1) + @parameterized.parameters( ((100, 221, 6), 'rows', (300, 221, 6)), ((100, 221, 6), 'base_channels', (100, 221, 8)),