From 789b9327bb51a9701aa24206b70cff8e88752f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Tue, 17 May 2016 11:02:58 +0200 Subject: [PATCH 1/5] Fixed issue419 --- src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java index c33d30fb72..714199f944 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java +++ b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java @@ -217,12 +217,11 @@ private SAMRecord createReadNoFlag(final String name, final int contig, final in final int defaultQuality) throws SAMException { final SAMRecord rec = new SAMRecord(this.header); rec.setReadName(name); - if (chroms.length <= contig) { - throw new SAMException("Contig too big [" + chroms.length + " < " + contig); + if (header.getSequenceDictionary().size() <= contig) { + throw new SAMException("Contig too big [" + header.getSequenceDictionary().size() + " < " + contig); } if (0 <= contig) { rec.setReferenceIndex(contig); - rec.setReferenceName(chroms[contig]); rec.setAlignmentStart(start); } if (!recordUnmapped) { From 4743d873ab6a7f849bc00a0e4ec7917be7582e03 Mon Sep 17 00:00:00 2001 From: magicDGS Date: Tue, 15 Dec 2015 13:07:11 +0100 Subject: [PATCH 2/5] Added indel support to SamLocusIterator --- .../samtools/util/SamLocusIterator.java | 176 +++++- .../samtools/util/SamLocusIteratorTest.java | 519 ++++++++++++++---- 2 files changed, 579 insertions(+), 116 deletions(-) diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java index f0dd952f10..81559eb965 100644 --- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java @@ -24,6 +24,9 @@ package htsjdk.samtools.util; import htsjdk.samtools.AlignmentBlock; +import htsjdk.samtools.Cigar; +import htsjdk.samtools.CigarElement; +import htsjdk.samtools.CigarOperator; import htsjdk.samtools.SAMException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; @@ -75,12 +78,16 @@ public RecordAndOffset(final SAMRecord record, final int offset) { /** * The unit of iteration. Holds information about the locus (the SAMSequenceRecord and 1-based position - * on the reference), plus List of ReadAndOffset objects, one for each read that overlaps the locus + * on the reference), plus List of ReadAndOffset objects, one for each read that overlaps the locus; + * two more List_s_ of ReadAndOffset objects include reads that overlap the locus with insertions and deletions + * respectively */ public static final class LocusInfo implements Locus { private final SAMSequenceRecord referenceSequence; private final int position; private final List recordAndOffsets = new ArrayList(100); + private List deletedInRecord = null; + private List insertedInRecord = null; LocusInfo(final SAMSequenceRecord referenceSequence, final int position) { this.referenceSequence = referenceSequence; @@ -92,14 +99,49 @@ public void add(final SAMRecord read, final int position) { recordAndOffsets.add(new RecordAndOffset(read, position)); } + /** Accumulate info for one read with a deletion */ + public void addDeleted(final SAMRecord read, int previousPosition) { + if(deletedInRecord == null) { + deletedInRecord = new ArrayList<>(); + } + deletedInRecord.add(new RecordAndOffset(read, previousPosition)); + } + + /** + * Accumulate info for one read with an insertion. + * For this locus, the reads in the insertion are included also in recordAndOffsets + */ + public void addInserted(final SAMRecord read, int firstPosition) { + if(insertedInRecord == null) { + insertedInRecord = new ArrayList<>(); + } + insertedInRecord.add(new RecordAndOffset(read, firstPosition)); + } + public int getSequenceIndex() { return referenceSequence.getSequenceIndex(); } /** @return 1-based reference position */ public int getPosition() { return position; } public List getRecordAndPositions() { return Collections.unmodifiableList(recordAndOffsets); } + public List getDeletedInRecord() { + return (deletedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(deletedInRecord); + } + public List getInsertedInRecord() { + return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord); + } public String getSequenceName() { return referenceSequence.getSequenceName(); } @Override public String toString() { return referenceSequence.getSequenceName() + ":" + position; } public int getSequenceLength() {return referenceSequence.getSequenceLength();} + + /** + * @return true if all the RecordAndOffset lists are empty; + * false if at least one have records + */ + public boolean isEmpty() { + return recordAndOffsets.isEmpty() && + (deletedInRecord == null || deletedInRecord.isEmpty()) && + (insertedInRecord == null || insertedInRecord.isEmpty()); + } } @@ -155,6 +197,12 @@ public void add(final SAMRecord read, final int position) { // Set to true when we have enforced the accumulation limit for the first time private boolean enforcedAccumulationLimit = false; + /** + * If true, include indels in the LocusInfo + */ + private boolean includeIndels = false; + + // When there is a target mask, these members remember the last locus for which a LocusInfo has been // returned, so that any uncovered locus in the target mask can be covered by a 0-coverage LocusInfo private int lastReferenceSequence = 0; @@ -300,8 +348,14 @@ public LocusInfo next() { continue; } - final Locus alignmentStart = new LocusImpl(rec.getReferenceIndex(), rec.getAlignmentStart()); - + int start = rec.getAlignmentStart(); + // only if we are including indels and the record does not start in the first base of the reference + // the stop locus to populate the queue is not the same if the record starts with an insertion + if(includeIndels && start != 1 && startWithInsertion(rec.getCigar())) { + // the start to populate is one less + start--; + } + final Locus alignmentStart = new LocusImpl(rec.getReferenceIndex(), start); // emit everything that is before the start of the current read, because we know no more // coverage will be accumulated for those loci. while (!accumulator.isEmpty() && locusComparator.compare(accumulator.get(0), alignmentStart) < 0) { @@ -316,17 +370,22 @@ public LocusInfo next() { } // at this point, either the accumulator list is empty or the head should - // be the same position as the first base of the read + // be the same position as the first base of the read (or insertion if first) if (!accumulator.isEmpty()) { if (accumulator.get(0).getSequenceIndex() != rec.getReferenceIndex() || - accumulator.get(0).position != rec.getAlignmentStart()) { + accumulator.get(0).position != start) { throw new IllegalStateException("accumulator should be empty or aligned with current SAMRecord"); } } // Store the loci for the read in the accumulator - if (!surpassedAccumulationThreshold()) accumulateSamRecord(rec); - + if (!surpassedAccumulationThreshold()) { + accumulateSamRecord(rec); + // Store the indels if requested + if(includeIndels) { + accumulateIndels(rec); + } + } samIterator.next(); } @@ -368,20 +427,30 @@ private boolean surpassedAccumulationThreshold() { return surpassesThreshold; } + /** + * Check if cigar start with an insertion, ignoring softclip + * @param cigar the cigar element + * @return true if it start with an insertion; false otherwise + */ + private static boolean startWithInsertion(final Cigar cigar) { + for(final CigarElement element: cigar.getCigarElements()) { + switch(element.getOperator()) { + case I: return true; + case S: continue; + default: break; + } + } + return false; + } + /** * Capture the loci covered by the given SAMRecord in the LocusInfos in the accumulator, * creating new LocusInfos as needed. */ private void accumulateSamRecord(final SAMRecord rec) { - final SAMSequenceRecord ref = getReferenceSequence(rec.getReferenceIndex()); - final int alignmentStart = rec.getAlignmentStart(); - final int alignmentEnd = rec.getAlignmentEnd(); - final int alignmentLength = alignmentEnd - alignmentStart; - // Ensure there are LocusInfos up to and including this position - for (int i=accumulator.size(); i<=alignmentLength; ++i) { - accumulator.add(new LocusInfo(ref, alignmentStart + i)); - } + // get the accumulator offset + int accOffset = getAccumulatorOffset(rec); final int minQuality = getQualityScoreCutoff(); final boolean dontCheckQualities = minQuality == 0; @@ -397,17 +466,52 @@ private void accumulateSamRecord(final SAMRecord rec) { // 0-based offset into the read of the current base final int readOffset = readStart + i - 1; - // 0-based offset from the aligned position of the first base in the read to the aligned position of the current base. - final int refOffset = refStart + i - alignmentStart; - // if the quality score cutoff is met, accumulate the base info if (dontCheckQualities || baseQualities[readOffset] >= minQuality) { + // 0-based offset from the aligned position of the first base in the read to the aligned position of the current base. + final int refOffset = refStart + i - accOffset; accumulator.get(refOffset).add(rec, readOffset); } } } } + /** + * Requires that the accumulator for the record is previously fill with + * {@link #accumulateSamRecord(htsjdk.samtools.SAMRecord)}. + * Include in the LocusInfo the indels; the quality threshold does not affect insertions/deletions + */ + private void accumulateIndels(final SAMRecord rec) { + // get the cigar elements + final List cigar = rec.getCigar().getCigarElements(); + // 0-based offset into the read of the current base + int readBase = 0; + // 0-based offset for the reference of the current base + // the accumulator could have the previous position because an indel is accumulating + int refBase = rec.getAlignmentStart() - getAccumulatorOffset(rec); + // iterate over the cigar element + for (int elementIndex = 0; elementIndex < cigar.size(); elementIndex++) { + final CigarElement e = cigar.get(elementIndex); + final CigarOperator operator = e.getOperator(); + if (operator.equals(CigarOperator.I)) { + System.err.println(""); + // insertions are included in the previous base + accumulator.get(refBase - 1).addInserted(rec, readBase); + readBase += e.getLength(); + } else if (operator.equals(CigarOperator.D)) { + // accumulate for each position that spans the deletion + for (int i = 0; i < e.getLength(); i++) { + // the offset is the one for the previous base + accumulator.get(refBase + i).addDeleted(rec, readBase - 1); + } + refBase += e.getLength(); + } else { + if (operator.consumesReadBases()) readBase += e.getLength(); + if (operator.consumesReferenceBases()) refBase += e.getLength(); + } + } + } + /** * Create the next relevant zero-coverage LocusInfo * @@ -456,8 +560,8 @@ else if (nextbit >= stopBeforeLocus.getPosition()) { */ private void populateCompleteQueue(final Locus stopBeforeLocus) { // Because of gapped alignments, it is possible to create LocusInfo's with no reads associated with them. - // Skip over these. - while (!accumulator.isEmpty() && accumulator.get(0).getRecordAndPositions().isEmpty() && + // Skip over these if not including indels + while (!accumulator.isEmpty() && accumulator.get(0).isEmpty() && locusComparator.compare(accumulator.get(0), stopBeforeLocus) < 0) { accumulator.remove(0); } @@ -494,6 +598,29 @@ private void populateCompleteQueue(final Locus stopBeforeLocus) { lastPosition = locusInfo.getPosition(); } + /** + * Ensure that the queue is populated and get the accumulator offset for the current record + */ + private int getAccumulatorOffset(SAMRecord rec) { + final SAMSequenceRecord ref = getReferenceSequence(rec.getReferenceIndex()); + final int alignmentStart = rec.getAlignmentStart(); + final int alignmentEnd = rec.getAlignmentEnd(); + final int alignmentLength = alignmentEnd - alignmentStart; + // get the offset for an insertion if we are tracking them + final int insOffset = + (includeIndels && startWithInsertion(rec.getCigar())) ? 1 : 0; + // if there is an insertion in the first base and it is not tracked in the accumulator, add it + if (insOffset == 1 && accumulator.isEmpty()) { + accumulator.add(new LocusInfo(ref, alignmentStart - 1)); + } + + // Ensure there are LocusInfos up to and including this position + for (int i=accumulator.size(); i<=alignmentLength+insOffset; ++i) { + accumulator.add(new LocusInfo(ref, alignmentStart + i - insOffset)); + } + return alignmentStart - insOffset; + } + private SAMSequenceRecord getReferenceSequence(final int referenceSequenceIndex) { return samReader.getFileHeader().getSequence(referenceSequenceIndex); } @@ -548,5 +675,14 @@ public int getMaxReadsToAccumulatePerLocus() { * cap is applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped). */ public void setMaxReadsToAccumulatePerLocus(final int maxReadsToAccumulatePerLocus) { this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus; } + + public boolean isIncludeIndels() { + return includeIndels; + } + + public void setIncludeIndels(final boolean includeIndels) { + this.includeIndels = includeIndels; + } + } diff --git a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java index eddd257be5..132a7b8779 100644 --- a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java @@ -23,128 +23,327 @@ */ package htsjdk.samtools.util; -import htsjdk.samtools.SamInputResource; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.*; import org.testng.Assert; import org.testng.annotations.Test; -import java.io.ByteArrayInputStream; - /** * @author alecw@broadinstitute.org */ public class SamLocusIteratorTest { - private SamReader createSamFileReader(final String samExample) { - final ByteArrayInputStream inputStream = new ByteArrayInputStream(samExample.getBytes()); - return SamReaderFactory.makeDefault().open(SamInputResource.of(inputStream)); + + /** Coverage for tests with the same reads */ + final static int coverage = 2; + + /** the read length for the testss */ + final static int readLength = 36; + + final static SAMFileHeader header = new SAMFileHeader(); + + static { + header.setSortOrder(SAMFileHeader.SortOrder.coordinate); + SAMSequenceDictionary dict = new SAMSequenceDictionary(); + dict.addSequence(new SAMSequenceRecord("chrM", 100000)); + header.setSequenceDictionary(dict); + } + + /** Get the record builder for the tests with the default parameters that are needed */ + private static SAMRecordSetBuilder getRecordBuilder() { + final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); + builder.setHeader(header); + builder.setReadLength(readLength); + return builder; } - private SamLocusIterator createSamLocusIterator(final SamReader samReader) { - final SamLocusIterator ret = new SamLocusIterator(samReader); + /** Create the SamLocusIterator with the builder*/ + private SamLocusIterator createSamLocusIterator(final SAMRecordSetBuilder builder) { + final SamLocusIterator ret = new SamLocusIterator(builder.getSamReader()); ret.setEmitUncoveredLoci(false); return ret; } + /** + * Test a simple with only matches, with both including or not indels + */ @Test public void testBasicIterator() { - - final String sqHeader = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100000\n"; - final String seq1 = "ACCTACGTTCAATATTACAGGCGAACATACTTACTA"; - final String qual1 = "++++++++++++++++++++++++++++++++++++"; // phred 10 - final String s1 = "3851612\t16\tchrM\t165\t255\t36M\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String exampleSam = sqHeader + s1 + s1; - - final SamReader samReader = createSamFileReader(exampleSam); - final SamLocusIterator sli = createSamLocusIterator(samReader); - - - // make sure we accumulated depth of 2 for each position - int pos = 165; - for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals(pos++, li.getPosition()); - Assert.assertEquals(2, li.getRecordAndPositions().size()); + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "36M", null, 10); + } + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}){ + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getPosition(), pos++); + Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + // make sure that we are not accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + } } - } + /** + * Test the emit uncovered loci, with both including or not indels + */ @Test public void testEmitUncoveredLoci() { - final String sqHeader = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100000\n"; - final String seq1 = "ACCTACGTTCAATATTACAGGCGAACATACTTACTA"; - final String qual1 = "++++++++++++++++++++++++++++++++++++"; // phred 10 - final String s1 = "3851612\t16\tchrM\t165\t255\t36M\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String exampleSam = sqHeader + s1 + s1; + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "36M", null, 10); + } - final SamReader samReader = createSamFileReader(exampleSam); - final SamLocusIterator sli = new SamLocusIterator(samReader); + final int coveredEnd = CoordMath.getEnd(startPosition, readLength); - // make sure we accumulated depth of 2 for each position - int pos = 1; - final int coveredStart = 165; - final int coveredEnd = CoordMath.getEnd(coveredStart, seq1.length()); - for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals(li.getPosition(), pos++); - final int expectedReads; - if (li.getPosition() >= coveredStart && li.getPosition() <= coveredEnd) { - expectedReads = 2; - } else { - expectedReads = 0; + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setEmitUncoveredLoci(true); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth of 2 for each position + int pos = 1; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getPosition(), pos++); + final int expectedReads; + if (li.getPosition() >= startPosition && li.getPosition() <= coveredEnd) { + expectedReads = coverage; + } else { + expectedReads = 0; + } + Assert.assertEquals(li.getRecordAndPositions().size(), expectedReads); + // make sure that we are not accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); } - Assert.assertEquals(li.getRecordAndPositions().size(), expectedReads); + Assert.assertEquals(pos, header.getSequence(0).getSequenceLength() + 1); } - Assert.assertEquals(pos, 100001); - } + /** + * Test the quality filter, with both including or not indels + */ @Test public void testQualityFilter() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + final String qualityString; + // half of the reads have a different quality + if(i % 2 == 0) { + qualityString = null; + } else { + qualityString = "+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*"; + } + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "36M", qualityString, 10); + } - final String sqHeader = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100000\n"; - final String seq1 = "ACCTACGTTCAATATTACAGGCGAACATACTTACTA"; - final String qual1 = "++++++++++++++++++++++++++++++++++++"; // phred 10 - final String qual2 = "+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*"; // phred 10,9... - final String s1 = "3851612\t16\tchrM\t165\t255\t36M\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String s2 = "3851612\t16\tchrM\t165\t255\t36M\t*\t0\t0\t" + seq1 + "\t" + qual2 + "\n"; - final String exampleSam = sqHeader + s1 + s2; + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setQualityScoreCutoff(10); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth coverage for even positions, coverage/2 for odd positions + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getRecordAndPositions().size(), (pos % 2 == 0) ? coverage / 2 : coverage); + Assert.assertEquals(li.getPosition(), pos++); + // make sure that we are not accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + } + } + } - final SamReader samReader = createSamFileReader(exampleSam); - final SamLocusIterator sli = createSamLocusIterator(samReader); - sli.setQualityScoreCutoff(10); + /** + * Test a simple deletion, with both including or not indels + */ + @Test + public void testSimpleDeletion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "18M10D18M", null, 10); + } + final int deletionStart = 183; + final int deletionEnd = 192; + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}){ + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + boolean isDeletedPosition = (pos >= deletionStart && pos <= deletionEnd); + if(!incIndels && isDeletedPosition) { + pos = deletionEnd + 1; + isDeletedPosition = false; + } + Assert.assertEquals(li.getPosition(), pos++); + if(isDeletedPosition) { + // make sure there are no reads without indels + Assert.assertEquals(li.getRecordAndPositions().size(), 0); + // make sure that we are accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), coverage); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + } else { + // make sure we are accumulating normal coverage + Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + // make sure that we are not accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + } + } + } + } + /** + * Test a simple insertion, with both including or not indels + */ + @Test + public void testSimpleInsertion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "30M3I3M", null, 10); + } + final int insStart = 194; + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}){ + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getPosition(), pos++); + // make sure we are accumulating normal coverage + Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + // make sure that we are not accumulating deletions + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + if(incIndels && li.getPosition() == insStart) { + Assert.assertEquals(li.getInsertedInRecord().size(), coverage, "Tracking indels: "+incIndels+". At "+li.toString()); + } else { + Assert.assertEquals(li.getInsertedInRecord().size(), 0, "Tracking indels: "+incIndels+". At "+li.toString()); + } + } + } + } - // make sure we accumulated depth 2 for even positions, 1 for odd positions - int pos = 165; - for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals((pos % 2 == 0) ? 1 : 2, li.getRecordAndPositions().size()); - Assert.assertEquals(pos++, li.getPosition()); + /** + * Test an insertion at the start of the read, with both including or not indels + */ + @Test + public void testStartWithInsertion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "3I33M", null, 10); } + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = (incIndels) ? startPosition - 1 : startPosition; + boolean indelPosition = incIndels; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getPosition(), pos); + // accumulation of coverage + Assert.assertEquals(li.getRecordAndPositions().size(), (indelPosition) ? 0 : coverage); + // no accumulation of deletions + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + // accumulation of insertion + Assert.assertEquals(li.getInsertedInRecord().size(), (indelPosition) ? coverage : 0); + // check offsets of the insertion + if(indelPosition) { + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 0); + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 0); + indelPosition = false; + } + pos++; + } + } } /** - * Try all CIGAR operands (except H and P) and confirm that loci produced by SamLocusIterator are as expected. + * Test an insertion at the start of a soft-clipped read, with both including or not indels */ @Test - public void testSimpleGappedAlignment() { - final String sqHeader = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100000\n"; - final String seq1 = "ACCTACGTTCAATATTACAGGCGAACATACTTACTA"; - final String qual1 = "++++++++++++++++++++++++++++++++++++"; // phred 10 - final String s1 = "3851612\t16\tchrM\t165\t255\t3S3M3N3M3D3M3I18M3S\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String exampleSam = sqHeader + s1 + s1; + public void testStartWithSoftClipAndInsertion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "1S3I32M", null, 10); + } - final SamReader samReader = createSamFileReader(exampleSam); - final SamLocusIterator sli = createSamLocusIterator(samReader); + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = (incIndels) ? startPosition - 1 : startPosition; + boolean indelPosition = incIndels; + for (final SamLocusIterator.LocusInfo li : sli) { + Assert.assertEquals(li.getPosition(), pos); + // accumulation of coverage + Assert.assertEquals(li.getRecordAndPositions().size(), (indelPosition) ? 0 : coverage); + // no accumulation of deletions + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + // accumulation of insertion + Assert.assertEquals(li.getInsertedInRecord().size(), (indelPosition) ? coverage : 0); + // check offsets of the insertion + if(indelPosition) { + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 1); + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 1); + indelPosition = false; + } + pos++; + } + } + } + /** + * Try all CIGAR operands (except H and P) and confirm that loci produced by SamLocusIterator are as expected, + * with both including or not indels + */ + @Test + public void testSimpleGappedAlignment() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for(int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record"+i, 0, startPosition, true, false, "3S3M3N3M3D3M3I18M3S", null, 10); + } // make sure we accumulated depth of 2 for each position - final int[] expectedReferencePositions = new int[]{ + final int[] expectedPositions = new int[]{ // 3S 165, 166, 167, // 3M // 3N 171, 172, 173, // 3M - // 3D + 174, 175, 176, // 3D 177, 178, 179, // 3M // 3I 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197}; // 18M @@ -154,36 +353,81 @@ public void testSimpleGappedAlignment() { 3, 4, 5, // 3M // 3N 6, 7, 8, // 3M - // 3D + 8, 8, 8, // 3D previous 0-based offset 9, 10, 11, // 3M // 3I 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 // 3M }; - int i = 0; - for (final SamLocusIterator.LocusInfo li : sli) { - Assert.assertEquals(li.getRecordAndPositions().size(), 2); - Assert.assertEquals(li.getPosition(), expectedReferencePositions[i]); - Assert.assertEquals(li.getRecordAndPositions().get(0).getOffset(), expectedReadOffsets[i]); - Assert.assertEquals(li.getRecordAndPositions().get(1).getOffset(), expectedReadOffsets[i]); - ++i; + + // to check the range of the insertion + final int firstDelBase = 174; + final int lastDelBase = 176; + + final int expectedInsertionPosition = 179; // previous reference base + final int expectedInsertionOffset = 12; // first read base in the insertion + + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[]{false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + + int i = 0; + for (final SamLocusIterator.LocusInfo li : sli) { + // check if it is in the deletion range + boolean inDelRange = (expectedPositions[i] >= firstDelBase && expectedPositions[i] <= lastDelBase); + // if we are not including indels, the expected position index change if it is in an deletion range + if(!incIndels && inDelRange ) { + i += 3; + inDelRange = false; // set to false to do not check the range of deletions + } + // check if the LocusInfo is the expected + Assert.assertEquals(li.getPosition(), expectedPositions[i]); + // check the insertions + if(incIndels && li.getPosition() == expectedInsertionPosition) { + // check the accumulated coverage + Assert.assertEquals(li.getInsertedInRecord().size(), coverage); + // check the record offset + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), expectedInsertionOffset); + Assert.assertEquals(li.getInsertedInRecord().get(1).getOffset(), expectedInsertionOffset); + } else { + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + } + // check the range of deletions + if(inDelRange) { + // check the coverage for insertion and normal records + Assert.assertEquals(li.getDeletedInRecord().size(), coverage); + Assert.assertEquals(li.getRecordAndPositions().size(), 0); + // check the offset for the deletion + Assert.assertEquals(li.getDeletedInRecord().get(0).getOffset(), expectedReadOffsets[i]); + Assert.assertEquals(li.getDeletedInRecord().get(1).getOffset(), expectedReadOffsets[i]); + } else { + // if it is not a deletion, perform the same test as before + Assert.assertEquals(li.getRecordAndPositions().size(), coverage); + // Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getRecordAndPositions().get(0).getOffset(), expectedReadOffsets[i]); + Assert.assertEquals(li.getRecordAndPositions().get(1).getOffset(), expectedReadOffsets[i]); + } + ++i; + } } + + } /** - * Test two reads that overlap because one has a deletion in the middle of it. + * Test two reads that overlap because one has a deletion in the middle of it, without tracking indels */ @Test - public void testOverlappingGappedAlignments() { - final String sqHeader = "@HD\tSO:coordinate\tVN:1.0\n@SQ\tSN:chrM\tAS:HG18\tLN:100000\n"; - final String seq1 = "ACCTACGTTCAATATTACAGGCGAACATACTTACTA"; - final String qual1 = "++++++++++++++++++++++++++++++++++++"; // phred 10 + public void testOverlappingGappedAlignmentsWithoutIndels() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; // Were it not for the gap, these two reads would not overlap - final String s1 = "3851612\t16\tchrM\t165\t255\t18M10D18M\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String s2 = "3851613\t16\tchrM\t206\t255\t36M\t*\t0\t0\t" + seq1 + "\t" + qual1 + "\n"; - final String exampleSam = sqHeader + s1 + s2; + builder.addFrag("record1", 0, startPosition, true, false, "18M10D18M", null, 10); + builder.addFrag("record2", 0, 206, true, false, "36M", null, 10); + + final SamLocusIterator sli = createSamLocusIterator(builder); - final SamReader samReader = createSamFileReader(exampleSam); - final SamLocusIterator sli = createSamLocusIterator(samReader); // 5 base overlap btw the two reads final int numBasesCovered = 36 + 36 - 5; final int[] expectedReferencePositions = new int[numBasesCovered]; @@ -193,26 +437,26 @@ public void testOverlappingGappedAlignments() { int i; // First 18 bases are from the first read for (i = 0; i < 18; ++i) { - expectedReferencePositions[i] = 165 + i; + expectedReferencePositions[i] = startPosition + i; expectedDepths[i] = 1; expectedReadOffsets[i] = new int[]{i}; } // Gap of 10, then 13 bases from the first read for (; i < 36 - 5; ++i) { - expectedReferencePositions[i] = 165 + 10 + i; + expectedReferencePositions[i] = startPosition + 10 + i; expectedDepths[i] = 1; expectedReadOffsets[i] = new int[]{i}; } // Last 5 bases of first read overlap first 5 bases of second read for (; i < 36; ++i) { - expectedReferencePositions[i] = 165 + 10 + i; + expectedReferencePositions[i] = startPosition + 10 + i; expectedDepths[i] = 2; expectedReadOffsets[i] = new int[]{i, i - 31}; } // Last 31 bases of 2nd read for (; i < 36 + 36 - 5; ++i) { - expectedReferencePositions[i] = 165 + 10 + i; + expectedReferencePositions[i] = startPosition + 10 + i; expectedDepths[i] = 1; expectedReadOffsets[i] = new int[]{i - 31}; } @@ -225,7 +469,90 @@ public void testOverlappingGappedAlignments() { for (int j = 0; j < expectedReadOffsets[i].length; ++j) { Assert.assertEquals(li.getRecordAndPositions().get(j).getOffset(), expectedReadOffsets[i][j]); } + // make sure that we are not accumulating indels + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); ++i; } } + + /** + * Test two reads that overlap because one has a deletion in the middle of it, tracking indels + */ + @Test + public void testOverlappingGappedAlignmentsWithIndels() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + // Were it not for the gap, these two reads would not overlap + builder.addFrag("record1", 0, startPosition, true, false, "18M10D18M", null, 10); + builder.addFrag("record2", 0, 206, true, false, "36M", null, 10); + + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(true); + + // 46 for the gapped alignment, and 5 base overlap btw the two reads + final int numBasesCovered = 46 + 36 - 5; + final int[] expectedReferencePositions = new int[numBasesCovered]; + final int[] expectedDepths = new int[numBasesCovered]; + final int[] expectedDelDepths = new int[numBasesCovered]; + final int[][] expectedReadOffsets = new int[numBasesCovered][]; + final int expectedDelOffset = 17; // previous 0-based offset + + int i; + // First 18 bases are from the first read + for (i = 0; i < 18; ++i) { + expectedReferencePositions[i] = startPosition + i; + expectedDepths[i] = 1; + expectedDelDepths[i] = 0; + expectedReadOffsets[i] = new int[]{i}; + } + // Gap of 10 + for(; i < 18 + 10; ++i) { + expectedReferencePositions[i] = startPosition + i; + expectedDepths[i] = 0; + expectedDelDepths[i] = 1; + expectedReadOffsets[i] = new int[0]; + } + // the next bases for the first read (without the 5 overlapping) + for(; i < 46 - 5; ++i) { + expectedReferencePositions[i] = startPosition + i; + expectedDepths[i] = 1; + expectedDelDepths[i] = 0; + expectedReadOffsets[i] = new int[]{i - 10}; + } + // last 5 bases of the first read overlap first 5 bases of second read + for(; i < 46; ++i) { + expectedReferencePositions[i] = startPosition + i; + expectedDepths[i] = 2; + expectedDelDepths[i] = 0; + expectedReadOffsets[i] = new int[]{i - 10, i + 10 - 46 - 5 }; + } + // Last 31 bases of 2nd read + for(; i < numBasesCovered; ++i) { + expectedReferencePositions[i] = startPosition + i; + expectedDepths[i] = 1; + expectedDelDepths[i] = 0; + expectedReadOffsets[i] = new int[]{i + 10 - 46 - 5}; + } + i = 0; + for (final SamLocusIterator.LocusInfo li : sli) { + // checking the same as without indels + Assert.assertEquals(li.getRecordAndPositions().size(), expectedDepths[i]); + Assert.assertEquals(li.getPosition(), expectedReferencePositions[i]); + Assert.assertEquals(li.getRecordAndPositions().size(), expectedReadOffsets[i].length); + for (int j = 0; j < expectedReadOffsets[i].length; ++j) { + Assert.assertEquals(li.getRecordAndPositions().get(j).getOffset(), expectedReadOffsets[i][j]); + } + // check the deletions + Assert.assertEquals(li.getDeletedInRecord().size(), expectedDelDepths[i]); + if(expectedDelDepths[i] != 0) { + Assert.assertEquals(li.getDeletedInRecord().get(0).getOffset(), expectedDelOffset); + } + // checking that insertions are not accumulating + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + ++i; + } + } + } From 2fb6901987bf1c34c1394d4fbdab0c07cc29351c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 18 May 2016 13:58:16 +0200 Subject: [PATCH 3/5] Fixing startWithInsertion + code stype --- .../samtools/util/SamLocusIterator.java | 100 ++++++++---------- .../samtools/util/SamLocusIteratorTest.java | 97 +++++++++-------- 2 files changed, 94 insertions(+), 103 deletions(-) diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java index 81559eb965..21b5cd355c 100644 --- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java @@ -23,26 +23,10 @@ */ package htsjdk.samtools.util; -import htsjdk.samtools.AlignmentBlock; -import htsjdk.samtools.Cigar; -import htsjdk.samtools.CigarElement; -import htsjdk.samtools.CigarOperator; -import htsjdk.samtools.SAMException; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.filter.AggregateFilter; -import htsjdk.samtools.filter.DuplicateReadFilter; -import htsjdk.samtools.filter.FilteringSamIterator; -import htsjdk.samtools.filter.SamRecordFilter; -import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; +import htsjdk.samtools.*; +import htsjdk.samtools.filter.*; + +import java.util.*; /** * Iterator that traverses a SAM File, accumulating information on a per-locus basis. @@ -101,7 +85,7 @@ public void add(final SAMRecord read, final int position) { /** Accumulate info for one read with a deletion */ public void addDeleted(final SAMRecord read, int previousPosition) { - if(deletedInRecord == null) { + if (deletedInRecord == null) { deletedInRecord = new ArrayList<>(); } deletedInRecord.add(new RecordAndOffset(read, previousPosition)); @@ -112,7 +96,7 @@ public void addDeleted(final SAMRecord read, int previousPosition) { * For this locus, the reads in the insertion are included also in recordAndOffsets */ public void addInserted(final SAMRecord read, int firstPosition) { - if(insertedInRecord == null) { + if (insertedInRecord == null) { insertedInRecord = new ArrayList<>(); } insertedInRecord.add(new RecordAndOffset(read, firstPosition)); @@ -129,13 +113,10 @@ public List getDeletedInRecord() { public List getInsertedInRecord() { return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord); } - public String getSequenceName() { return referenceSequence.getSequenceName(); } - @Override public String toString() { return referenceSequence.getSequenceName() + ":" + position; } - public int getSequenceLength() {return referenceSequence.getSequenceLength();} /** * @return true if all the RecordAndOffset lists are empty; - * false if at least one have records + * false if at least one have records */ public boolean isEmpty() { return recordAndOffsets.isEmpty() && @@ -351,7 +332,7 @@ public LocusInfo next() { int start = rec.getAlignmentStart(); // only if we are including indels and the record does not start in the first base of the reference // the stop locus to populate the queue is not the same if the record starts with an insertion - if(includeIndels && start != 1 && startWithInsertion(rec.getCigar())) { + if (includeIndels && start != 1 && startWithInsertion(rec.getCigar())) { // the start to populate is one less start--; } @@ -382,7 +363,7 @@ public LocusInfo next() { if (!surpassedAccumulationThreshold()) { accumulateSamRecord(rec); // Store the indels if requested - if(includeIndels) { + if (includeIndels) { accumulateIndels(rec); } } @@ -428,17 +409,15 @@ private boolean surpassedAccumulationThreshold() { } /** - * Check if cigar start with an insertion, ignoring softclip - * @param cigar the cigar element - * @return true if it start with an insertion; false otherwise + * Check if cigar start with an insertion, ignoring other operators that do not consume references bases + * @param cigar the cigar + * @return true if the first operator to consume reference bases or be an insertion, is an insertion; false otherwise */ private static boolean startWithInsertion(final Cigar cigar) { - for(final CigarElement element: cigar.getCigarElements()) { - switch(element.getOperator()) { - case I: return true; - case S: continue; - default: break; - } + for (final CigarElement element : cigar.getCigarElements()) { + if (element.getOperator()==CigarOperator.I) return true; + if (!element.getOperator().consumesReferenceBases()) continue; + break; } return false; } @@ -458,11 +437,11 @@ private void accumulateSamRecord(final SAMRecord rec) { // interpret the CIGAR string and add the base info for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) { - final int readStart = alignmentBlock.getReadStart(); - final int refStart = alignmentBlock.getReferenceStart(); + final int readStart = alignmentBlock.getReadStart(); + final int refStart = alignmentBlock.getReferenceStart(); final int blockLength = alignmentBlock.getLength(); - for (int i=0; i= stopBeforeLocus.getPosition()) { + } else if (nextbit >= stopBeforeLocus.getPosition()) { return null; } } @@ -562,7 +539,7 @@ private void populateCompleteQueue(final Locus stopBeforeLocus) { // Because of gapped alignments, it is possible to create LocusInfo's with no reads associated with them. // Skip over these if not including indels while (!accumulator.isEmpty() && accumulator.get(0).isEmpty() && - locusComparator.compare(accumulator.get(0), stopBeforeLocus) < 0) { + locusComparator.compare(accumulator.get(0), stopBeforeLocus) < 0) { accumulator.remove(0); } if (accumulator.isEmpty()) { @@ -603,19 +580,18 @@ private void populateCompleteQueue(final Locus stopBeforeLocus) { */ private int getAccumulatorOffset(SAMRecord rec) { final SAMSequenceRecord ref = getReferenceSequence(rec.getReferenceIndex()); - final int alignmentStart = rec.getAlignmentStart(); - final int alignmentEnd = rec.getAlignmentEnd(); + final int alignmentStart = rec.getAlignmentStart(); + final int alignmentEnd = rec.getAlignmentEnd(); final int alignmentLength = alignmentEnd - alignmentStart; // get the offset for an insertion if we are tracking them - final int insOffset = - (includeIndels && startWithInsertion(rec.getCigar())) ? 1 : 0; + final int insOffset = (includeIndels && startWithInsertion(rec.getCigar())) ? 1 : 0; // if there is an insertion in the first base and it is not tracked in the accumulator, add it if (insOffset == 1 && accumulator.isEmpty()) { accumulator.add(new LocusInfo(ref, alignmentStart - 1)); } // Ensure there are LocusInfos up to and including this position - for (int i=accumulator.size(); i<=alignmentLength+insOffset; ++i) { + for (int i = accumulator.size(); i <= alignmentLength + insOffset; ++i) { accumulator.add(new LocusInfo(ref, alignmentStart + i - insOffset)); } return alignmentStart - insOffset; @@ -643,19 +619,29 @@ public void setSamFilters(final List samFilters) { this.samFilters = samFilters; } - public int getQualityScoreCutoff() { return qualityScoreCutoff; } + public int getQualityScoreCutoff() { + return qualityScoreCutoff; + } - public void setQualityScoreCutoff(final int qualityScoreCutoff) { this.qualityScoreCutoff = qualityScoreCutoff; } + public void setQualityScoreCutoff(final int qualityScoreCutoff) { + this.qualityScoreCutoff = qualityScoreCutoff; + } public int getMappingQualityScoreCutoff() { return mappingQualityScoreCutoff; } - public void setMappingQualityScoreCutoff(final int mappingQualityScoreCutoff) { this.mappingQualityScoreCutoff = mappingQualityScoreCutoff; } + public void setMappingQualityScoreCutoff(final int mappingQualityScoreCutoff) { + this.mappingQualityScoreCutoff = mappingQualityScoreCutoff; + } - public boolean isIncludeNonPfReads() { return includeNonPfReads; } + public boolean isIncludeNonPfReads() { + return includeNonPfReads; + } - public void setIncludeNonPfReads(final boolean includeNonPfReads) { this.includeNonPfReads = includeNonPfReads; } + public void setIncludeNonPfReads(final boolean includeNonPfReads) { + this.includeNonPfReads = includeNonPfReads; + } public boolean isEmitUncoveredLoci() { return emitUncoveredLoci; @@ -674,7 +660,9 @@ public int getMaxReadsToAccumulatePerLocus() { * As is pointed out above, setting this could cause major bias because of the non-random nature with which the * cap is applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped). */ - public void setMaxReadsToAccumulatePerLocus(final int maxReadsToAccumulatePerLocus) { this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus; } + public void setMaxReadsToAccumulatePerLocus(final int maxReadsToAccumulatePerLocus) { + this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus; + } public boolean isIncludeIndels() { return includeIndels; diff --git a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java index 132a7b8779..9e8e898684 100644 --- a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java @@ -23,7 +23,10 @@ */ package htsjdk.samtools.util; -import htsjdk.samtools.*; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecordSetBuilder; +import htsjdk.samtools.SAMSequenceDictionary; +import htsjdk.samtools.SAMSequenceRecord; import org.testng.Assert; import org.testng.annotations.Test; @@ -62,7 +65,7 @@ private SamLocusIterator createSamLocusIterator(final SAMRecordSetBuilder builde return ret; } - /** + /** * Test a simple with only matches, with both including or not indels */ @Test @@ -70,12 +73,12 @@ public void testBasicIterator() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "36M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "36M", null, 10); } // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}){ + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); // make sure we accumulated depth for each position @@ -90,7 +93,7 @@ public void testBasicIterator() { } } - /** + /** * Test the emit uncovered loci, with both including or not indels */ @Test @@ -99,15 +102,15 @@ public void testEmitUncoveredLoci() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "36M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "36M", null, 10); } final int coveredEnd = CoordMath.getEnd(startPosition, readLength); // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}) { + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setEmitUncoveredLoci(true); sli.setIncludeIndels(incIndels); @@ -130,7 +133,7 @@ public void testEmitUncoveredLoci() { } } - /** + /** * Test the quality filter, with both including or not indels */ @Test @@ -138,20 +141,20 @@ public void testQualityFilter() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { final String qualityString; // half of the reads have a different quality - if(i % 2 == 0) { + if (i % 2 == 0) { qualityString = null; } else { qualityString = "+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*+*"; } // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "36M", qualityString, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "36M", qualityString, 10); } // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}) { + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setQualityScoreCutoff(10); sli.setIncludeIndels(incIndels); @@ -167,7 +170,7 @@ public void testQualityFilter() { } } - /** + /** * Test a simple deletion, with both including or not indels */ @Test @@ -175,26 +178,26 @@ public void testSimpleDeletion() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "18M10D18M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "18M10D18M", null, 10); } final int deletionStart = 183; final int deletionEnd = 192; // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}){ + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); // make sure we accumulated depth for each position int pos = startPosition; for (final SamLocusIterator.LocusInfo li : sli) { boolean isDeletedPosition = (pos >= deletionStart && pos <= deletionEnd); - if(!incIndels && isDeletedPosition) { + if (!incIndels && isDeletedPosition) { pos = deletionEnd + 1; isDeletedPosition = false; } Assert.assertEquals(li.getPosition(), pos++); - if(isDeletedPosition) { + if (isDeletedPosition) { // make sure there are no reads without indels Assert.assertEquals(li.getRecordAndPositions().size(), 0); // make sure that we are accumulating indels @@ -211,7 +214,7 @@ public void testSimpleDeletion() { } } - /** + /** * Test a simple insertion, with both including or not indels */ @Test @@ -219,13 +222,13 @@ public void testSimpleInsertion() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "30M3I3M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "30M3I3M", null, 10); } final int insStart = 194; // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}){ + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); // make sure we accumulated depth for each position @@ -236,10 +239,10 @@ public void testSimpleInsertion() { Assert.assertEquals(li.getRecordAndPositions().size(), coverage); // make sure that we are not accumulating deletions Assert.assertEquals(li.getDeletedInRecord().size(), 0); - if(incIndels && li.getPosition() == insStart) { - Assert.assertEquals(li.getInsertedInRecord().size(), coverage, "Tracking indels: "+incIndels+". At "+li.toString()); + if (incIndels && li.getPosition() == insStart) { + Assert.assertEquals(li.getInsertedInRecord().size(), coverage, "Tracking indels: " + incIndels + ". At " + li.toString()); } else { - Assert.assertEquals(li.getInsertedInRecord().size(), 0, "Tracking indels: "+incIndels+". At "+li.toString()); + Assert.assertEquals(li.getInsertedInRecord().size(), 0, "Tracking indels: " + incIndels + ". At " + li.toString()); } } } @@ -253,13 +256,13 @@ public void testStartWithInsertion() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "3I33M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "3I33M", null, 10); } // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}) { + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); // make sure we accumulated depth for each position @@ -274,7 +277,7 @@ public void testStartWithInsertion() { // accumulation of insertion Assert.assertEquals(li.getInsertedInRecord().size(), (indelPosition) ? coverage : 0); // check offsets of the insertion - if(indelPosition) { + if (indelPosition) { Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 0); Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 0); indelPosition = false; @@ -292,13 +295,13 @@ public void testStartWithSoftClipAndInsertion() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "1S3I32M", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "1S3I32M", null, 10); } // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}) { + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); // make sure we accumulated depth for each position @@ -313,7 +316,7 @@ public void testStartWithSoftClipAndInsertion() { // accumulation of insertion Assert.assertEquals(li.getInsertedInRecord().size(), (indelPosition) ? coverage : 0); // check offsets of the insertion - if(indelPosition) { + if (indelPosition) { Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 1); Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 1); indelPosition = false; @@ -332,9 +335,9 @@ public void testSimpleGappedAlignment() { final SAMRecordSetBuilder builder = getRecordBuilder(); // add records up to coverage for the test in that position final int startPosition = 165; - for(int i = 0; i < coverage; i++) { + for (int i = 0; i < coverage; i++) { // add a negative-strand fragment mapped on chrM with base quality of 10 - builder.addFrag("record"+i, 0, startPosition, true, false, "3S3M3N3M3D3M3I18M3S", null, 10); + builder.addFrag("record" + i, 0, startPosition, true, false, "3S3M3N3M3D3M3I18M3S", null, 10); } // make sure we accumulated depth of 2 for each position @@ -367,7 +370,7 @@ public void testSimpleGappedAlignment() { final int expectedInsertionOffset = 12; // first read base in the insertion // test both for include indels and do not include indels - for (final boolean incIndels : new boolean[]{false, true}) { + for (final boolean incIndels : new boolean[] {false, true}) { final SamLocusIterator sli = createSamLocusIterator(builder); sli.setIncludeIndels(incIndels); @@ -376,14 +379,14 @@ public void testSimpleGappedAlignment() { // check if it is in the deletion range boolean inDelRange = (expectedPositions[i] >= firstDelBase && expectedPositions[i] <= lastDelBase); // if we are not including indels, the expected position index change if it is in an deletion range - if(!incIndels && inDelRange ) { + if (!incIndels && inDelRange) { i += 3; inDelRange = false; // set to false to do not check the range of deletions } // check if the LocusInfo is the expected Assert.assertEquals(li.getPosition(), expectedPositions[i]); // check the insertions - if(incIndels && li.getPosition() == expectedInsertionPosition) { + if (incIndels && li.getPosition() == expectedInsertionPosition) { // check the accumulated coverage Assert.assertEquals(li.getInsertedInRecord().size(), coverage); // check the record offset @@ -393,7 +396,7 @@ public void testSimpleGappedAlignment() { Assert.assertEquals(li.getInsertedInRecord().size(), 0); } // check the range of deletions - if(inDelRange) { + if (inDelRange) { // check the coverage for insertion and normal records Assert.assertEquals(li.getDeletedInRecord().size(), coverage); Assert.assertEquals(li.getRecordAndPositions().size(), 0); @@ -508,28 +511,28 @@ public void testOverlappingGappedAlignmentsWithIndels() { expectedReadOffsets[i] = new int[]{i}; } // Gap of 10 - for(; i < 18 + 10; ++i) { + for (; i < 18 + 10; ++i) { expectedReferencePositions[i] = startPosition + i; expectedDepths[i] = 0; expectedDelDepths[i] = 1; expectedReadOffsets[i] = new int[0]; } // the next bases for the first read (without the 5 overlapping) - for(; i < 46 - 5; ++i) { + for (; i < 46 - 5; ++i) { expectedReferencePositions[i] = startPosition + i; expectedDepths[i] = 1; expectedDelDepths[i] = 0; expectedReadOffsets[i] = new int[]{i - 10}; } // last 5 bases of the first read overlap first 5 bases of second read - for(; i < 46; ++i) { + for (; i < 46; ++i) { expectedReferencePositions[i] = startPosition + i; expectedDepths[i] = 2; expectedDelDepths[i] = 0; - expectedReadOffsets[i] = new int[]{i - 10, i + 10 - 46 - 5 }; + expectedReadOffsets[i] = new int[]{i - 10, i + 10 - 46 - 5}; } // Last 31 bases of 2nd read - for(; i < numBasesCovered; ++i) { + for (; i < numBasesCovered; ++i) { expectedReferencePositions[i] = startPosition + i; expectedDepths[i] = 1; expectedDelDepths[i] = 0; @@ -546,7 +549,7 @@ public void testOverlappingGappedAlignmentsWithIndels() { } // check the deletions Assert.assertEquals(li.getDeletedInRecord().size(), expectedDelDepths[i]); - if(expectedDelDepths[i] != 0) { + if (expectedDelDepths[i] != 0) { Assert.assertEquals(li.getDeletedInRecord().get(0).getOffset(), expectedDelOffset); } // checking that insertions are not accumulating From ce3f2d8876ea2b6538f2b2cd5044a63e5e38d7d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 18 May 2016 14:24:55 +0200 Subject: [PATCH 4/5] fixing removed methods in LocusInfo --- src/main/java/htsjdk/samtools/util/SamLocusIterator.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java index 21b5cd355c..5dbf79cbea 100644 --- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java +++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java @@ -107,6 +107,9 @@ public void addInserted(final SAMRecord read, int firstPosition) { /** @return 1-based reference position */ public int getPosition() { return position; } public List getRecordAndPositions() { return Collections.unmodifiableList(recordAndOffsets); } + public String getSequenceName() { return referenceSequence.getSequenceName(); } + @Override public String toString() { return referenceSequence.getSequenceName() + ":" + position; } + public int getSequenceLength() {return referenceSequence.getSequenceLength();} public List getDeletedInRecord() { return (deletedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(deletedInRecord); } From 25556c94e797e9c3f2a7ac72694723afc301a4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 18 May 2016 14:42:48 +0200 Subject: [PATCH 5/5] adding more tests --- .../samtools/util/SamLocusIteratorTest.java | 98 ++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) diff --git a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java index 9e8e898684..d48459795a 100644 --- a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java +++ b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java @@ -240,9 +240,9 @@ public void testSimpleInsertion() { // make sure that we are not accumulating deletions Assert.assertEquals(li.getDeletedInRecord().size(), 0); if (incIndels && li.getPosition() == insStart) { - Assert.assertEquals(li.getInsertedInRecord().size(), coverage, "Tracking indels: " + incIndels + ". At " + li.toString()); + Assert.assertEquals(li.getInsertedInRecord().size(), coverage); } else { - Assert.assertEquals(li.getInsertedInRecord().size(), 0, "Tracking indels: " + incIndels + ". At " + li.toString()); + Assert.assertEquals(li.getInsertedInRecord().size(), 0); } } } @@ -326,6 +326,100 @@ public void testStartWithSoftClipAndInsertion() { } } + /** + * Test an insertion after N in CIGAR + */ + @Test + public void testNBeforeInsertion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for (int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record" + i, 0, startPosition, true, false, "2M4N3I27M", null, 10); + } + final int startN = 167; + final int endN = 170; + + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[] {false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + // skipping Ns + if (pos >= startN && pos <= endN) { + pos = (incIndels) ? endN : endN + 1; + } + Assert.assertEquals(li.getPosition(), pos); + // accumulation of coverage + Assert.assertEquals(li.getRecordAndPositions().size(), (pos == endN) ? 0 : coverage); + // no accumulation of deletions + Assert.assertEquals(li.getDeletedInRecord().size(), 0); + // accumulation of insertion + Assert.assertEquals(li.getInsertedInRecord().size(), (pos == endN) ? coverage : 0); + // check offsets of the insertion + if (pos == endN) { + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 2); + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 2); + } + pos++; + } + } + } + + /** + * Test a deletion after N in CIGAR + */ + @Test + public void testNBeforeDeletion() { + final SAMRecordSetBuilder builder = getRecordBuilder(); + // add records up to coverage for the test in that position + final int startPosition = 165; + for (int i = 0; i < coverage; i++) { + // add a negative-strand fragment mapped on chrM with base quality of 10 + builder.addFrag("record" + i, 0, startPosition, true, false, "2M4N4D5M", null, 10); + } + final int startN = 167; + final int endN = 170; + final int startDel = 171; + final int endDel = 174; + + // test both for include indels and do not include indels + for (final boolean incIndels : new boolean[] {false, true}) { + final SamLocusIterator sli = createSamLocusIterator(builder); + sli.setIncludeIndels(incIndels); + // make sure we accumulated depth for each position + int pos = startPosition; + for (final SamLocusIterator.LocusInfo li : sli) { + if (pos >= startN && pos <= endN) { + if (incIndels) { + // skipping Ns + pos = endN + 1; + } else { + // skip deletions + pos = endDel + 1; + } + } + final boolean insideDeletion = incIndels && (pos >= startDel && pos <= endDel); + Assert.assertEquals(li.getPosition(), pos); + // accumulation of coverage + Assert.assertEquals(li.getRecordAndPositions().size(), (insideDeletion) ? 0 : coverage); + // accumulation of deletions + Assert.assertEquals(li.getDeletedInRecord().size(), (insideDeletion) ? coverage : 0); + // no accumulation of insertion + Assert.assertEquals(li.getInsertedInRecord().size(), 0); + // check offsets of the insertion + if (pos == endN) { + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 2); + Assert.assertEquals(li.getInsertedInRecord().get(0).getOffset(), 2); + } + pos++; + } + } + } + /** * Try all CIGAR operands (except H and P) and confirm that loci produced by SamLocusIterator are as expected, * with both including or not indels