diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index 17adadf0bc..2c99bcf642 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -68,6 +68,10 @@ public void setValidationStringency( this.validationStringency = validationStringency; } + /** + * `samRecordIndex` is only used when validation is not `SILENT` + * (it is passed to the validator so that invalid records can be identified) + */ private long samRecordIndex; private ArrayList cramRecords; @@ -84,7 +88,7 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref this.containerIterator = containerIterator; firstContainerOffset = this.countingInputStream.getCount(); - records = new ArrayList(10000); + records = new ArrayList(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE); normalizer = new CramNormalizer(cramHeader.getSamFileHeader(), referenceSource); parser = new ContainerParser(cramHeader.getSamFileHeader()); @@ -103,7 +107,7 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour this.containerIterator = containerIterator; firstContainerOffset = containerIterator.getFirstContainerOffset(); - records = new ArrayList(10000); + records = new ArrayList(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE); normalizer = new CramNormalizer(cramHeader.getSamFileHeader(), referenceSource); parser = new ContainerParser(cramHeader.getSamFileHeader()); @@ -143,10 +147,7 @@ void nextContainer() throws IOException, IllegalArgumentException, } } - if (records == null) - records = new ArrayList(container.nofRecords); - else - records.clear(); + records.clear(); if (cramRecords == null) cramRecords = new ArrayList(container.nofRecords); else @@ -172,15 +173,17 @@ void nextContainer() throws IOException, IllegalArgumentException, for (int i = 0; i < container.slices.length; i++) { final Slice slice = container.slices[i]; + if (slice.sequenceId < 0) 
continue; + if (!slice.validateRefMD5(refs)) { final String msg = String.format( "Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s", - slice.sequenceId, - slice.alignmentStart, - slice.alignmentSpan, - String.format("%032x", new BigInteger(1, slice.refMD5))); + slice.sequenceId, + slice.alignmentStart, + slice.alignmentSpan, + String.format("%032x", new BigInteger(1, slice.refMD5))); throw new CRAMException(msg); } } @@ -201,12 +204,6 @@ void nextContainer() throws IOException, IllegalArgumentException, samRecord.setValidationStringency(validationStringency); - if (validationStringency != ValidationStringency.SILENT) { - final List validationErrors = samRecord.isValid(); - SAMUtils.processValidationErrors(validationErrors, - samRecordIndex, validationStringency); - } - if (mReader != null) { final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex; final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1; @@ -215,7 +212,6 @@ void nextContainer() throws IOException, IllegalArgumentException, } records.add(samRecord); - samRecordIndex++; } cramRecords.clear(); iterator = records.iterator(); @@ -267,7 +263,15 @@ public boolean hasNext() { @Override public SAMRecord next() { if (hasNext()) { - return iterator.next(); + + SAMRecord samRecord = iterator.next(); + + if (validationStringency != ValidationStringency.SILENT) { + SAMUtils.processValidationErrors(samRecord.isValid(), samRecordIndex++, validationStringency); + } + + return samRecord; + } else { throw new NoSuchElementException(); } diff --git a/src/test/java/htsjdk/samtools/CRAMIteratorTest.java b/src/test/java/htsjdk/samtools/CRAMIteratorTest.java new file mode 100644 index 0000000000..0f495c92a5 --- /dev/null +++ b/src/test/java/htsjdk/samtools/CRAMIteratorTest.java @@ -0,0 +1,45 @@ +package htsjdk.samtools; + +import htsjdk.HtsjdkTest; +import htsjdk.samtools.cram.ref.ReferenceSource; +import 
htsjdk.samtools.seekablestream.SeekableStream; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.io.File; + + +/** + * Verifies that CRAMIterator validates records on retrieval (not on container open) under STRICT validation stringency + * + * @author Anton_Mazur@epam.com, EPAM Systems, Inc. + **/ + +public class CRAMIteratorTest extends HtsjdkTest { + + @Test(description = "This test checks that records validation is deferred until they are retrieved") + public void noValidationFailureOnContainerOpen() { + try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) { + Assert.assertTrue(cramIteratorOverInvalidRecords.hasNext()); + } + } + + @Test(expectedExceptions = SAMException.class) + public void throwOnRecordValidationFailure() { + try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) { + while (cramIteratorOverInvalidRecords.hasNext()) { + cramIteratorOverInvalidRecords.next(); + } + } + } + + private SAMRecordIterator getCramFileIterator(ValidationStringency valStringency) { + final File refFile = new File("src/test/resources/htsjdk/samtools/cram/ce.fa"); + final File cramFile = new File("src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram"); + final ReferenceSource source = new ReferenceSource(refFile); + + final CRAMFileReader cramFileReader = new CRAMFileReader(cramFile, (SeekableStream) null, source); + cramFileReader.setValidationStringency(valStringency); + return cramFileReader.getIterator(); + } +} diff --git a/src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram b/src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram new file mode 100644 index 0000000000..5f8aae53ed Binary files /dev/null and b/src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram differ