Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cram implementation reports validation errors at container granularity #1091

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ public void setValidationStringency(
this.validationStringency = validationStringency;
}

/**
* `samRecordIndex` only used when validation is not `SILENT`
* (for identification by the validator which records are invalid)
*/
SathemBite marked this conversation as resolved.
Show resolved Hide resolved
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;

Expand All @@ -84,7 +88,7 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref
this.containerIterator = containerIterator;

firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand All @@ -103,7 +107,7 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour
this.containerIterator = containerIterator;

firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand Down Expand Up @@ -143,10 +147,7 @@ void nextContainer() throws IOException, IllegalArgumentException,
}
}

if (records == null)
records = new ArrayList<SAMRecord>(container.nofRecords);
else
records.clear();
records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
else
Expand All @@ -172,15 +173,17 @@ void nextContainer() throws IOException, IllegalArgumentException,

for (int i = 0; i < container.slices.length; i++) {
final Slice slice = container.slices[i];

if (slice.sequenceId < 0)
continue;

if (!slice.validateRefMD5(refs)) {
final String msg = String.format(
"Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s",
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
throw new CRAMException(msg);
}
}
Expand All @@ -201,12 +204,6 @@ void nextContainer() throws IOException, IllegalArgumentException,

samRecord.setValidationStringency(validationStringency);

if (validationStringency != ValidationStringency.SILENT) {
final List<SAMValidationError> validationErrors = samRecord.isValid();
SAMUtils.processValidationErrors(validationErrors,
samRecordIndex, validationStringency);
}

if (mReader != null) {
final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1;
Expand All @@ -215,7 +212,6 @@ void nextContainer() throws IOException, IllegalArgumentException,
}

records.add(samRecord);
samRecordIndex++;
}
cramRecords.clear();
iterator = records.iterator();
Expand Down Expand Up @@ -267,7 +263,15 @@ public boolean hasNext() {
@Override
public SAMRecord next() {
if (hasNext()) {
return iterator.next();

SAMRecord samRecord = iterator.next();

if (validationStringency != ValidationStringency.SILENT) {
SAMUtils.processValidationErrors(samRecord.isValid(), samRecordIndex++, validationStringency);
}

return samRecord;

} else {
throw new NoSuchElementException();
}
Expand Down
45 changes: 45 additions & 0 deletions src/test/java/htsjdk/samtools/CRAMIteratorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.testng.Assert;
import org.testng.annotations.Test;

import java.io.File;


/**
* This test serves for verifying CRAMIterator records validation using strict validation strategy
*
* @author [email protected], EPAM Systems, Inc.
**/

public class CRAMIteratorTest extends HtsjdkTest {

@Test(description = "This test checks that records validation is deferred until they are retrieved")
public void noValidationFailureOnContainerOpen() {
try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) {
Assert.assertTrue(cramIteratorOverInvalidRecords.hasNext());
}
}
SathemBite marked this conversation as resolved.
Show resolved Hide resolved

@Test(expectedExceptions = SAMException.class)
public void throwOnRecordValidationFailure() {
try (SAMRecordIterator cramIteratorOverInvalidRecords = getCramFileIterator(ValidationStringency.STRICT)) {
while (cramIteratorOverInvalidRecords.hasNext()) {
cramIteratorOverInvalidRecords.next();
}
}
}

private SAMRecordIterator getCramFileIterator(ValidationStringency valStringency) {
final File refFile = new File("src/test/resources/htsjdk/samtools/cram/ce.fa");
final File cramFile = new File("src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram");
final ReferenceSource source = new ReferenceSource(refFile);

final CRAMFileReader cramFileReader = new CRAMFileReader(cramFile, (SeekableStream) null, source);
cramFileReader.setValidationStringency(valStringency);
return cramFileReader.getIterator();
}
}
Binary file not shown.