Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cram implementation reports validation errors at container granularity #1091

Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 17 additions & 22 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;

import htsjdk.samtools.cram.CRAMException;

Expand Down Expand Up @@ -87,7 +86,7 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref
this.containerIterator = containerIterator;

firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand All @@ -106,7 +105,7 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour
this.containerIterator = containerIterator;

firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(10000);
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
Expand All @@ -131,6 +130,7 @@ void nextContainer() throws IOException, IllegalArgumentException,
nextRecord = null;
return;
}

container = containerIterator.next();
if (container.isEOF()) {
records.clear();
Expand All @@ -146,10 +146,7 @@ void nextContainer() throws IOException, IllegalArgumentException,
}
}

if (records == null)
records = new ArrayList<SAMRecord>(container.nofRecords);
else
records.clear();
records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
else
Expand All @@ -175,15 +172,17 @@ void nextContainer() throws IOException, IllegalArgumentException,

for (int i = 0; i < container.slices.length; i++) {
final Slice slice = container.slices[i];

if (slice.sequenceId < 0)
continue;

if (!slice.validateRefMD5(refs)) {
final String msg = String.format(
"Reference sequence MD5 mismatch for slice: sequence id %d, start %d, span %d, expected MD5 %s",
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
slice.sequenceId,
slice.alignmentStart,
slice.alignmentSpan,
String.format("%032x", new BigInteger(1, slice.refMD5)));
throw new CRAMException(msg);
}
}
Expand All @@ -204,12 +203,6 @@ void nextContainer() throws IOException, IllegalArgumentException,

samRecord.setValidationStringency(validationStringency);

if (validationStringency != ValidationStringency.SILENT) {
final List<SAMValidationError> validationErrors = samRecord.isValid();
SAMUtils.processValidationErrors(validationErrors,
samRecordIndex, validationStringency);
}

if (mReader != null) {
final long chunkStart = (container.offset << 16) | cramRecord.sliceIndex;
final long chunkEnd = ((container.offset << 16) | cramRecord.sliceIndex) + 1;
Expand All @@ -218,7 +211,6 @@ void nextContainer() throws IOException, IllegalArgumentException,
}

records.add(samRecord);
samRecordIndex++;
}
cramRecords.clear();
iterator = records.iterator();
Expand Down Expand Up @@ -271,7 +263,12 @@ public boolean hasNext() {

@Override
public SAMRecord next() {
return iterator.next();
SAMRecord samRecord = iterator.next();
if (validationStringency != ValidationStringency.SILENT) {
SAMUtils.processValidationErrors(samRecord.isValid(), samRecordIndex++, validationStringency);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since samRecordIndex is a class not a local variable, can you add a comment to the declaration saying that it is only used when Validation Stringency is not SILENT, and otherwise it isn't valid. (this was always true even before this PR, but it would be good to make that clear).

}

return samRecord;
}

@Override
Expand All @@ -286,8 +283,7 @@ public void close() {
try {
if (countingInputStream != null)
countingInputStream.close();
} catch (final IOException e) {
}
} catch (final IOException e) { }
}

@Override
Expand All @@ -306,5 +302,4 @@ public void setFileSource(final SamReader mReader) {
public SAMFileHeader getSAMFileHeader() {
return cramHeader.getSamFileHeader();
}

}
56 changes: 56 additions & 0 deletions src/test/java/htsjdk/samtools/CRAMIteratorTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import org.junit.Assert;
import org.testng.annotations.Test;

import java.io.File;


/**
* This test serve for verifying CRAMIterator records validation using strict validation strategy
*
* @author [email protected], EPAM Systems, Inc.
**/

public class CRAMIteratorTest extends HtsjdkTest {

@Test(description = "This test checks that records validation is deferred until they are retrieved")
public void notThrowOnOpeningContainerWithInvalidRecords() {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe rename this to "noValidationFailureOnContainerOpen".

final File refFile = new File("src/test/resources/htsjdk/samtools/cram/ce.fa");
final File cramFileWithInvalidRecs = new File("src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram");
final ReferenceSource source = new ReferenceSource(refFile);
final SAMRecordIterator cramIteratorOverInvalidRecords =
getCramFileIterator(cramFileWithInvalidRecs, source, ValidationStringency.STRICT);

Assert.assertTrue(cramIteratorOverInvalidRecords.hasNext());
cramIteratorOverInvalidRecords.close();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SAMRecordIterator is closable, so you should be able to use try-with-resources here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cmnbroad Hello! Thank you for review, requested changes are done

}
SathemBite marked this conversation as resolved.
Show resolved Hide resolved

@Test(expectedExceptions = SAMException.class)
public void throwOnRecordValidationFailure() {
final File refFile = new File("src/test/resources/htsjdk/samtools/cram/ce.fa");
final File cramFileWithInvalidRecs = new File("src/test/resources/htsjdk/samtools/cram/ce#containsInvalidRecords.3.0.cram");
final ReferenceSource source = new ReferenceSource(refFile);
final SAMRecordIterator cramIteratorOverInvalidRecords =
getCramFileIterator(cramFileWithInvalidRecs, source, ValidationStringency.STRICT);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Most of the code up to this point is duplicated in both test methods. Can you factor that out (maybe into getCramFileIterator).

try{
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

try-with-resources

while (cramIteratorOverInvalidRecords.hasNext()) {
cramIteratorOverInvalidRecords.next();
}
} finally {
cramIteratorOverInvalidRecords.close();
}
}

private SAMRecordIterator getCramFileIterator(File cramFile,
ReferenceSource source,
ValidationStringency valStringency) {

final CRAMFileReader cramFileReader = new CRAMFileReader(cramFile, (SeekableStream) null, source);
cramFileReader.setValidationStringency(valStringency);
return cramFileReader.getIterator();
}
}
Binary file not shown.