Skip to content

Commit

Permalink
Misc CRAM cleanup (#1253)
Browse files Browse the repository at this point in the history
* rm ExposedByteArrayOutputStream
* rm IllegalAccessException
* rm IllegalArgumentException
* rm IOException and EOFException
* rethrow IOExceptions as RuntimeIOExceptions
* rm all other public throws
* misc cleanup HuffmanByteHelper
* made some exceptions more specific
* Boundary implements Iterator<Container>
* add equivalent check
* findBasesByMD5 now throws GaveUpException
* GaveUpException now a top-level (and immutable) class
* Immutable GaveUpException
* misc CRAMIterator cleanup
* change 2 debug logs to infos
  • Loading branch information
jmthibault79 authored Jan 17, 2019
1 parent 16a4e37 commit 5217fe4
Show file tree
Hide file tree
Showing 49 changed files with 784 additions and 751 deletions.
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/BAMSBIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeEOFException;

import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -80,7 +80,7 @@ public static void createIndex(final SeekableStream in, final OutputStream out,
// Process the record start position, then skip to the start of the next BAM record
indexWriter.processRecord(recordStart);
InputStreamUtils.skipFully(blockIn, blockSize);
} catch (EOFException e) {
} catch (RuntimeEOFException e) {
break;
}
}
Expand Down
85 changes: 40 additions & 45 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,56 +115,51 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader)
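* @param container container to be indexed
* @param validationStringency stringency passed to the container parser when collecting the reference spans of a multi-reference slice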
*/
public void processContainer(final Container container, final ValidationStringency validationStringency) {
try {
if (container == null || container.isEOF()) {
return;
}
if (container == null || container.isEOF()) {
return;
}

int sliceIndex = 0;
for (final Slice slice : container.slices) {
int sliceIndex = 0;
for (final Slice slice : container.slices) {
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
if (slice.isMultiref()) {
final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader);
final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency);
final Slice fakeSlice = new Slice();
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
if (slice.isMultiref()) {
final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader);
final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency);
final Slice fakeSlice = new Slice();
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
/**
* Unmapped span must be processed after mapped spans:
*/
AlignmentSpan unmappedSpan = refSet.remove(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
for (final int refId : new TreeSet<>(refSet.keySet())) {
final AlignmentSpan span = refSet.get(refId);
fakeSlice.sequenceId = refId;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
fakeSlice.alignmentSpan = span.getSpan();
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
if (unmappedSpan != null) {
final AlignmentSpan span = unmappedSpan;
fakeSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
fakeSlice.alignmentSpan = 0;
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
} else {
processSingleReferenceSlice(slice);
/**
* Unmapped span must be processed after mapped spans:
*/
AlignmentSpan unmappedSpan = refSet.remove(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
for (final int refId : new TreeSet<>(refSet.keySet())) {
final AlignmentSpan span = refSet.get(refId);
fakeSlice.sequenceId = refId;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
fakeSlice.alignmentSpan = span.getSpan();
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
if (unmappedSpan != null) {
final AlignmentSpan span = unmappedSpan;
fakeSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
fakeSlice.alignmentSpan = 0;
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
} else {
processSingleReferenceSlice(slice);
}

} catch (final IOException e) {
throw new RuntimeIOException("Failed to read cram container", e);
}
}

Expand Down
29 changes: 12 additions & 17 deletions src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,10 @@ public SAMRecordIterator iterator(final SAMFileSpan fileSpan) {
// get the file coordinates for the span:
final long[] coordinateArray = ((BAMFileSpan) fileSpan).toCoordinateArray();
if (coordinateArray == null || coordinateArray.length == 0) return emptyIterator;
try {
// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
} catch (final IOException e) {
throw new RuntimeException(e);
}

// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
}

@Override
Expand Down Expand Up @@ -508,16 +505,14 @@ public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contain
super(queries, contained);

if (coordinates != null && coordinates.length != 0) {
try {
unfilteredIterator = new CRAMIterator(
getSeekableStreamOrFailWithRTE(),
referenceSource,
coordinates,
validationStringency
);
} catch (final IOException e) {
throw new RuntimeEOFException(e);
}

unfilteredIterator = new CRAMIterator(
getSeekableStreamOrFailWithRTE(),
referenceSource,
coordinates,
validationStringency
);

getNextRecord(); // advance to the first record that matches the filter criteria
}
}
Expand Down
46 changes: 22 additions & 24 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,15 @@
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;

import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.*;

import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.util.RuntimeIOException;

public class CRAMIterator implements SAMRecordIterator {
private static final Log log = Log.getInstance(CRAMIterator.class);
private final CountingInputStream countingInputStream;
private final CramHeader cramHeader;
private final ArrayList<SAMRecord> records;
Expand All @@ -63,8 +61,7 @@ public ValidationStringency getValidationStringency() {
return validationStringency;
}

public void setValidationStringency(
final ValidationStringency validationStringency) {
public void setValidationStringency(final ValidationStringency validationStringency) {
this.validationStringency = validationStringency;
}

Expand All @@ -75,11 +72,13 @@ public void setValidationStringency(
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;

public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency)
throws IOException {
public CRAMIterator(final InputStream inputStream,
final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}

this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
Expand All @@ -88,17 +87,20 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref
this.containerIterator = containerIterator;

firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
records = new ArrayList<>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
}

public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
throws IOException {
public CRAMIterator(final SeekableStream seekableStream,
final CRAMReferenceSource referenceSource,
final long[] coordinates,
final ValidationStringency validationStringency) {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}

this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
Expand All @@ -107,24 +109,24 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour
this.containerIterator = containerIterator;

firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
records = new ArrayList<>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
}

@Deprecated
public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates)
throws IOException {
public CRAMIterator(final SeekableStream seekableStream,
final CRAMReferenceSource referenceSource,
final long[] coordinates) {
this(seekableStream, referenceSource, coordinates, ValidationStringency.DEFAULT_STRINGENCY);
}

public CramHeader getCramHeader() {
return cramHeader;
}

void nextContainer() throws IOException, IllegalArgumentException,
IllegalAccessException, CRAMException {
void nextContainer() throws IllegalArgumentException, CRAMException {

if (containerIterator != null) {
if (!containerIterator.hasNext()) {
Expand All @@ -149,7 +151,7 @@ void nextContainer() throws IOException, IllegalArgumentException,

records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
cramRecords = new ArrayList<>(container.nofRecords);
else
cramRecords.clear();

Expand Down Expand Up @@ -250,11 +252,7 @@ public boolean advanceToAlignmentInContainer(final int refIndex, final int pos)
public boolean hasNext() {
if (container != null && container.isEOF()) return false;
if (!iterator.hasNext()) {
try {
nextContainer();
} catch (IOException | IllegalAccessException e) {
throw new SAMException(e);
}
nextContainer();
}

return !records.isEmpty();
Expand Down Expand Up @@ -287,10 +285,10 @@ public void close() {
records.clear();
//noinspection EmptyCatchBlock
try {
if (countingInputStream != null)
if (countingInputStream != null) {
countingInputStream.close();
} catch (final IOException e) {
}
}
} catch (final RuntimeIOException e) { }
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public CRAIEntry(final int sequenceId,
* @param line string formatted as a CRAI index entry
* @throws CRAIIndex.CRAIIndexException if the line does not split into the expected number of tab-separated columns
*/
public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
public CRAIEntry(final String line) {
final String[] chunks = line.split("\t");
if (chunks.length != CRAI_INDEX_COLUMNS) {
throw new CRAIIndex.CRAIIndexException(
Expand Down
12 changes: 9 additions & 3 deletions src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.*;
import java.util.*;
Expand Down Expand Up @@ -70,11 +71,16 @@ public void processContainer(final Container container) {
}
}

public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException {
return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) {
try {
return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
}
catch (final FileNotFoundException e) {
throw new RuntimeIOException(e);
}
}

public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) throws IOException, CRAIIndexException {
public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) {
final List<CRAIEntry> full = CRAMCRAIIndexer.readIndex(indexStream).getCRAIEntries();
Collections.sort(full);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.encoding.writer.CramRecordWriter;
import htsjdk.samtools.cram.io.DefaultBitOutputStream;
import htsjdk.samtools.cram.io.ExposedByteArrayOutputStream;
import htsjdk.samtools.cram.structure.block.ExternalBlock;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.CompressionHeader;
Expand Down
Loading

0 comments on commit 5217fe4

Please sign in to comment.