Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Misc CRAM cleanup #1253

Merged
merged 19 commits into from
Jan 17, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/BAMSBIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeEOFException;

import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
Expand Down Expand Up @@ -80,7 +80,7 @@ public static void createIndex(final SeekableStream in, final OutputStream out,
// Process the record start position, then skip to the start of the next BAM record
indexWriter.processRecord(recordStart);
InputStreamUtils.skipFully(blockIn, blockSize);
} catch (EOFException e) {
} catch (RuntimeEOFException e) {
break;
}
}
Expand Down
85 changes: 40 additions & 45 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -115,56 +115,51 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader)
* @param container container to be indexed
*/
public void processContainer(final Container container, final ValidationStringency validationStringency) {
try {
if (container == null || container.isEOF()) {
return;
}
if (container == null || container.isEOF()) {
return;
}

int sliceIndex = 0;
for (final Slice slice : container.slices) {
int sliceIndex = 0;
for (final Slice slice : container.slices) {
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
if (slice.isMultiref()) {
final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader);
final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency);
final Slice fakeSlice = new Slice();
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
if (slice.isMultiref()) {
final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader);
final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency);
final Slice fakeSlice = new Slice();
slice.containerOffset = container.offset;
slice.index = sliceIndex++;
/**
* Unmapped span must be processed after mapped spans:
*/
AlignmentSpan unmappedSpan = refSet.remove(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
for (final int refId : new TreeSet<>(refSet.keySet())) {
final AlignmentSpan span = refSet.get(refId);
fakeSlice.sequenceId = refId;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
fakeSlice.alignmentSpan = span.getSpan();
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
if (unmappedSpan != null) {
final AlignmentSpan span = unmappedSpan;
fakeSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
fakeSlice.alignmentSpan = 0;
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
} else {
processSingleReferenceSlice(slice);
/**
* Unmapped span must be processed after mapped spans:
*/
AlignmentSpan unmappedSpan = refSet.remove(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
for (final int refId : new TreeSet<>(refSet.keySet())) {
final AlignmentSpan span = refSet.get(refId);
fakeSlice.sequenceId = refId;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = span.getStart();
fakeSlice.alignmentSpan = span.getSpan();
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
if (unmappedSpan != null) {
final AlignmentSpan span = unmappedSpan;
fakeSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
fakeSlice.containerOffset = slice.containerOffset;
fakeSlice.offset = slice.offset;
fakeSlice.index = slice.index;

fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
fakeSlice.alignmentSpan = 0;
fakeSlice.nofRecords = span.getCount();
processSingleReferenceSlice(fakeSlice);
}
} else {
processSingleReferenceSlice(slice);
}

} catch (final IOException e) {
throw new RuntimeIOException("Failed to read cram container", e);
}
}

Expand Down
29 changes: 12 additions & 17 deletions src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,10 @@ public SAMRecordIterator iterator(final SAMFileSpan fileSpan) {
// get the file coordinates for the span:
final long[] coordinateArray = ((BAMFileSpan) fileSpan).toCoordinateArray();
if (coordinateArray == null || coordinateArray.length == 0) return emptyIterator;
try {
// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
} catch (final IOException e) {
throw new RuntimeException(e);
}

// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
}

@Override
Expand Down Expand Up @@ -508,16 +505,14 @@ public CRAMIntervalIterator(final QueryInterval[] queries, final boolean contain
super(queries, contained);

if (coordinates != null && coordinates.length != 0) {
try {
unfilteredIterator = new CRAMIterator(
getSeekableStreamOrFailWithRTE(),
referenceSource,
coordinates,
validationStringency
);
} catch (final IOException e) {
throw new RuntimeEOFException(e);
}

unfilteredIterator = new CRAMIterator(
getSeekableStreamOrFailWithRTE(),
referenceSource,
coordinates,
validationStringency
);

getNextRecord(); // advance to the first record that matches the filter criteria
}
}
Expand Down
46 changes: 22 additions & 24 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,15 @@
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;

import java.io.IOException;
import java.io.InputStream;
import java.math.BigInteger;
import java.util.*;

import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.util.RuntimeIOException;

public class CRAMIterator implements SAMRecordIterator {
private static final Log log = Log.getInstance(CRAMIterator.class);
private final CountingInputStream countingInputStream;
private final CramHeader cramHeader;
private final ArrayList<SAMRecord> records;
Expand All @@ -63,8 +61,7 @@ public ValidationStringency getValidationStringency() {
return validationStringency;
}

public void setValidationStringency(
final ValidationStringency validationStringency) {
public void setValidationStringency(final ValidationStringency validationStringency) {
this.validationStringency = validationStringency;
}

Expand All @@ -75,11 +72,13 @@ public void setValidationStringency(
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;

public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency)
throws IOException {
public CRAMIterator(final InputStream inputStream,
final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}

this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
Expand All @@ -88,17 +87,20 @@ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource ref
this.containerIterator = containerIterator;

firstContainerOffset = this.countingInputStream.getCount();
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
records = new ArrayList<>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
}

public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
throws IOException {
public CRAMIterator(final SeekableStream seekableStream,
final CRAMReferenceSource referenceSource,
final long[] coordinates,
final ValidationStringency validationStringency) {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
}

this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
Expand All @@ -107,24 +109,24 @@ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSour
this.containerIterator = containerIterator;

firstContainerOffset = containerIterator.getFirstContainerOffset();
records = new ArrayList<SAMRecord>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
records = new ArrayList<>(CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
normalizer = new CramNormalizer(cramHeader.getSamFileHeader(),
referenceSource);
parser = new ContainerParser(cramHeader.getSamFileHeader());
}

@Deprecated
public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates)
throws IOException {
public CRAMIterator(final SeekableStream seekableStream,
final CRAMReferenceSource referenceSource,
final long[] coordinates) {
this(seekableStream, referenceSource, coordinates, ValidationStringency.DEFAULT_STRINGENCY);
}

public CramHeader getCramHeader() {
return cramHeader;
}

void nextContainer() throws IOException, IllegalArgumentException,
IllegalAccessException, CRAMException {
void nextContainer() throws IllegalArgumentException, CRAMException {

if (containerIterator != null) {
if (!containerIterator.hasNext()) {
Expand All @@ -149,7 +151,7 @@ void nextContainer() throws IOException, IllegalArgumentException,

records.clear();
if (cramRecords == null)
cramRecords = new ArrayList<CramCompressionRecord>(container.nofRecords);
cramRecords = new ArrayList<>(container.nofRecords);
else
cramRecords.clear();

Expand Down Expand Up @@ -250,11 +252,7 @@ public boolean advanceToAlignmentInContainer(final int refIndex, final int pos)
public boolean hasNext() {
if (container != null && container.isEOF()) return false;
if (!iterator.hasNext()) {
try {
nextContainer();
} catch (IOException | IllegalAccessException e) {
throw new SAMException(e);
}
nextContainer();
}

return !records.isEmpty();
Expand Down Expand Up @@ -287,10 +285,10 @@ public void close() {
records.clear();
//noinspection EmptyCatchBlock
try {
if (countingInputStream != null)
if (countingInputStream != null) {
countingInputStream.close();
} catch (final IOException e) {
}
}
} catch (final RuntimeIOException e) { }
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public CRAIEntry(final int sequenceId,
* @param line string formatted as a CRAI index entry
* @throws CRAIIndex.CRAIIndexException
*/
public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
public CRAIEntry(final String line) {
final String[] chunks = line.split("\t");
if (chunks.length != CRAI_INDEX_COLUMNS) {
throw new CRAIIndex.CRAIIndexException(
Expand Down
12 changes: 9 additions & 3 deletions src/main/java/htsjdk/samtools/cram/CRAIIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.*;
import java.util.*;
Expand Down Expand Up @@ -70,11 +71,16 @@ public void processContainer(final Container container) {
}
}

public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException {
return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) {
try {
return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
}
catch (final FileNotFoundException e) {
throw new RuntimeIOException(e);
}
}

public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) throws IOException, CRAIIndexException {
public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) {
final List<CRAIEntry> full = CRAMCRAIIndexer.readIndex(indexStream).getCRAIEntries();
Collections.sort(full);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import htsjdk.samtools.cram.compression.ExternalCompressor;
import htsjdk.samtools.cram.encoding.writer.CramRecordWriter;
import htsjdk.samtools.cram.io.DefaultBitOutputStream;
import htsjdk.samtools.cram.io.ExposedByteArrayOutputStream;
import htsjdk.samtools.cram.structure.block.ExternalBlock;
import htsjdk.samtools.cram.structure.block.Block;
import htsjdk.samtools.cram.structure.CompressionHeader;
Expand Down
Loading