Skip to content

Commit

Permalink
Implement CRAI writing for CRAM (default index remains BAI).
Browse files Browse the repository at this point in the history
  • Loading branch information
cmnbroad committed Jun 14, 2016
1 parent cf1a42b commit e86f6cd
Show file tree
Hide file tree
Showing 11 changed files with 1,137 additions and 174 deletions.
135 changes: 135 additions & 0 deletions src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package htsjdk.samtools;

import htsjdk.samtools.cram.CRAIEntry;
import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.cram.structure.*;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.RuntimeIOException;

import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.IOException;
import java.util.Scanner;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
* Indexer for creating/reading/writing a CRAIIndex for a CRAM file/stream. There
* are three ways to obtain an index:
* </p><ul>
* <li>create an index for an entire CRAM stream and write it to an output stream</li>
* <li>create an index on-the-fly by processing one container at a time</li>
* <li>read an existing index from an input stream</li>
* </ul><p>
*/
public class CRAMCRAIIndexer {

final private CRAIIndex craiIndex = new CRAIIndex();
final private GZIPOutputStream os;

/**
* Create a CRAMCRAIIndexer that writes to the given output stream.
* @param os output stream to which the index will be written
* @param samHeader SAMFileHeader - user to verify sort order
*/
public CRAMCRAIIndexer(OutputStream os, SAMFileHeader samHeader) {
if (samHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new SAMException("CRAM file be coordinate-sorted for indexing.");
}
try {
this.os = new GZIPOutputStream(new BufferedOutputStream(os));
}
catch (IOException e) {
throw new RuntimeIOException("Error opening CRAI index output stream");
}
}

/**
* Create index entries for a single container.
* @param container the container to index
*/
public void processContainer(final Container container) {
craiIndex.processContainer(container);
}

// TODO this is only used by test code
public void addEntry(CRAIEntry entry) {
craiIndex.addEntry(entry);
}

/**
* Finish creating the index by writing the accumulated entries out to the stream.
*/
public void finish() {
try {
craiIndex.writeIndex(os);
os.flush();
os.close();
}
catch (IOException e) {
throw new RuntimeIOException("Error writing CRAI index to output stream");
}
}

/**
* Generate and write a CRAI index to an output stream from a CRAM input stream
*
* @param cramStream CRAM stream to index; must be coordinate sorted
* @param craiStream stream for output index
*/
public static void writeIndex(final SeekableStream cramStream, OutputStream craiStream) {
try {
final CramHeader cramHeader = CramIO.readCramHeader(cramStream);
final CRAMCRAIIndexer indexer = new CRAMCRAIIndexer(craiStream, cramHeader.getSamFileHeader());
final Version cramVersion = cramHeader.getVersion();

// get the first container and it's offset
long offset = cramStream.position();
Container container = ContainerIO.readContainer(cramVersion, cramStream);

while (container != null && !container.isEOF()) {
container.offset = offset;
indexer.processContainer(container);
offset = cramStream.position();
container = ContainerIO.readContainer(cramVersion, cramStream);
}

indexer.finish();
}
catch (IOException e) {
throw new RuntimeIOException("Error writing CRAI index to output stream");
}
}

/**
* Read an input stream containing a .crai index and return a CRAIIndex object.
* @param is Input stream to read
* @return A CRAIIndex object representing the index.
*/
public static CRAIIndex readIndex(final InputStream is) {
CRAIIndex craiIndex = new CRAIIndex();
Scanner scanner = null;

try {
scanner = new Scanner(new GZIPInputStream(is));
while (scanner.hasNextLine()) {
final String line = scanner.nextLine();
craiIndex.addEntry(new CRAIEntry(line));
}
}
catch (IOException e) {
throw new RuntimeIOException("Error reading CRAI index from output stream");
}
finally {
if (null != scanner) {
scanner.close();
}
}

return craiIndex;
}

}
16 changes: 7 additions & 9 deletions src/main/java/htsjdk/samtools/CRAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
* Create a CRAMFileReader from either a file or input stream using the reference source returned by
* {@link ReferenceSource#getDefaultCRAMReferenceSource() getDefaultCRAMReferenceSource}.
*
*
* @param cramFile CRAM file to open
* @param inputStream CRAM stream to read
*
Expand Down Expand Up @@ -172,14 +171,13 @@ public CRAMFileReader(final InputStream inputStream, final SeekableStream indexI

iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
if (indexInputStream != null) {
try {
mIndex = new CachingBAMFileIndex(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
} catch (Exception e) {
// try CRAI instead:
indexInputStream.seek(0);
final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
if (null != baiStream) {
mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
}
else {
throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream");
}
}
}

Expand Down Expand Up @@ -249,7 +247,7 @@ public boolean hasIndex() {
@Override
public BAMIndex getIndex() {
if (!hasIndex())
throw new SAMException("No index is available for this BAM file.");
throw new SAMException("No index is available for this CRAM file.");
if (mIndex == null) {
final SAMSequenceDictionary dictionary = getFileHeader()
.getSequenceDictionary();
Expand All @@ -265,7 +263,7 @@ public BAMIndex getIndex() {
// convert CRAI into BAI:
final SeekableStream baiStream;
try {
baiStream = CRAIIndex.openCraiFileAsBaiStream(mIndexFile, iterator.getSAMFileHeader().getSequenceDictionary());
baiStream = SamIndexes.asBaiSeekableStreamOrNull(new SeekableFileStream(mIndexFile), iterator.getSAMFileHeader().getSequenceDictionary());
} catch (IOException e) {
throw new RuntimeException(e);
}
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/htsjdk/samtools/cram/CRAIEntry.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package htsjdk.samtools.cram;

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.RuntimeIOException;
Expand Down Expand Up @@ -156,6 +157,23 @@ public int compare(final CRAIEntry o1, final CRAIEntry o2) {
}
};

public static Comparator<CRAIEntry> byStartDesc = new Comparator<CRAIEntry>() {

@Override
public int compare(CRAIEntry o1, CRAIEntry o2) {
if (o1.sequenceId != o2.sequenceId) {
if (o1.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
return 1;
if (o2.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
return -1;
return -o2.sequenceId + o1.sequenceId;
}
if (o1.alignmentStart != o2.alignmentStart)
return o1.alignmentStart - o2.alignmentStart;

return (int) (o1.containerStartOffset - o2.containerStartOffset);
}
};

public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) {
if (e0.sequenceId != e1.sequenceId) {
Expand Down
Loading

0 comments on commit e86f6cd

Please sign in to comment.