Skip to content

Commit

Permalink
Support writing a CRAI index from CRAMContainerStreamWriter (#1351)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite authored and cmnbroad committed May 20, 2019
1 parent 4747d08 commit e2c0fdd
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 19 deletions.
6 changes: 4 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
* but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}.
*
*/
public class CRAMBAIIndexer {
public class CRAMBAIIndexer implements CRAMIndexer {

// The number of references (chromosomes) in the CRAM file
private final int numReferences;
Expand Down Expand Up @@ -130,7 +130,8 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader)
*
* @param container container to be indexed
*/
void processContainer(final Container container, final ValidationStringency validationStringency) {
@Override
public void processContainer(final Container container, final ValidationStringency validationStringency) {
if (container == null || container.isEOF()) {
return;
}
Expand Down Expand Up @@ -226,6 +227,7 @@ public void processAsSingleReferenceSlice(final Slice slice) {
* After all the slices have been processed, finish is called.
* Writes any final information and closes the output file.
*/
@Override
public void finish() {
// process any remaining references
advanceToReference(numReferences);
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* <li>read an existing index from an input stream</li>
* </ul><p>
*/
public class CRAMCRAIIndexer {
public class CRAMCRAIIndexer implements CRAMIndexer {

final private CRAIIndex craiIndex = new CRAIIndex();
final private GZIPOutputStream os;
Expand Down Expand Up @@ -72,9 +72,15 @@ public void processContainer(final Container container) {
craiIndex.processContainer(container);
}

/**
 * {@link CRAMIndexer} entry point. The validation stringency is not used by this indexer;
 * the call is forwarded unchanged to {@link #processContainer(Container)}.
 *
 * @param container container to be indexed
 * @param validationStringency ignored by this implementation
 */
@Override
public void processContainer(final Container container, final ValidationStringency validationStringency) {
    // Only the container is needed here, so drop the stringency and delegate.
    this.processContainer(container);
}

/**
* Finish creating the index by writing the accumulated entries out to the stream.
*/
@Override
public void finish() {
try {
craiIndex.writeIndex(os);
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public class CRAMContainerStreamWriter {
private Set<String> captureTags = new TreeSet<>();
private Set<String> ignoreTags = new TreeSet<>();

private CRAMBAIIndexer indexer;
private CRAMIndexer indexer;
private long offset;

/**
Expand All @@ -78,14 +78,31 @@ public CRAMContainerStreamWriter(
final CRAMReferenceSource source,
final SAMFileHeader samFileHeader,
final String cramId) {
this(outputStream, source, samFileHeader, cramId, indexStream == null ? null : new CRAMBAIIndexer(indexStream, samFileHeader));
}

/**
* Create a CRAMContainerStreamWriter for writing SAM records into a series of CRAM
* containers on output stream, with an optional index.
*
* @param outputStream where to write the CRAM stream.
* @param source reference source
* @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
* @param cramId used for display in error messages
* @param indexer CRAM indexer. Can be null if no index is required.
*/
public CRAMContainerStreamWriter(
final OutputStream outputStream,
final CRAMReferenceSource source,
final SAMFileHeader samFileHeader,
final String cramId,
final CRAMIndexer indexer) {
this.outputStream = outputStream;
this.source = source;
this.samFileHeader = samFileHeader;
this.cramID = cramId;
this.source = source;
this.indexer = indexer;
containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
if (indexStream != null) {
indexer = new CRAMBAIIndexer(indexStream, samFileHeader);
}
}

/**
Expand Down
20 changes: 20 additions & 0 deletions src/main/java/htsjdk/samtools/CRAMIndexer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package htsjdk.samtools;

import htsjdk.samtools.cram.structure.Container;

/**
 * Common contract for CRAM index writers: containers are supplied one at a time through
 * {@link #processContainer(Container, ValidationStringency)}, and the index is completed
 * with a single call to {@link #finish()}.
 */
public interface CRAMIndexer {

    /**
     * Adds the index entries for one container.
     *
     * @param container the container to index
     * @param validationStringency stringency for validating records, intended for
     *                             {@link Container#getSpans(ValidationStringency)}; an
     *                             implementation that performs no validation may ignore it
     */
    void processContainer(Container container, ValidationStringency validationStringency);

    /**
     * Completes the index by writing out the entries accumulated so far.
     */
    void finish();
}
49 changes: 38 additions & 11 deletions src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Log.LogLevel;
import htsjdk.samtools.util.RuntimeIOException;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
Expand All @@ -18,7 +16,6 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -75,6 +72,19 @@ private void doTest(final List<SAMRecord> samRecords, final ByteArrayOutputStrea
final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, indexStream, refSource, header, "test");
containerStream.writeHeader(header);

writeThenReadRecords(samRecords, outStream, refSource, containerStream);
}

/**
 * Builds a {@link CRAMContainerStreamWriter} around the supplied indexer, writes the header,
 * and then delegates to {@code writeThenReadRecords} for the records themselves.
 *
 * @param samRecords records to push through the writer
 * @param outStream in-memory destination for the CRAM stream
 * @param header header written to the stream and used to construct the writer
 * @param indexer indexer handed to the writer
 */
private void doTestWithIndexer(final List<SAMRecord> samRecords, final ByteArrayOutputStream outStream, final SAMFileHeader header, final CRAMIndexer indexer) {
    final ReferenceSource referenceSource = createReferenceSource();
    final CRAMContainerStreamWriter writer =
            new CRAMContainerStreamWriter(outStream, referenceSource, header, "test", indexer);

    writer.writeHeader(header);
    writeThenReadRecords(samRecords, outStream, referenceSource, writer);
}

private void writeThenReadRecords(List<SAMRecord> samRecords, ByteArrayOutputStream outStream, ReferenceSource refSource, CRAMContainerStreamWriter containerStream) {
for (SAMRecord record : samRecords) {
containerStream.writeAlignment(record);
}
Expand Down Expand Up @@ -144,15 +154,32 @@ public void testCRAMContainerAggregatePartitions() throws IOException {
Assert.assertEquals(count, nRecs);
}

@Test(description = "Test CRAMContainerStream with index")
public void testCRAMContainerStreamWithIndex() throws IOException {
@Test(description = "Test CRAMContainerStream with bai index")
public void testCRAMContainerStreamWithBaiIndex() throws IOException {
final List<SAMRecord> samRecords = createRecords(100);
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
doTest(samRecords, outStream, indexStream);
outStream.close();
indexStream.close();
try (ByteArrayOutputStream outStream = new ByteArrayOutputStream();
ByteArrayOutputStream indexStream = new ByteArrayOutputStream()) {
doTest(samRecords, outStream, indexStream);
outStream.flush();
indexStream.flush();
checkCRAMContainerStream(outStream, indexStream, ".bai");
}
}

@Test(description = "Test CRAMContainerStream with crai index")
public void testCRAMContainerStreamWithCraiIndex() throws IOException {
    final List<SAMRecord> records = createRecords(100);
    final SAMFileHeader samHeader = createSAMHeader(SAMFileHeader.SortOrder.coordinate);

    // Both in-memory streams are closed automatically when the block exits.
    try (ByteArrayOutputStream cramBytes = new ByteArrayOutputStream();
         ByteArrayOutputStream craiBytes = new ByteArrayOutputStream()) {
        // The CRAI indexer writes its output into craiBytes.
        final CRAMIndexer craiIndexer = new CRAMCRAIIndexer(craiBytes, samHeader);
        doTestWithIndexer(records, cramBytes, samHeader, craiIndexer);

        cramBytes.flush();
        craiBytes.flush();
        checkCRAMContainerStream(cramBytes, craiBytes, ".crai");
    }
}

private void checkCRAMContainerStream(ByteArrayOutputStream outStream, ByteArrayOutputStream indexStream, String indexExtension) throws IOException {
// write the file out
final File cramTempFile = File.createTempFile("cramContainerStreamTest", ".cram");
cramTempFile.deleteOnExit();
Expand All @@ -161,7 +188,7 @@ public void testCRAMContainerStreamWithIndex() throws IOException {
cramFileStream.close();

// write the index out
final File indexTempFile = File.createTempFile("cramContainerStreamTest", ".bai");
final File indexTempFile = File.createTempFile("cramContainerStreamTest", indexExtension);
indexTempFile.deleteOnExit();
OutputStream indexFileStream = new FileOutputStream(indexTempFile);
indexFileStream.write(indexStream.toByteArray());
Expand Down

0 comments on commit e2c0fdd

Please sign in to comment.