Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support writing a CRAI index from CRAMContainerStreamWriter #1351

Merged
merged 2 commits into from
May 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/main/java/htsjdk/samtools/CRAMBAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
* but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}.
*
*/
public class CRAMBAIIndexer {
public class CRAMBAIIndexer implements CRAMIndexer {

// The number of references (chromosomes) in the BAM file
private final int numReferences;
Expand Down Expand Up @@ -130,7 +130,8 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader)
*
* @param container container to be indexed
*/
void processContainer(final Container container, final ValidationStringency validationStringency) {
@Override
public void processContainer(final Container container, final ValidationStringency validationStringency) {
if (container == null || container.isEOF()) {
return;
}
Expand Down Expand Up @@ -226,6 +227,7 @@ public void processAsSingleReferenceSlice(final Slice slice) {
* After all the slices have been processed, finish is called.
* Writes any final information and closes the output file.
*/
@Override
public void finish() {
// process any remaining references
advanceToReference(numReferences);
Expand Down
8 changes: 7 additions & 1 deletion src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
* <li>read an existing index from an input stream</li>
* </ul><p>
*/
public class CRAMCRAIIndexer {
public class CRAMCRAIIndexer implements CRAMIndexer {

final private CRAIIndex craiIndex = new CRAIIndex();
final private GZIPOutputStream os;
Expand Down Expand Up @@ -72,9 +72,15 @@ public void processContainer(final Container container) {
craiIndex.processContainer(container);
}

/**
 * Index a single container through the {@link CRAMIndexer} entry point.
 * The call is forwarded unchanged to {@link #processContainer(Container)}.
 *
 * @param container the container to index
 * @param validationStringency accepted to satisfy the interface signature; this method does not read it
 */
@Override
public void processContainer(final Container container, final ValidationStringency validationStringency) {
    // Only the container is passed on; the stringency argument is dropped here.
    this.processContainer(container);
}

/**
* Finish creating the index by writing the accumulated entries out to the stream.
*/
@Override
public void finish() {
try {
craiIndex.writeIndex(os);
Expand Down
27 changes: 22 additions & 5 deletions src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public class CRAMContainerStreamWriter {
private Set<String> captureTags = new TreeSet<>();
private Set<String> ignoreTags = new TreeSet<>();

private CRAMBAIIndexer indexer;
private CRAMIndexer indexer;
private long offset;

/**
Expand All @@ -78,14 +78,31 @@ public CRAMContainerStreamWriter(
final CRAMReferenceSource source,
final SAMFileHeader samFileHeader,
final String cramId) {
this(outputStream, source, samFileHeader, cramId, indexStream == null ? null : new CRAMBAIIndexer(indexStream, samFileHeader));
}

/**
* Create a CRAMContainerStreamWriter for writing SAM records into a series of CRAM
* containers on output stream, with an optional index.
*
* @param outputStream where to write the CRAM stream.
* @param source reference source
* @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
* @param cramId used for display in error messages
* @param indexer CRAM indexer. Can be null if no index is required.
*/
public CRAMContainerStreamWriter(
final OutputStream outputStream,
final CRAMReferenceSource source,
final SAMFileHeader samFileHeader,
final String cramId,
final CRAMIndexer indexer) {
this.outputStream = outputStream;
this.source = source;
this.samFileHeader = samFileHeader;
this.cramID = cramId;
this.source = source;
this.indexer = indexer;
containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
if (indexStream != null) {
indexer = new CRAMBAIIndexer(indexStream, samFileHeader);
}
}

/**
Expand Down
20 changes: 20 additions & 0 deletions src/main/java/htsjdk/samtools/CRAMIndexer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package htsjdk.samtools;

import htsjdk.samtools.cram.structure.Container;

/**
 * Contract shared by CRAM index builders: containers are supplied one at a time via
 * {@link #processContainer(Container, ValidationStringency)}, and {@link #finish()}
 * writes out whatever has been accumulated.
 */
public interface CRAMIndexer {

    /**
     * Create index entries for a single container.
     *
     * @param container            the container to index
     * @param validationStringency stringency for validating records, passed to
     *                             {@link Container#getSpans(ValidationStringency)}
     */
    void processContainer(Container container, ValidationStringency validationStringency);

    /**
     * Finish creating the index by writing the accumulated entries out.
     */
    void finish();
}
49 changes: 38 additions & 11 deletions src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,9 @@
import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
import htsjdk.samtools.seekablestream.SeekableMemoryStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Log.LogLevel;
import htsjdk.samtools.util.RuntimeIOException;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
Expand All @@ -18,7 +16,6 @@
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -75,6 +72,19 @@ private void doTest(final List<SAMRecord> samRecords, final ByteArrayOutputStrea
final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, indexStream, refSource, header, "test");
containerStream.writeHeader(header);

writeThenReadRecords(samRecords, outStream, refSource, containerStream);
}

private void doTestWithIndexer(final List<SAMRecord> samRecords, final ByteArrayOutputStream outStream, final SAMFileHeader header, final CRAMIndexer indexer) {
final ReferenceSource refSource = createReferenceSource();

final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, refSource, header, "test", indexer);
containerStream.writeHeader(header);
tomwhite marked this conversation as resolved.
Show resolved Hide resolved

writeThenReadRecords(samRecords, outStream, refSource, containerStream);
}

private void writeThenReadRecords(List<SAMRecord> samRecords, ByteArrayOutputStream outStream, ReferenceSource refSource, CRAMContainerStreamWriter containerStream) {
for (SAMRecord record : samRecords) {
containerStream.writeAlignment(record);
}
Expand Down Expand Up @@ -144,15 +154,32 @@ public void testCRAMContainerAggregatePartitions() throws IOException {
Assert.assertEquals(count, nRecs);
}

@Test(description = "Test CRAMContainerStream with index")
public void testCRAMContainerStreamWithIndex() throws IOException {
@Test(description = "Test CRAMContainerStream with bai index")
public void testCRAMContainerStreamWithBaiIndex() throws IOException {
final List<SAMRecord> samRecords = createRecords(100);
final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
doTest(samRecords, outStream, indexStream);
outStream.close();
indexStream.close();
try (ByteArrayOutputStream outStream = new ByteArrayOutputStream();
ByteArrayOutputStream indexStream = new ByteArrayOutputStream()) {
doTest(samRecords, outStream, indexStream);
outStream.flush();
indexStream.flush();
checkCRAMContainerStream(outStream, indexStream, ".bai");
}
}

// Drives the container stream writer with a CRAMCRAIIndexer writing to an in-memory
// index stream, then hands both byte buffers to checkCRAMContainerStream with the
// ".crai" extension.
@Test(description = "Test CRAMContainerStream with crai index")
public void testCRAMContainerStreamWithCraiIndex() throws IOException {
    final List<SAMRecord> records = createRecords(100);
    final SAMFileHeader samHeader = createSAMHeader(SAMFileHeader.SortOrder.coordinate);

    try (ByteArrayOutputStream cramBytes = new ByteArrayOutputStream();
         ByteArrayOutputStream craiBytes = new ByteArrayOutputStream()) {
        // The indexer writes into craiBytes; it is built before the write helper runs.
        final CRAMCRAIIndexer craiIndexer = new CRAMCRAIIndexer(craiBytes, samHeader);
        doTestWithIndexer(records, cramBytes, samHeader, craiIndexer);

        cramBytes.flush();
        craiBytes.flush();

        checkCRAMContainerStream(cramBytes, craiBytes, ".crai");
    }
}

private void checkCRAMContainerStream(ByteArrayOutputStream outStream, ByteArrayOutputStream indexStream, String indexExtension) throws IOException {
// write the file out
final File cramTempFile = File.createTempFile("cramContainerStreamTest", ".cram");
cramTempFile.deleteOnExit();
Expand All @@ -161,7 +188,7 @@ public void testCRAMContainerStreamWithIndex() throws IOException {
cramFileStream.close();

// write the index out
final File indexTempFile = File.createTempFile("cramContainerStreamTest", ".bai");
final File indexTempFile = File.createTempFile("cramContainerStreamTest", indexExtension);
indexTempFile.deleteOnExit();
OutputStream indexFileStream = new FileOutputStream(indexTempFile);
indexFileStream.write(indexStream.toByteArray());
Expand Down