diff --git a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java index cd0cf3fe69..4448f81adc 100755 --- a/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMBAIIndexer.java @@ -79,7 +79,7 @@ * but it is unused. This would be accomplished via {@link #createIndex(SeekableStream, File, Log, ValidationStringency)}. * */ -public class CRAMBAIIndexer { +public class CRAMBAIIndexer implements CRAMIndexer { // The number of references (chromosomes) in the BAM file private final int numReferences; @@ -130,7 +130,8 @@ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) * * @param container container to be indexed */ - void processContainer(final Container container, final ValidationStringency validationStringency) { + @Override + public void processContainer(final Container container, final ValidationStringency validationStringency) { if (container == null || container.isEOF()) { return; } @@ -226,6 +227,7 @@ public void processAsSingleReferenceSlice(final Slice slice) { * After all the slices have been processed, finish is called. * Writes any final information and closes the output file. */ + @Override public void finish() { // process any remaining references advanceToReference(numReferences); diff --git a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java index 0ac24929ee..a1f8cd7486 100644 --- a/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java +++ b/src/main/java/htsjdk/samtools/CRAMCRAIIndexer.java @@ -26,7 +26,7 @@ *
 <li>read an existing index from an input stream</li>
 * </ul>

    */ -public class CRAMCRAIIndexer { +public class CRAMCRAIIndexer implements CRAMIndexer { final private CRAIIndex craiIndex = new CRAIIndex(); final private GZIPOutputStream os; @@ -72,9 +72,15 @@ public void processContainer(final Container container) { craiIndex.processContainer(container); } + @Override + public void processContainer(final Container container, final ValidationStringency validationStringency) { + processContainer(container); + } + /** * Finish creating the index by writing the accumulated entries out to the stream. */ + @Override public void finish() { try { craiIndex.writeIndex(os); diff --git a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java index b87744e45d..509695ab4d 100644 --- a/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java +++ b/src/main/java/htsjdk/samtools/CRAMContainerStreamWriter.java @@ -59,7 +59,7 @@ public class CRAMContainerStreamWriter { private Set captureTags = new TreeSet<>(); private Set ignoreTags = new TreeSet<>(); - private CRAMBAIIndexer indexer; + private CRAMIndexer indexer; private long offset; /** @@ -78,14 +78,31 @@ public CRAMContainerStreamWriter( final CRAMReferenceSource source, final SAMFileHeader samFileHeader, final String cramId) { + this(outputStream, source, samFileHeader, cramId, indexStream == null ? null : new CRAMBAIIndexer(indexStream, samFileHeader)); + } + + /** + * Create a CRAMContainerStreamWriter for writing SAM records into a series of CRAM + * containers on output stream, with an optional index. + * + * @param outputStream where to write the CRAM stream. + * @param source reference source + * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg. + * @param cramId used for display in error messages + * @param indexer CRAM indexer. Can be null if no index is required.
+ */ + public CRAMContainerStreamWriter( + final OutputStream outputStream, + final CRAMReferenceSource source, + final SAMFileHeader samFileHeader, + final String cramId, + final CRAMIndexer indexer) { this.outputStream = outputStream; + this.source = source; this.samFileHeader = samFileHeader; this.cramID = cramId; - this.source = source; + this.indexer = indexer; containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice); - if (indexStream != null) { - indexer = new CRAMBAIIndexer(indexStream, samFileHeader); - } } /** diff --git a/src/main/java/htsjdk/samtools/CRAMIndexer.java b/src/main/java/htsjdk/samtools/CRAMIndexer.java new file mode 100644 index 0000000000..d59e8a1c1e --- /dev/null +++ b/src/main/java/htsjdk/samtools/CRAMIndexer.java @@ -0,0 +1,20 @@ +package htsjdk.samtools; + +import htsjdk.samtools.cram.structure.Container; + +/** + * Interface for indexing CRAM. + */ +public interface CRAMIndexer { + /** + * Create index entries for a single container. + * @param container the container to index + * @param validationStringency stringency for validating records, passed to {@link Container#getSpans(ValidationStringency)} + */ + void processContainer(final Container container, final ValidationStringency validationStringency); + + /** + * Finish creating the index by writing the accumulated entries out. 
+ */ + void finish(); +} diff --git a/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java b/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java index 9ab9ed2784..3b7d1b7bbd 100644 --- a/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java +++ b/src/test/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java @@ -3,11 +3,9 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.reference.InMemoryReferenceSequenceFile; -import htsjdk.samtools.seekablestream.SeekableMemoryStream; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.Log; import htsjdk.samtools.util.Log.LogLevel; -import htsjdk.samtools.util.RuntimeIOException; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -18,7 +16,6 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.StringWriter; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -75,6 +72,19 @@ private void doTest(final List samRecords, final ByteArrayOutputStrea final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, indexStream, refSource, header, "test"); containerStream.writeHeader(header); + writeThenReadRecords(samRecords, outStream, refSource, containerStream); + } + + private void doTestWithIndexer(final List samRecords, final ByteArrayOutputStream outStream, final SAMFileHeader header, final CRAMIndexer indexer) { + final ReferenceSource refSource = createReferenceSource(); + + final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, refSource, header, "test", indexer); + containerStream.writeHeader(header); + + writeThenReadRecords(samRecords, outStream, refSource, containerStream); + } + + private void writeThenReadRecords(List samRecords, ByteArrayOutputStream outStream, ReferenceSource refSource, 
CRAMContainerStreamWriter containerStream) { for (SAMRecord record : samRecords) { containerStream.writeAlignment(record); } @@ -144,15 +154,32 @@ public void testCRAMContainerAggregatePartitions() throws IOException { Assert.assertEquals(count, nRecs); } - @Test(description = "Test CRAMContainerStream with index") - public void testCRAMContainerStreamWithIndex() throws IOException { + @Test(description = "Test CRAMContainerStream with bai index") + public void testCRAMContainerStreamWithBaiIndex() throws IOException { final List samRecords = createRecords(100); - final ByteArrayOutputStream outStream = new ByteArrayOutputStream(); - final ByteArrayOutputStream indexStream = new ByteArrayOutputStream(); - doTest(samRecords, outStream, indexStream); - outStream.close(); - indexStream.close(); + try (ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + ByteArrayOutputStream indexStream = new ByteArrayOutputStream()) { + doTest(samRecords, outStream, indexStream); + outStream.flush(); + indexStream.flush(); + checkCRAMContainerStream(outStream, indexStream, ".bai"); + } + } + + @Test(description = "Test CRAMContainerStream with crai index") + public void testCRAMContainerStreamWithCraiIndex() throws IOException { + final List samRecords = createRecords(100); + final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate); + try (ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + ByteArrayOutputStream indexStream = new ByteArrayOutputStream()) { + doTestWithIndexer(samRecords, outStream, header, new CRAMCRAIIndexer(indexStream, header)); + outStream.flush(); + indexStream.flush(); + checkCRAMContainerStream(outStream, indexStream, ".crai"); + } + } + private void checkCRAMContainerStream(ByteArrayOutputStream outStream, ByteArrayOutputStream indexStream, String indexExtension) throws IOException { // write the file out final File cramTempFile = File.createTempFile("cramContainerStreamTest", ".cram"); 
cramTempFile.deleteOnExit(); @@ -161,7 +188,7 @@ public void testCRAMContainerStreamWithIndex() throws IOException { cramFileStream.close(); // write the index out - final File indexTempFile = File.createTempFile("cramContainerStreamTest", ".bai"); + final File indexTempFile = File.createTempFile("cramContainerStreamTest", indexExtension); indexTempFile.deleteOnExit(); OutputStream indexFileStream = new FileOutputStream(indexTempFile); indexFileStream.write(indexStream.toByteArray());