diff --git a/src/main/java/htsjdk/samtools/BAMFileReader.java b/src/main/java/htsjdk/samtools/BAMFileReader.java
index 9642de8807..7988f0c7d2 100644
--- a/src/main/java/htsjdk/samtools/BAMFileReader.java
+++ b/src/main/java/htsjdk/samtools/BAMFileReader.java
@@ -337,6 +337,12 @@ static long findVirtualOffsetOfFirstRecord(final File bam) throws IOException {
         return offset;
     }
 
+    /** Reads through the header and sequence records to find the virtual file offset of the first record in the BAM file. */
+    static long findVirtualOffsetOfFirstRecord(final SeekableStream seekableStream) throws IOException {
+        final BAMFileReader reader = new BAMFileReader(seekableStream, (SeekableStream) null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
+        return reader.mFirstRecordPointer;
+    }
+
     /**
      * If true, writes the source of every read into the source SAMRecords.
      * @param enabled true to write source information into each SAMRecord.
@@ -944,6 +950,10 @@ public CloseableIterator<SAMRecord> createIndexIterator(final QueryInterval[] in
         return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
     }
 
+    public long getVirtualFilePointer() {
+        return mCompressedInputStream.getFilePointer();
+    }
+
     /**
      * Iterate over the SAMRecords defined by the sections of the file described in the ctor argument.
      */
diff --git a/src/main/java/htsjdk/samtools/BAMSBIIndexer.java b/src/main/java/htsjdk/samtools/BAMSBIIndexer.java
new file mode 100644
index 0000000000..0830bc1288
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/BAMSBIIndexer.java
@@ -0,0 +1,65 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.io.InputStreamUtils;
+import htsjdk.samtools.seekablestream.SeekablePathStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.util.BlockCompressedInputStream;
+import htsjdk.samtools.util.IOUtil;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Writes SBI files for BAM files, as understood by {@link SBIIndex}.
+ */
+public final class BAMSBIIndexer {
+
+    /**
+     * Perform indexing on the given BAM file, at the granularity level specified.
+     *
+     * @param bamFile the path to the BAM file
+     * @param granularity write the offset of every n-th alignment to the index
+     * @throws IOException as per java IO contract
+     */
+    public static void createIndex(final Path bamFile, final long granularity) throws IOException {
+        Path splittingBaiFile = IOUtil.addExtension(bamFile, SBIIndex.FILE_EXTENSION);
+        try (SeekableStream in = new SeekablePathStream(bamFile); OutputStream out = Files.newOutputStream(splittingBaiFile)) {
+            createIndex(in, out, granularity);
+        }
+    }
+
+    /**
+     * Perform indexing on the given BAM file, at the granularity level specified.
+     *
+     * @param in a seekable stream for reading the BAM file from
+     * @param out the stream to write the index to
+     * @param granularity write the offset of every n-th alignment to the index
+     * @throws IOException as per java IO contract
+     */
+    public static void createIndex(final SeekableStream in, final OutputStream out, final long granularity) throws IOException {
+        long recordStart = SAMUtils.findVirtualOffsetOfFirstRecordInBam(in);
+        try (BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in)) {
+            blockIn.seek(recordStart);
+            final ByteBuffer byteBuffer = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); // BAM is little-endian
+            SBIIndexWriter indexWriter = new SBIIndexWriter(out, granularity);
+            while (true) {
+                try {
+                    recordStart = blockIn.getFilePointer();
+                    InputStreamUtils.readFully(blockIn, byteBuffer.array(), 0, 4);
+                    final int blockSize = byteBuffer.getInt(0); // length of remainder of alignment record
+                    indexWriter.processRecord(recordStart);
+                    InputStreamUtils.skipFully(blockIn, blockSize);
+                } catch (EOFException e) {
+                    break;
+                }
+            }
+            indexWriter.writeVirtualOffset(recordStart);
+            indexWriter.finish(in.length());
+        }
+    }
+}
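For orientation, here is a minimal usage sketch of the new indexer. The BAM path, the class name, and the choice of granularity are illustrative assumptions, not part of this change; `createIndex(Path, long)` writes the index next to the BAM with the `.sbi` extension appended.

```java
import htsjdk.samtools.BAMSBIIndexer;
import htsjdk.samtools.SBIIndexWriter;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

public class CreateSbiExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical input file; the index is written to example.bam.sbi.
        final Path bam = Paths.get("example.bam");
        BAMSBIIndexer.createIndex(bam, SBIIndexWriter.DEFAULT_GRANULARITY);
    }
}
```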
diff --git a/src/main/java/htsjdk/samtools/SAMUtils.java b/src/main/java/htsjdk/samtools/SAMUtils.java
index 408ff121f9..6e5c9bd123 100644
--- a/src/main/java/htsjdk/samtools/SAMUtils.java
+++ b/src/main/java/htsjdk/samtools/SAMUtils.java
@@ -23,6 +23,7 @@
  */
 package htsjdk.samtools;
 
+import htsjdk.samtools.seekablestream.SeekableStream;
 import htsjdk.samtools.util.BinaryCodec;
 import htsjdk.samtools.util.CigarUtil;
 import htsjdk.samtools.util.CloserUtil;
@@ -685,6 +686,18 @@ public static long findVirtualOffsetOfFirstRecordInBam(final File bamFile) {
         }
     }
 
+    /**
+     * Returns the virtual file offset of the first record in a BAM file - i.e. the virtual file
+     * offset after skipping over the text header and the sequence records.
+     */
+    public static long findVirtualOffsetOfFirstRecordInBam(final SeekableStream seekableStream) {
+        try {
+            return BAMFileReader.findVirtualOffsetOfFirstRecord(seekableStream);
+        } catch (final IOException ioe) {
+            throw new RuntimeEOFException(ioe);
+        }
+    }
+
     /**
      * Given a Cigar, Returns blocks of the sequence that have been aligned directly to the
      * reference sequence. Note that clipped portions, and inserted and deleted bases (vs. the reference)
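A note on the "virtual file offset" these methods traffic in: it is the standard BGZF coordinate used throughout htsjdk, packing the compressed block's byte address into the upper 48 bits and the record's offset within the uncompressed block into the lower 16 bits. A small sketch of the arithmetic using the existing `BlockCompressedFilePointerUtil` helpers (the numeric values are illustrative):

```java
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;

public class VirtualOffsetExample {
    public static void main(String[] args) {
        // A BGZF block starting at byte 65280 of the compressed file, with a
        // record beginning 42 bytes into that block's uncompressed contents.
        final long virtualOffset = BlockCompressedFilePointerUtil.makeFilePointer(65280L, 42);

        // The two components are recovered by shifting and masking.
        System.out.println(BlockCompressedFilePointerUtil.getBlockAddress(virtualOffset)); // 65280
        System.out.println(BlockCompressedFilePointerUtil.getBlockOffset(virtualOffset));  // 42
        System.out.println(virtualOffset == ((65280L << 16) | 42));                        // true
    }
}
```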
diff --git a/src/main/java/htsjdk/samtools/SBIIndex.java b/src/main/java/htsjdk/samtools/SBIIndex.java
new file mode 100644
index 0000000000..18fa230f06
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/SBIIndex.java
@@ -0,0 +1,251 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.BinaryCodec;
+import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.NavigableSet;
+import java.util.TreeSet;
+
+/**
+ * An SBI index is an index into BGZF-compressed data files, with an entry for the file position of the start of
+ * every nth record. This class reads SBI files created by {@link BAMSBIIndexer}.
+ */
+public final class SBIIndex {
+
+    public static class Header {
+        private final long fileLength;
+        private final byte[] md5;
+        private final byte[] uuid;
+        private final long totalNumberOfRecords;
+        private final long granularity;
+
+        public Header(long fileLength, byte[] md5, byte[] uuid, long totalNumberOfRecords, long granularity) {
+            this.fileLength = fileLength;
+            this.md5 = md5;
+            this.uuid = uuid;
+            this.totalNumberOfRecords = totalNumberOfRecords;
+            this.granularity = granularity;
+        }
+
+        public long getFileLength() {
+            return fileLength;
+        }
+
+        public byte[] getMd5() {
+            return md5;
+        }
+
+        public byte[] getUuid() {
+            return uuid;
+        }
+
+        public long getTotalNumberOfRecords() {
+            return totalNumberOfRecords;
+        }
+
+        public long getGranularity() {
+            return granularity;
+        }
+    }
+
+    public static final String FILE_EXTENSION = ".sbi";
+
+    /**
+     * SBI magic number.
+     */
+    static final byte[] SBI_MAGIC = "SBI\1".getBytes();
+
+    private final Header header;
+    private final NavigableSet<Long> virtualOffsets;
+
+    /**
+     * Create an in-memory SBI with the given virtual offsets.
+     * @param virtualOffsets the offsets in the index
+     */
+    public SBIIndex(final Header header, final NavigableSet<Long> virtualOffsets) {
+        this.header = header;
+        this.virtualOffsets = new TreeSet<>(virtualOffsets);
+        if (this.virtualOffsets.isEmpty()) {
+            throw new RuntimeException("Invalid SBI format: should contain at least one offset");
+        }
+    }
+
+    /**
+     * Load an SBI into memory from a path.
+     * @param path the path to the SBI file
+     * @throws IOException as per java IO contract
+     */
+    public static SBIIndex load(final Path path) throws IOException {
+        try (InputStream in = new BufferedInputStream(Files.newInputStream(path))) {
+            return readIndex(in);
+        }
+    }
+
+    /**
+     * Load an SBI into memory from a stream.
+     * @param in the stream to read the SBI from
+     */
+    public static SBIIndex load(final InputStream in) {
+        return readIndex(in);
+    }
+
+    private static SBIIndex readIndex(final InputStream in) {
+        BinaryCodec binaryCodec = new BinaryCodec(in);
+        Header header = readHeader(binaryCodec);
+        long numOffsets = binaryCodec.readLong();
+        NavigableSet<Long> virtualOffsets = new TreeSet<>();
+        long prev = -1;
+        for (long i = 0; i < numOffsets; i++) {
+            long cur = binaryCodec.readLong();
+            if (prev > cur) {
+                throw new RuntimeException(String.format(
+                        "Invalid SBI; offsets not in order: %#x > %#x",
+                        prev, cur));
+            }
+            virtualOffsets.add(prev = cur);
+        }
+        return new SBIIndex(header, virtualOffsets);
+    }
+
+    private static Header readHeader(BinaryCodec binaryCodec) {
+        final byte[] buffer = new byte[SBI_MAGIC.length];
+        binaryCodec.readBytes(buffer);
+        if (!Arrays.equals(buffer, SBI_MAGIC)) {
+            throw new RuntimeException("Invalid file header in SBI: " + new String(buffer) + " (" + Arrays.toString(buffer) + ")");
+        }
+        long fileLength = binaryCodec.readLong();
+        byte[] md5 = new byte[16];
+        binaryCodec.readBytes(md5);
+        byte[] uuid = new byte[16];
+        binaryCodec.readBytes(uuid);
+        long totalNumberOfRecords = binaryCodec.readLong();
+        long granularity = binaryCodec.readLong();
+        return new Header(fileLength, md5, uuid, totalNumberOfRecords, granularity);
+    }
+
+    /**
+     * Returns the granularity of the index, that is, the number of alignments between successive entries in the
+     * index, or zero if not specified.
+     * @return the granularity of the index
+     */
+    public long getGranularity() {
+        return header.getGranularity();
+    }
+
+    /**
+     * Returns the entries in the index.
+     *
+     * @return a set of file pointers for all the alignment offsets in the index, in ascending order. The last
+     * virtual file pointer is the position at which the next record would start if it were added to the file.
+     */
+    public NavigableSet<Long> getVirtualOffsets() {
+        return new TreeSet<>(virtualOffsets);
+    }
+
+    /**
+     * Returns the number of entries in the index.
+     *
+     * @return the number of virtual offsets in the index
+     */
+    public long size() {
+        return virtualOffsets.size();
+    }
+
+    /**
+     * Returns the length of the data file in bytes.
+     *
+     * @return the length of the data file in bytes
+     */
+    public long dataFileLength() {
+        return header.getFileLength();
+    }
+
+    /**
+     * Split the data file for this index into non-overlapping chunks of roughly the given size that cover the whole
+     * file and that can be read independently of one another.
+     *
+     * @param splitSize the rough size of each split in bytes
+     * @return a list of contiguous, non-overlapping, sorted chunks that cover the whole data file
+     * @see #getChunk(long, long)
+     */
+    public List<Chunk> split(long splitSize) {
+        if (splitSize <= 0) {
+            throw new IllegalArgumentException(String.format("Split size must be positive: %s", splitSize));
+        }
+        long fileSize = dataFileLength();
+        List<Chunk> chunks = new ArrayList<>();
+        for (long splitStart = 0; splitStart < fileSize; splitStart += splitSize) {
+            Chunk chunk = getChunk(splitStart, splitStart + splitSize);
+            if (chunk != null) {
+                chunks.add(chunk);
+            }
+        }
+        return chunks;
+    }
+
+    /**
+     * Return a chunk that corresponds to the given range in the data file. Note that the chunk does not necessarily
+     * completely cover the given range, however this method will map a set of contiguous, non-overlapping file ranges
+     * that cover the whole data file to a set of contiguous, non-overlapping chunks that cover the whole data file.
+     *
+     * @param splitStart the start of the file range (inclusive)
+     * @param splitEnd the end of the file range (exclusive)
+     * @return a chunk whose virtual start is at the first alignment start position that is greater than or equal to the
+     * given split start position, and whose virtual end is at the first alignment start position that is greater than
+     * or equal to the given split end position, or null if the chunk would be empty.
+     * @see #split(long)
+     */
+    public Chunk getChunk(long splitStart, long splitEnd) {
+        if (splitStart >= splitEnd) {
+            throw new IllegalArgumentException(String.format("Split start (%s) must be less than end (%s)", splitStart, splitEnd));
+        }
+        long maxEnd = BlockCompressedFilePointerUtil.getBlockAddress(virtualOffsets.last());
+        splitStart = Math.min(splitStart, maxEnd);
+        splitEnd = Math.min(splitEnd, maxEnd);
+        long virtualSplitStart = BlockCompressedFilePointerUtil.makeFilePointer(splitStart);
+        long virtualSplitEnd = BlockCompressedFilePointerUtil.makeFilePointer(splitEnd);
+        Long virtualSplitStartAlignment = virtualOffsets.ceiling(virtualSplitStart);
+        Long virtualSplitEndAlignment = virtualOffsets.ceiling(virtualSplitEnd);
+        // neither virtualSplitStartAlignment nor virtualSplitEndAlignment should ever be null, but check anyway
+        if (virtualSplitStartAlignment == null) {
+            throw new IllegalArgumentException(String.format("No virtual offset found for virtual file pointer %s, last virtual offset %s",
+                    BlockCompressedFilePointerUtil.asString(virtualSplitStart), BlockCompressedFilePointerUtil.asString(virtualOffsets.last())));
+        }
+        if (virtualSplitEndAlignment == null) {
+            throw new IllegalArgumentException(String.format("No virtual offset found for virtual file pointer %s, last virtual offset %s",
+                    BlockCompressedFilePointerUtil.asString(virtualSplitEnd), BlockCompressedFilePointerUtil.asString(virtualOffsets.last())));
+        }
+        if (virtualSplitStartAlignment.longValue() == virtualSplitEndAlignment.longValue()) {
+            return null;
+        }
+        return new Chunk(virtualSplitStartAlignment, virtualSplitEndAlignment);
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (o == null || getClass() != o.getClass()) return false;
+
+        SBIIndex that = (SBIIndex) o;
+
+        return virtualOffsets.equals(that.virtualOffsets);
+    }
+
+    @Override
+    public int hashCode() {
+        return virtualOffsets.hashCode();
+    }
+
+    @Override
+    public String toString() {
+        return virtualOffsets.toString();
+    }
+}
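To show how `split` and `getChunk` are meant to be consumed, here is a hedged sketch that reads each chunk independently through the public `SamReader.indexing().iterator(SAMFileSpan)` API. The file names and the ~10 MB split size are assumptions for illustration:

```java
import htsjdk.samtools.BAMFileSpan;
import htsjdk.samtools.Chunk;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SBIIndex;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloseableIterator;

import java.io.File;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.List;

public class SplitReadExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical paths: a BAM plus the .sbi written by BAMSBIIndexer.
        final File bam = new File("example.bam");
        final SBIIndex index = SBIIndex.load(Paths.get("example.bam.sbi"));

        // Carve the file into ~10 MB ranges; each chunk can be read independently,
        // e.g. one chunk per worker in a distributed job.
        final List<Chunk> chunks = index.split(10 * 1024 * 1024);
        try (SamReader reader = SamReaderFactory.makeDefault().open(bam)) {
            for (Chunk chunk : chunks) {
                try (CloseableIterator<SAMRecord> it =
                             reader.indexing().iterator(new BAMFileSpan(chunk))) {
                    while (it.hasNext()) {
                        final SAMRecord record = it.next(); // process the record
                    }
                }
            }
        }
    }
}
```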
diff --git a/src/main/java/htsjdk/samtools/SBIIndexWriter.java b/src/main/java/htsjdk/samtools/SBIIndexWriter.java
new file mode 100644
index 0000000000..49a6ab7134
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/SBIIndexWriter.java
@@ -0,0 +1,113 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.BinaryCodec;
+
+import java.io.OutputStream;
+import java.util.NavigableSet;
+import java.util.TreeSet;
+
+/**
+ * Writes SBI files as understood by {@link SBIIndex}.
+ * <p>
+ * To use this class, first construct an instance from an output stream and a desired granularity. Then for each
+ * record in the file being indexed, pass the virtual file offset of the record to the {@link #processRecord} method.
+ * The indexer will keep a count of the records passed in, and index every nth record. When there are no records
+ * left, call {@link #finish(long)} to complete writing the index.
+ */
+public final class SBIIndexWriter {
+
+    // Default to a granularity level of 4096. This is generally sufficient
+    // for very large BAM files, relative to a maximum heap size in the
+    // gigabyte range.
+    public static final long DEFAULT_GRANULARITY = 4096;
+
+    private static final byte[] EMPTY_MD5 = new byte[16];
+    private static final byte[] EMPTY_UUID = new byte[16];
+
+    private final BinaryCodec binaryCodec;
+    private final long granularity;
+    private final NavigableSet<Long> virtualOffsets = new TreeSet<>();
+    private long count;
+
+    /**
+     * Prepare to write an SBI index with the default granularity.
+     *
+     * @param out the stream to write the index to
+     */
+    public SBIIndexWriter(final OutputStream out) {
+        this(out, SBIIndexWriter.DEFAULT_GRANULARITY);
+    }
+
+    /**
+     * Prepare to write an SBI index.
+     *
+     * @param out the stream to write the index to
+     * @param granularity write the offset of every nth record to the index
+     */
+    public SBIIndexWriter(final OutputStream out, final long granularity) {
+        this.binaryCodec = new BinaryCodec(out);
+        this.granularity = granularity;
+    }
+
+    /**
+     * Process a record for the index: the offset of every nth record will be written to the index.
+     *
+     * @param virtualOffset virtual file pointer of the record
+     */
+    public void processRecord(final long virtualOffset) {
+        if (count++ % granularity == 0) {
+            writeVirtualOffset(virtualOffset);
+        }
+    }
+
+    /**
+     * Write the given virtual offset to the index. The offset is always written to the index, no account is taken
+     * of the granularity.
+     *
+     * @param virtualOffset virtual file pointer of the record
+     */
+    public void writeVirtualOffset(long virtualOffset) {
+        virtualOffsets.add(virtualOffset);
+    }
+
+    /**
+     * Complete the index, and close the output stream.
+     *
+     * @param dataFileLength the length of the data file in bytes
+     */
+    public void finish(long dataFileLength) {
+        finish(dataFileLength, null, null);
+    }
+
+    /**
+     * Complete the index, and close the output stream.
+     *
+     * @param dataFileLength the length of the data file in bytes
+     * @param md5 the MD5 hash of the data file, or null if not specified
+     * @param uuid the UUID for the data file, or null if not specified
+     */
+    public void finish(long dataFileLength, byte[] md5, byte[] uuid) {
+        if (md5 != null && md5.length != 16) {
+            throw new IllegalArgumentException("Invalid MD5 length: " + md5.length);
+        }
+        if (uuid != null && uuid.length != 16) {
+            throw new IllegalArgumentException("Invalid UUID length: " + uuid.length);
+        }
+
+        // header
+        binaryCodec.writeBytes(SBIIndex.SBI_MAGIC);
+        binaryCodec.writeLong(dataFileLength);
+        binaryCodec.writeBytes(md5 == null ? EMPTY_MD5 : md5);
+        binaryCodec.writeBytes(uuid == null ? EMPTY_UUID : uuid);
+        binaryCodec.writeLong(count);
+        binaryCodec.writeLong(granularity);
+        binaryCodec.writeLong(virtualOffsets.size());
+
+        // offsets
+        for (long virtualOffset : virtualOffsets) {
+            binaryCodec.writeLong(virtualOffset);
+        }
+        binaryCodec.close();
+    }
+
+}
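The class javadoc above describes a three-step protocol: process each record, record the final offset, then finish. As a standalone sketch, with a hypothetical `recordOffsets` array standing in for the per-record virtual offsets a caller would obtain while reading the data file:

```java
import htsjdk.samtools.SBIIndexWriter;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class WriteSbiExample {
    // recordOffsets: virtual file offsets of each record, in file order (hypothetical input);
    // nextRecordStart: the virtual offset at which the next record would begin (i.e. end of the last record);
    // dataFileLength: length of the data file in bytes.
    static void writeIndex(long[] recordOffsets, long nextRecordStart, long dataFileLength) throws IOException {
        try (OutputStream out = Files.newOutputStream(Paths.get("example.bam.sbi"))) {
            final SBIIndexWriter writer = new SBIIndexWriter(out, SBIIndexWriter.DEFAULT_GRANULARITY);
            for (long offset : recordOffsets) {
                writer.processRecord(offset); // the writer keeps every nth offset
            }
            writer.writeVirtualOffset(nextRecordStart); // final sentinel offset, written unconditionally
            writer.finish(dataFileLength); // writes the header and offsets, then closes the stream
        }
    }
}
```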
diff --git a/src/test/java/htsjdk/samtools/BAMSBIIndexerTest.java b/src/test/java/htsjdk/samtools/BAMSBIIndexerTest.java
new file mode 100644
index 0000000000..75f183d6aa
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/BAMSBIIndexerTest.java
@@ -0,0 +1,137 @@
+package htsjdk.samtools;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.Iterables;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.NavigableSet;
+
+public class BAMSBIIndexerTest extends HtsjdkTest {
+    private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");
+    private static final File BAM_FILE = new File(TEST_DATA_DIR, "example.bam");
+    private static final File EMPTY_BAM_FILE = new File(TEST_DATA_DIR, "empty.bam");
+    private static final File LARGE_BAM_FILE = new File(TEST_DATA_DIR, "CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam");
+
+    @Test
+    public void testEmptyBam() throws Exception {
+        long bamFileSize = EMPTY_BAM_FILE.length();
+        SBIIndex index1 = fromBAMFile(EMPTY_BAM_FILE, SBIIndexWriter.DEFAULT_GRANULARITY);
+        SBIIndex index2 = fromSAMRecords(EMPTY_BAM_FILE, SBIIndexWriter.DEFAULT_GRANULARITY);
+        Assert.assertEquals(index1, index2);
+        Assert.assertEquals(index1.dataFileLength(), bamFileSize);
+        Assert.assertEquals(index2.dataFileLength(), bamFileSize);
+        // the splitting index for a BAM with no records has a single entry that is just the length of the BAM file
+        Assert.assertEquals(index1.getVirtualOffsets(), Collections.singletonList(BlockCompressedFilePointerUtil.makeFilePointer(bamFileSize)));
+        Assert.assertEquals(index2.getVirtualOffsets(), Collections.singletonList(BlockCompressedFilePointerUtil.makeFilePointer(bamFileSize)));
+    }
+
+    @Test
+    public void testReadFromIndexPositions() throws Exception {
+        SBIIndex index = fromBAMFile(BAM_FILE, 2);
+        NavigableSet<Long> virtualOffsets = index.getVirtualOffsets();
+        Long firstVirtualOffset = virtualOffsets.first();
+        Long expectedFirstAlignment = SAMUtils.findVirtualOffsetOfFirstRecordInBam(new SeekableFileStream(BAM_FILE));
+        Assert.assertEquals(firstVirtualOffset, expectedFirstAlignment);
+        Assert.assertNotNull(getReadAtOffset(BAM_FILE, firstVirtualOffset));
+
+        for (Long virtualOffset : virtualOffsets.headSet(virtualOffsets.last())) { // for all but the last offset
+            Assert.assertNotNull(getReadAtOffset(BAM_FILE, virtualOffset));
+        }
+    }
+
+    @Test
+    public void testSplit() throws Exception {
+        long bamFileSize = LARGE_BAM_FILE.length();
+        SBIIndex index = fromBAMFile(LARGE_BAM_FILE, 100);
+        List<Chunk> chunks = index.split(bamFileSize / 10);
+        Assert.assertTrue(chunks.size() > 1);
+
+        SamReader samReader = SamReaderFactory.makeDefault()
+                .validationStringency(ValidationStringency.SILENT)
+                .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(LARGE_BAM_FILE);
+        List<SAMRecord> allReads = Iterables.slurp(samReader);
+
+        List<SAMRecord> allReadsFromChunks = new ArrayList<>();
+        for (Chunk chunk : chunks) {
+            allReadsFromChunks.addAll(getReadsInChunk(LARGE_BAM_FILE, chunk));
+        }
+        Assert.assertEquals(allReadsFromChunks, allReads);
+
+        List<Chunk> optimizedChunks = Chunk.optimizeChunkList(chunks, 0);
+        Assert.assertEquals(optimizedChunks.size(), 1);
+        List<SAMRecord> allReadsFromOneChunk = getReadsInChunk(LARGE_BAM_FILE, optimizedChunks.get(0));
+        Assert.assertEquals(allReadsFromOneChunk, allReads);
+    }
+
+    @Test
+    public void testIndexersProduceSameIndexes() throws Exception {
+        long bamFileSize = BAM_FILE.length();
+        for (long g : new long[] { 1, 2, 10, SBIIndexWriter.DEFAULT_GRANULARITY }) {
+            SBIIndex index1 = fromBAMFile(BAM_FILE, g);
+            SBIIndex index2 = fromSAMRecords(BAM_FILE, g);
+            Assert.assertEquals(index1, index2);
+            Assert.assertEquals(index1.dataFileLength(), bamFileSize);
+            Assert.assertEquals(index2.dataFileLength(), bamFileSize);
+        }
+    }
+
+    private SBIIndex fromBAMFile(File bamFile, long granularity) throws IOException {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        BAMSBIIndexer.createIndex(new SeekableFileStream(bamFile), out, granularity);
+        return SBIIndex.load(new ByteArrayInputStream(out.toByteArray()));
+    }
+
+    private SBIIndex fromSAMRecords(File bamFile, long granularity) {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        SBIIndexWriter indexWriter = new SBIIndexWriter(out, granularity);
+        BAMFileReader bamFileReader = bamFileReader(bamFile);
+        CloseableIterator<SAMRecord> iterator = bamFileReader.getIterator();
+        while (iterator.hasNext()) {
+            processAlignment(indexWriter, iterator.next());
+        }
+        indexWriter.writeVirtualOffset(bamFileReader.getVirtualFilePointer());
+        indexWriter.finish(bamFile.length());
+        return SBIIndex.load(new ByteArrayInputStream(out.toByteArray()));
+    }
+
+    public void processAlignment(SBIIndexWriter indexWriter, SAMRecord rec) {
+        SAMFileSource source = rec.getFileSource();
+        if (source == null) {
+            throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
+        }
+        BAMFileSpan filePointer = (BAMFileSpan) source.getFilePointer();
+        indexWriter.processRecord(filePointer.getFirstOffset());
+    }
+
+    private BAMFileReader bamFileReader(File bamFile) {
+        SamReader samReader = SamReaderFactory.makeDefault()
+                .validationStringency(ValidationStringency.SILENT)
+                .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(bamFile);
+        return (BAMFileReader) ((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader();
+    }
+
+    private SAMRecord getReadAtOffset(File bamFile, long virtualOffset) {
+        Chunk chunk = new Chunk(virtualOffset, BlockCompressedFilePointerUtil.makeFilePointer(bamFile.length()));
+        try (CloseableIterator<SAMRecord> iterator = bamFileReader(bamFile).getIterator(new BAMFileSpan(chunk))) {
+            Assert.assertTrue(iterator.hasNext());
+            return iterator.next();
+        }
+    }
+
+    private List<SAMRecord> getReadsInChunk(File bamFile, Chunk chunk) {
+        try (CloseableIterator<SAMRecord> iterator = bamFileReader(bamFile).getIterator(new BAMFileSpan(chunk))) {
+            return Iterables.slurp(iterator);
+        }
+    }
+}
diff --git a/src/test/resources/htsjdk/samtools/CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam b/src/test/resources/htsjdk/samtools/CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam
new file mode 100644
index 0000000000..16aff793ba
Binary files /dev/null and b/src/test/resources/htsjdk/samtools/CEUTrio.HiSeq.WGS.b37.ch20.1m-1m1k.NA12878.bam differ