Skip to content

Commit

Permalink
Use latest SBI code from htsjdk PR (samtools/htsjdk#1138)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite committed Jul 12, 2018
1 parent b86f0aa commit 3ff47b6
Show file tree
Hide file tree
Showing 8 changed files with 536 additions and 428 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileSpan;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SBIIndex;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReader.PrimitiveSamReaderToSamReaderAdapter;
import htsjdk.samtools.SplittingBAMIndex;
import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
Expand Down Expand Up @@ -71,17 +71,16 @@ protected JavaRDD<PathChunk> getPathChunks(
String referenceSourcePath)
throws IOException {

String splittingBaiPath = path + SplittingBAMIndex.FILE_EXTENSION;
if (fileSystemWrapper.exists(jsc.hadoopConfiguration(), splittingBaiPath)) {
try (SeekableStream sbiStream =
fileSystemWrapper.open(jsc.hadoopConfiguration(), splittingBaiPath)) {
SplittingBAMIndex splittingBAMIndex = SplittingBAMIndex.load(sbiStream);
Broadcast<SplittingBAMIndex> splittingBAMIndexBroadcast = jsc.broadcast(splittingBAMIndex);
String sbiPath = path + SBIIndex.FILE_EXTENSION;
if (fileSystemWrapper.exists(jsc.hadoopConfiguration(), sbiPath)) {
try (SeekableStream sbiStream = fileSystemWrapper.open(jsc.hadoopConfiguration(), sbiPath)) {
SBIIndex sbiIndex = SBIIndex.load(sbiStream);
Broadcast<SBIIndex> sbiIndexBroadcast = jsc.broadcast(sbiIndex);
pathSplitSource
.getPathSplits(jsc, path, splitSize)
.flatMap(
pathSplit -> {
SplittingBAMIndex index = splittingBAMIndexBroadcast.getValue();
SBIIndex index = sbiIndexBroadcast.getValue();
Chunk chunk = index.getChunk(pathSplit.getStart(), pathSplit.getEnd());
if (chunk == null) {
return Collections.emptyIterator();
Expand Down
77 changes: 77 additions & 0 deletions src/main/java/htsjdk/samtools/BAMSBIIndexer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package htsjdk.samtools;

import htsjdk.samtools.cram.io.InputStreamUtils;
import htsjdk.samtools.seekablestream.SeekablePathStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeEOFException;
import java.io.EOFException;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Path;

/** Writes SBI files for BAM files, as understood by {@link SBIIndex}. */
public final class BAMSBIIndexer {

/**
* Perform indexing on the given BAM file, at the granularity level specified.
*
* @param bamFile the path to the BAM file
* @param granularity write the offset of every n-th alignment to the index
* @throws IOException as per java IO contract
*/
public static void createIndex(final Path bamFile, final long granularity) throws IOException {
Path splittingBaiFile = IOUtil.addExtension(bamFile, SBIIndex.FILE_EXTENSION);
try (SeekableStream in = new SeekablePathStream(bamFile);
OutputStream out = Files.newOutputStream(splittingBaiFile)) {
createIndex(in, out, granularity);
}
}

/**
* Perform indexing on the given BAM file, at the granularity level specified.
*
* @param in a seekable stream for reading the BAM file from
* @param out the stream to write the index to
* @param granularity write the offset of every n-th alignment to the index
* @throws IOException as per java IO contract
*/
public static void createIndex(
final SeekableStream in, final OutputStream out, final long granularity) throws IOException {
long recordStart = findVirtualOffsetOfFirstRecordInBam(in);
try (BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in)) {
blockIn.seek(recordStart);
final ByteBuffer byteBuffer =
ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); // BAM is little-endian
SBIIndexWriter indexWriter = new SBIIndexWriter(out, granularity);
while (true) {
try {
recordStart = blockIn.getFilePointer();
InputStreamUtils.readFully(blockIn, byteBuffer.array(), 0, 4);
final int blockSize = byteBuffer.getInt(0); // length of remainder of alignment record
indexWriter.processRecord(recordStart);
InputStreamUtils.skipFully(blockIn, blockSize);
} catch (EOFException e) {
break;
}
}
indexWriter.finish(recordStart, in.length());
}
}

/**
* Returns the virtual file offset of the first record in a BAM file - i.e. the virtual file
* offset after skipping over the text header and the sequence records.
*/
public static long findVirtualOffsetOfFirstRecordInBam(final SeekableStream seekableStream) {
try {
return BAMFileReader2.findVirtualOffsetOfFirstRecord(seekableStream);
} catch (final IOException ioe) {
throw new RuntimeEOFException(ioe);
}
}
}
Loading

0 comments on commit 3ff47b6

Please sign in to comment.