Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CSI index support for BAM files #1040

Merged
merged 16 commits into from
Jan 16, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
395 changes: 65 additions & 330 deletions src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java

Large diffs are not rendered by default.

14 changes: 13 additions & 1 deletion src/main/java/htsjdk/samtools/BAMFileConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,19 @@ class BAMFileConstants {

static final byte[] BAM_MAGIC = "BAM\1".getBytes();
/**
* BAM index file magic number.
* BAM index file magic numbers.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* BAM index file magic numbers.
* BAM index file magic numbers.
* @deprecated prefer {@link BAI_INDEX_MAGIC}

* @deprecated prefer {@link BAMFileConstants#BAI_INDEX_MAGIC}
*/
@Deprecated
static final byte[] BAM_INDEX_MAGIC = "BAI\1".getBytes();
static final byte[] BAI_INDEX_MAGIC = "BAI\1".getBytes();
static final byte[] CSI_INDEX_MAGIC = "CSI\1".getBytes();

/**
* CSI index related constants
*/
static final int CSI_MAGIC_OFFSET = 0;
static final int CSI_MINSHIFT_OFFSET = 4;
static final int CSI_AUXDATA_OFFSET = 12;
static final int CSI_CHUNK_SIZE = 16;
}
38 changes: 33 additions & 5 deletions src/main/java/htsjdk/samtools/BAMFileReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,9 @@ protected void enableIndexMemoryMapping(final boolean enabled) {

/**
* Reports the reader type: the CSI-indexed BAM type when an index file is present and
* {@link #getIndexType()} reports CSI, otherwise the plain BAM type.
*/
@Override
public SamReader.Type type() {
    final boolean csiIndexed = mIndexFile != null && getIndexType().equals(SamIndexes.CSI);
    return csiIndexed ? SamReader.Type.BAM_CSI_TYPE : SamReader.Type.BAM_TYPE;
}

Expand All @@ -398,16 +401,41 @@ public BAMIndex getIndex() {
if(!hasIndex())
throw new SAMException("No index is available for this BAM file.");
if(mIndex == null) {
if (mIndexFile != null)
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
else
SamIndexes samIndex = getIndexType();
if (samIndex == null) {
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary())
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary());
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary());
} else if (samIndex.equals(SamIndexes.BAI)) {
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
} else if (samIndex.equals(SamIndexes.CSI)) {
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, getFileHeader().getSequenceDictionary());
} else {
throw new SAMFormatException("Unsupported BAM index file: " + mIndexFile.getName());
}
}

return mIndex;
}

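/**
* Return the type of the BAM index, BAI or CSI, as determined by the index file name suffix (compared case-insensitively).
* @return {@link SamIndexes#BAI} or {@link SamIndexes#CSI}, or null if no index file is set
* @throws SAMFormatException if an index file is set but its name ends with neither the BAI nor the CSI suffix
*/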
public SamIndexes getIndexType() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add Javadoc to this new public method?

if (mIndexFile != null) {
if (mIndexFile.getName().toLowerCase().endsWith(BAMIndex.BAI_INDEX_SUFFIX)) {
return SamIndexes.BAI;
} else if (mIndexFile.getName().toLowerCase().endsWith(BAMIndex.CSI_INDEX_SUFFIX)) {
return SamIndexes.CSI;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It probably makes sense for this method to throw if mIndexFile != null but it's not BAI or CSI.

}

throw new SAMFormatException("Unknown BAM index file type: " + mIndexFile.getName());
}

return null;
}

/**
* Stores the given flag in this reader's {@code eagerDecode} field.
* @param desired the value to assign
*/
public void setEagerDecode(final boolean desired) {
    eagerDecode = desired;
}

@Override
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/BAMFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ private BAMIndexer createBamIndex(final String pathURI) {
try {
final String indexFileBase = pathURI.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION) ?
pathURI.substring(0, pathURI.lastIndexOf('.')) : pathURI;
final Path indexPath = IOUtil.getPath(indexFileBase + BAMIndex.BAMIndexSuffix);
final Path indexPath = IOUtil.getPath(indexFileBase + BAMIndex.BAI_INDEX_SUFFIX);
if (Files.exists(indexPath)) {
if (!Files.isWritable(indexPath)) {
throw new SAMException("Not creating BAM index since unable to write index file " + indexPath.toUri());
Expand Down
10 changes: 8 additions & 2 deletions src/main/java/htsjdk/samtools/BAMIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,13 @@
*/
public interface BAMIndex extends Closeable {

public static final String BAMIndexSuffix = ".bai";
/**
* @deprecated prefer {@link BAMIndex#BAI_INDEX_SUFFIX} instead.
*/
@Deprecated
String BAMIndexSuffix = ".bai";
String BAI_INDEX_SUFFIX = ".bai";
String CSI_INDEX_SUFFIX = ".csi";

/**
* Gets the compressed chunks which should be searched for the contents of records contained by the span
Expand All @@ -58,7 +64,7 @@ public interface BAMIndex extends Closeable {
* @param reference the reference of interest
* @return meta data for the reference
*/
public BAMIndexMetaData getMetaData(int reference);
BAMIndexMetaData getMetaData(int reference);

/**
* Close the index and release any associated resources.
Expand Down
11 changes: 8 additions & 3 deletions src/main/java/htsjdk/samtools/BAMIndexMetaData.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,17 +212,22 @@ long getLastOffset() {
}

/**
* Prints meta-data statistics from BAM index (.bai) file
* Prints meta-data statistics from BAM index (.bai or .csi) file
* Statistics include count of aligned and unaligned reads for each reference sequence
* and a count of all records with no start coordinate
*/
static public void printIndexStats(final File inputBamFile) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't appear to be covered by any test, but I think it's only intended for debugging, so that's fine.

try {
final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
if (!bam.hasIndex()) {
if (!bam.hasIndex() || bam.getIndexType() == null) {
throw new SAMException("No index for bam file " + inputBamFile);
}

BAMIndexMetaData[] data = getIndexStats(bam);
if (data == null) {
throw new SAMException("Exception in getting index statistics");
}

// read through all the bins of every reference.
int nRefs = bam.getFileHeader().getSequenceDictionary().size();
for (int i = 0; i < nRefs; i++) {
Expand All @@ -245,7 +250,7 @@ static public void printIndexStats(final File inputBamFile) {
}

/**
* Prints meta-data statistics from BAM index (.bai) file
* Prints meta-data statistics from BAM index (.bai or .csi) file
* Statistics include count of aligned and unaligned reads for each reference sequence
* and a count of all records with no start coordinate
*/
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/BamFileIoUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ public static void gatherWithBlockCopying(final List<File> bams, final File outp
if (createMd5) out = new Md5CalculatingOutputStream(out, new File(output.getAbsolutePath() + ".md5"));
File indexFile = null;
if (createIndex) {
indexFile = new File(output.getParentFile(), IOUtil.basename(output) + BAMIndex.BAMIndexSuffix);
indexFile = new File(output.getParentFile(), IOUtil.basename(output) + BAMIndex.BAI_INDEX_SUFFIX);
out = new StreamInflatingIndexingOutputStream(out, indexFile);
}

Expand Down Expand Up @@ -161,7 +161,7 @@ private static OutputStream buildOutputStream(final File outputFile, final boole
outputStream = new Md5CalculatingOutputStream(outputStream, new File(outputFile.getAbsolutePath() + ".md5"));
}
if (createIndex) {
outputStream = new StreamInflatingIndexingOutputStream(outputStream, new File(outputFile.getParentFile(), IOUtil.basename(outputFile) + BAMIndex.BAMIndexSuffix));
outputStream = new StreamInflatingIndexingOutputStream(outputStream, new File(outputFile.getParentFile(), IOUtil.basename(outputFile) + BAMIndex.BAI_INDEX_SUFFIX));
}
return outputStream;
}
Expand Down
Loading