-
Notifications
You must be signed in to change notification settings - Fork 242
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CSI index support for BAM files #1040
Changes from all commits
f4795bc
4dee079
ed5aac6
c04ccce
796aa23
f5f35e5
217c230
757987d
9ab2a59
71a6407
3cd30a6
e061a32
0e9a1d3
e9c75ee
205e738
83daf3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -378,6 +378,9 @@ protected void enableIndexMemoryMapping(final boolean enabled) { | |
|
||
@Override | ||
public SamReader.Type type() { | ||
if (mIndexFile != null && getIndexType().equals(SamIndexes.CSI)) { | ||
return SamReader.Type.BAM_CSI_TYPE; | ||
} | ||
return SamReader.Type.BAM_TYPE; | ||
} | ||
|
||
|
@@ -398,16 +401,41 @@ public BAMIndex getIndex() { | |
if(!hasIndex()) | ||
throw new SAMException("No index is available for this BAM file."); | ||
if(mIndex == null) { | ||
if (mIndexFile != null) | ||
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping) | ||
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping); | ||
else | ||
SamIndexes samIndex = getIndexType(); | ||
if (samIndex == null) { | ||
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary()) | ||
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary()); | ||
: new DiskBasedBAMFileIndex(mIndexStream, getFileHeader().getSequenceDictionary()); | ||
} else if (samIndex.equals(SamIndexes.BAI)) { | ||
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping) | ||
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping); | ||
} else if (samIndex.equals(SamIndexes.CSI)) { | ||
mIndex = new CSIIndex(mIndexFile, mEnableIndexMemoryMapping, getFileHeader().getSequenceDictionary()); | ||
} else { | ||
throw new SAMFormatException("Unsupported BAM index file: " + mIndexFile.getName()); | ||
} | ||
} | ||
|
||
return mIndex; | ||
} | ||
|
||
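/** | ||
* Returns the type of the BAM index (BAI or CSI), determined from the index file name suffix (case-insensitive). | ||
* @return {@link SamIndexes#BAI} or {@link SamIndexes#CSI}, or null when no index file is set | ||
* @throws SAMFormatException if an index file is set but its name ends with neither the BAI nor the CSI suffix | ||
*/ | ||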
public SamIndexes getIndexType() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you add Javadoc to this new public method? |
||
if (mIndexFile != null) { | ||
if (mIndexFile.getName().toLowerCase().endsWith(BAMIndex.BAI_INDEX_SUFFIX)) { | ||
return SamIndexes.BAI; | ||
} else if (mIndexFile.getName().toLowerCase().endsWith(BAMIndex.CSI_INDEX_SUFFIX)) { | ||
return SamIndexes.CSI; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It probably makes sense for this method to throw if mIndexFile != null but the file is neither BAI nor CSI. |
||
} | ||
|
||
throw new SAMFormatException("Unknown BAM index file type: " + mIndexFile.getName()); | ||
} | ||
|
||
return null; | ||
} | ||
|
||
public void setEagerDecode(final boolean desired) { this.eagerDecode = desired; } | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -212,17 +212,22 @@ long getLastOffset() { | |
} | ||
|
||
/** | ||
* Prints meta-data statistics from BAM index (.bai) file | ||
* Prints meta-data statistics from BAM index (.bai or .csi) file | ||
* Statistics include count of aligned and unaligned reads for each reference sequence | ||
* and a count of all records with no start coordinate | ||
*/ | ||
static public void printIndexStats(final File inputBamFile) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This doesn't ever seem to be tested, but I think it's really just intended for debugging, so that's fine. |
||
try { | ||
final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory()); | ||
if (!bam.hasIndex()) { | ||
if (!bam.hasIndex() || bam.getIndexType() == null) { | ||
throw new SAMException("No index for bam file " + inputBamFile); | ||
} | ||
|
||
BAMIndexMetaData[] data = getIndexStats(bam); | ||
if (data == null) { | ||
throw new SAMException("Exception in getting index statistics"); | ||
} | ||
|
||
// read through all the bins of every reference. | ||
int nRefs = bam.getFileHeader().getSequenceDictionary().size(); | ||
for (int i = 0; i < nRefs; i++) { | ||
|
@@ -245,7 +250,7 @@ static public void printIndexStats(final File inputBamFile) { | |
} | ||
|
||
/** | ||
* Prints meta-data statistics from BAM index (.bai) file | ||
* Prints meta-data statistics from BAM index (.bai or .csi) file | ||
* Statistics include count of aligned and unaligned reads for each reference sequence | ||
* and a count of all records with no start coordinate | ||
*/ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.