Skip to content

Commit

Permalink
Add support for CRAM in SamSequenceDictionaryExtractor (#1305)
Browse files Browse the repository at this point in the history
  • Loading branch information
lindenb authored and lbergelson committed Feb 26, 2019
1 parent d6043f0 commit 205d5f0
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 7 deletions.
26 changes: 26 additions & 0 deletions src/main/java/htsjdk/samtools/CRAMIterator.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,24 @@

import java.io.InputStream;
import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.RuntimeIOException;

public class CRAMIterator implements SAMRecordIterator {

/** A CRAMReferenceSource that is never invoked . It's used in {@link #extractDictionary(Path)}*/
private static final CRAMReferenceSource NIL_CRAM_REFERENCE_SRC = new CRAMReferenceSource() {
@Override
public byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, boolean tryNameVariants) {
throw new IllegalStateException("CRAMReferenceSource.getReferenceBases shouldn't be called");
}
};

private final CountingInputStream countingInputStream;
private final CramHeader cramHeader;
private final ArrayList<SAMRecord> records;
Expand Down Expand Up @@ -303,4 +315,18 @@ public SAMFileHeader getSAMFileHeader() {
return cramHeader.getSamFileHeader();
}

/** extracts a {@link SAMSequenceDictionary} from a cram file.
* @return the dictionary of the cram file
* @throws SAMException if a dictionary cannot be extracted
*/
public static SAMSequenceDictionary extractDictionary(final Path cramPath) {
IOUtil.assertFileIsReadable(cramPath);
try (final InputStream in = Files.newInputStream(cramPath)) {
try(final CRAMIterator iter= new CRAMIterator(in, NIL_CRAM_REFERENCE_SRC, ValidationStringency.SILENT)) {
return iter.getSAMFileHeader().getSequenceDictionary();
}
} catch (final Exception err) {
throw new SAMException("Cannot extract dictionary from "+cramPath, err);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
package htsjdk.variant.utils;

import htsjdk.samtools.*;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.CollectionUtil;
Expand All @@ -49,7 +50,7 @@ enum TYPE {
FASTA(ReferenceSequenceFileFactory.FASTA_EXTENSIONS) {

@Override
SAMSequenceDictionary extractDictionary(Path reference) {
SAMSequenceDictionary extractDictionary(final Path reference) {
final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference).getSequenceDictionary();
if (dict == null)
throw new SAMException("Could not find dictionary next to reference file " + reference.toUri().toString());
Expand All @@ -70,17 +71,24 @@ SAMSequenceDictionary extractDictionary(final Path dictionary) {
}
}
},
CRAM(CramIO.CRAM_FILE_EXTENSION) {

@Override
SAMSequenceDictionary extractDictionary(final Path cram) {
return CRAMIterator.extractDictionary(cram);
}
},
SAM(IOUtil.SAM_FILE_EXTENSION, BamFileIoUtils.BAM_FILE_EXTENSION) {

@Override
SAMSequenceDictionary extractDictionary(Path sam) {
SAMSequenceDictionary extractDictionary(final Path sam) {
return SamReaderFactory.makeDefault().getFileHeader(sam).getSequenceDictionary();
}
},
VCF(IOUtil.VCF_EXTENSIONS) {

@Override
SAMSequenceDictionary extractDictionary(Path vcf) {
SAMSequenceDictionary extractDictionary(final Path vcf) {
try (VCFFileReader vcfPathReader = new VCFFileReader(vcf, false)){
return vcfPathReader.getFileHeader().getSequenceDictionary();
}
Expand All @@ -89,7 +97,7 @@ SAMSequenceDictionary extractDictionary(Path vcf) {
INTERVAL_LIST(IOUtil.INTERVAL_LIST_FILE_EXTENSION) {

@Override
SAMSequenceDictionary extractDictionary(Path intervalList) {
SAMSequenceDictionary extractDictionary(final Path intervalList) {
return IntervalList.fromPath(intervalList).getHeader().getSequenceDictionary();
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@
import org.testng.Assert;

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
* @author farjoun on 4/9/14.
*/
public class SAMSequenceDictionaryExtractorTest extends HtsjdkTest {
String path = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/";
final String path = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/";

@DataProvider(name = "testExtractDictionaries")
public Object[][] dictionaries() {
Expand All @@ -49,13 +51,14 @@ public Object[][] dictionaries() {
new Object[]{"test2_comp.interval_list", "Homo_sapiens_assembly18.trimmed.dict"},
new Object[]{"ScreenSamReads.100.input.sam", "test3_comp.interval_list"},
new Object[]{"ScreenSamReads.100.input.sam", "test4_comp.interval_list"},
new Object[]{"toy.cram", "toy.dict"}
};
}

@Test(dataProvider = "testExtractDictionaries")
public void testExtractDictionary(final String dictSource, final String dictExpected) throws Exception {
final File dictSourceFile = new File(path, dictSource);
final File dictExpectedFile = new File(path, dictExpected);
final Path dictSourceFile = Paths.get(path, dictSource);
final Path dictExpectedFile = Paths.get(path, dictExpected);
final SAMSequenceDictionary dict1 = SAMSequenceDictionaryExtractor.extractDictionary(dictSourceFile);
final SAMSequenceDictionary dict2 = SAMSequenceDictionaryExtractor.extractDictionary(dictExpectedFile);

Expand Down
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
@HD VN:1.5 SO:unsorted
@SQ SN:ref LN:45 M5:7a66cae8ab14aef8d635bc80649e730b
@SQ SN:ref2 LN:40 M5:1636753510ec27476fdd109a6684680e

0 comments on commit 205d5f0

Please sign in to comment.