diff --git a/src/main/java/htsjdk/samtools/CRAMIterator.java b/src/main/java/htsjdk/samtools/CRAMIterator.java index b2a61d3c8e..72b1082288 100644 --- a/src/main/java/htsjdk/samtools/CRAMIterator.java +++ b/src/main/java/htsjdk/samtools/CRAMIterator.java @@ -32,12 +32,24 @@ import java.io.InputStream; import java.math.BigInteger; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.*; import htsjdk.samtools.cram.CRAMException; +import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.RuntimeIOException; public class CRAMIterator implements SAMRecordIterator { + + /** A CRAMReferenceSource that is never invoked . It's used in {@link #extractDictionary(Path)}*/ + private static final CRAMReferenceSource NIL_CRAM_REFERENCE_SRC = new CRAMReferenceSource() { + @Override + public byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, boolean tryNameVariants) { + throw new IllegalStateException("CRAMReferenceSource.getReferenceBases shouldn't be called"); + } + }; + private final CountingInputStream countingInputStream; private final CramHeader cramHeader; private final ArrayList records; @@ -303,4 +315,18 @@ public SAMFileHeader getSAMFileHeader() { return cramHeader.getSamFileHeader(); } + /** extracts a {@link SAMSequenceDictionary} from a cram file. + * @return the dictionary of the cram file + * @throws SAMException if a dictionary cannot be extracted + */ + public static SAMSequenceDictionary extractDictionary(final Path cramPath) { + IOUtil.assertFileIsReadable(cramPath); + try (final InputStream in = Files.newInputStream(cramPath)) { + try(final CRAMIterator iter= new CRAMIterator(in, NIL_CRAM_REFERENCE_SRC, ValidationStringency.SILENT)) { + return iter.getSAMFileHeader().getSequenceDictionary(); + } + } catch (final Exception err) { + throw new SAMException("Cannot extract dictionary from "+cramPath, err); + } + } } diff --git a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java index aeafaf46a2..e236fc6303 100644 --- a/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java +++ b/src/main/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractor.java @@ -25,6 +25,7 @@ package htsjdk.variant.utils; import htsjdk.samtools.*; +import htsjdk.samtools.cram.build.CramIO; import htsjdk.samtools.reference.ReferenceSequenceFileFactory; import htsjdk.samtools.util.BufferedLineReader; import htsjdk.samtools.util.CollectionUtil; @@ -49,7 +50,7 @@ enum TYPE { FASTA(ReferenceSequenceFileFactory.FASTA_EXTENSIONS) { @Override - SAMSequenceDictionary extractDictionary(Path reference) { + SAMSequenceDictionary extractDictionary(final Path reference) { final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference).getSequenceDictionary(); if (dict == null) throw new SAMException("Could not find dictionary next to reference file " + reference.toUri().toString()); @@ -70,17 +71,24 @@ SAMSequenceDictionary extractDictionary(final Path dictionary) { } } }, + CRAM(CramIO.CRAM_FILE_EXTENSION) { + + @Override + SAMSequenceDictionary extractDictionary(final Path cram) { + return CRAMIterator.extractDictionary(cram); + } + }, SAM(IOUtil.SAM_FILE_EXTENSION, BamFileIoUtils.BAM_FILE_EXTENSION) { @Override - SAMSequenceDictionary extractDictionary(Path sam) { + SAMSequenceDictionary extractDictionary(final Path sam) { return SamReaderFactory.makeDefault().getFileHeader(sam).getSequenceDictionary(); } }, VCF(IOUtil.VCF_EXTENSIONS) { @Override - SAMSequenceDictionary extractDictionary(Path vcf) { + SAMSequenceDictionary extractDictionary(final Path vcf) { try (VCFFileReader vcfPathReader = new VCFFileReader(vcf, false)){ return vcfPathReader.getFileHeader().getSequenceDictionary(); } @@ -89,7 +97,7 @@ SAMSequenceDictionary extractDictionary(Path vcf) { INTERVAL_LIST(IOUtil.INTERVAL_LIST_FILE_EXTENSION) { @Override - SAMSequenceDictionary extractDictionary(Path intervalList) { + SAMSequenceDictionary extractDictionary(final Path intervalList) { return IntervalList.fromPath(intervalList).getHeader().getSequenceDictionary(); } }; diff --git a/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java b/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java index af32411122..46c8c21687 100644 --- a/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java +++ b/src/test/java/htsjdk/variant/utils/SAMSequenceDictionaryExtractorTest.java @@ -31,12 +31,14 @@ import org.testng.Assert; import java.io.File; +import java.nio.file.Path; +import java.nio.file.Paths; /** * @author farjoun on 4/9/14. */ public class SAMSequenceDictionaryExtractorTest extends HtsjdkTest { - String path = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/"; + final String path = "src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/"; @DataProvider(name = "testExtractDictionaries") public Object[][] dictionaries() { @@ -49,13 +51,14 @@ public Object[][] dictionaries() { new Object[]{"test2_comp.interval_list", "Homo_sapiens_assembly18.trimmed.dict"}, new Object[]{"ScreenSamReads.100.input.sam", "test3_comp.interval_list"}, new Object[]{"ScreenSamReads.100.input.sam", "test4_comp.interval_list"}, + new Object[]{"toy.cram", "toy.dict"} }; } @Test(dataProvider = "testExtractDictionaries") public void testExtractDictionary(final String dictSource, final String dictExpected) throws Exception { - final File dictSourceFile = new File(path, dictSource); - final File dictExpectedFile = new File(path, dictExpected); + final Path dictSourceFile = Paths.get(path, dictSource); + final Path dictExpectedFile = Paths.get(path, dictExpected); final SAMSequenceDictionary dict1 = SAMSequenceDictionaryExtractor.extractDictionary(dictSourceFile); final SAMSequenceDictionary dict2 = SAMSequenceDictionaryExtractor.extractDictionary(dictExpectedFile); diff --git a/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.cram b/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.cram new file mode 100644 index 0000000000..5c7b17e493 Binary files /dev/null and b/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.cram differ diff --git a/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.dict b/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.dict new file mode 100644 index 0000000000..b2328c41ce --- /dev/null +++ b/src/test/resources/htsjdk/variant/utils/SamSequenceDictionaryExtractor/toy.dict @@ -0,0 +1,3 @@ +@HD VN:1.5 SO:unsorted +@SQ SN:ref LN:45 M5:7a66cae8ab14aef8d635bc80649e730b +@SQ SN:ref2 LN:40 M5:1636753510ec27476fdd109a6684680e