Skip to content

Commit

Permalink
Add LiftOver constructor that takes an input stream (#1412)
Browse files Browse the repository at this point in the history
* Add LiftOver constructor that takes an InputStream
  • Loading branch information
kevinpetersavage authored and lbergelson committed Aug 26, 2019
1 parent 9628c1e commit 46b1a00
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 26 deletions.
39 changes: 25 additions & 14 deletions src/main/java/htsjdk/samtools/liftover/Chain.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.util.List;
import java.util.regex.Pattern;


/**
* Holds a single chain from a UCSC chain file. Chain file format is described here: http://genome.ucsc.edu/goldenPath/help/chain.html
*
Expand Down Expand Up @@ -309,26 +310,36 @@ public int hashCode() {
/**
* Read all the chains and load into an OverlapDetector.
* @param chainFile File in UCSC chain format.
* @return OverlapDetector will all Chains from reader loaded into it.
* @return OverlapDetector with all Chains from reader loaded into it.
*/
static OverlapDetector<Chain> loadChains(final File chainFile) {
final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile));
IOUtil.assertFileIsReadable(chainFile);
try(final BufferedLineReader reader = new BufferedLineReader(IOUtil.openFileForReading(chainFile))){
return loadChains(reader, chainFile.toString());
}
}

/**
* Read all the chains and load into an OverlapDetector.
* @param reader reader of file in UCSC chain format.
* @param sourceName name of the source being read; For error messages only.
* @return OverlapDetector with all Chains from reader loaded into it.
*/
static OverlapDetector<Chain> loadChains(final BufferedLineReader reader, String sourceName) {
final OverlapDetector<Chain> ret = new OverlapDetector<Chain>(0, 0);
Chain chain;
while ((chain = Chain.loadChain(reader, chainFile.toString())) != null) {
while ((chain = Chain.loadChain(reader, sourceName)) != null) {
ret.addLhs(chain, chain.interval);
}
reader.close();
return ret;
}

/**
* Read a single Chain from reader.
* @param reader Text representation of chains.
* @param chainFile For error messages only.
* @param sourceName For error messages only.
* @return New Chain with associated ContinuousBlocks.
*/
private static Chain loadChain(final BufferedLineReader reader, final String chainFile) {
private static Chain loadChain(final BufferedLineReader reader, final String sourceName) {
String line;
while (true) {
line = reader.readLine();
Expand All @@ -342,10 +353,10 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
}
final String[] chainFields = SPLITTER.split(line);
if (chainFields.length != 13) {
throwChainFileParseException("chain line has wrong number of fields", chainFile, reader.getLineNumber());
throwChainFileParseException("chain line has wrong number of fields", sourceName, reader.getLineNumber());
}
if (!"chain".equals(chainFields[0])) {
throwChainFileParseException("chain line does not start with 'chain'", chainFile, reader.getLineNumber());
throwChainFileParseException("chain line does not start with 'chain'", sourceName, reader.getLineNumber());
}
double score = 0;
String fromSequenceName = null;
Expand All @@ -372,7 +383,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
toChainEnd = Integer.parseInt(chainFields[11]);
id = Integer.parseInt(chainFields[12]);
} catch (NumberFormatException e) {
throwChainFileParseException("Invalid field", chainFile, reader.getLineNumber());
throwChainFileParseException("Invalid field", sourceName, reader.getLineNumber());
}
final Chain chain = new Chain(score, fromSequenceName, fromSequenceSize, fromChainStart, fromChainEnd, toSequenceName, toSequenceSize, toNegativeStrand, toChainStart,
toChainEnd, id);
Expand All @@ -383,18 +394,18 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
line = reader.readLine();
if (line == null || line.equals("")) {
if (!sawLastLine) {
throwChainFileParseException("Reached end of chain without seeing terminal block", chainFile, reader.getLineNumber());
throwChainFileParseException("Reached end of chain without seeing terminal block", sourceName, reader.getLineNumber());
}
break;
}
if (sawLastLine) {
throwChainFileParseException("Terminal block seen before end of chain", chainFile, reader.getLineNumber());
throwChainFileParseException("Terminal block seen before end of chain", sourceName, reader.getLineNumber());
}
String[] blockFields = SPLITTER.split(line);
if (blockFields.length == 1) {
sawLastLine = true;
} else if (blockFields.length != 3) {
throwChainFileParseException("Block line has unexpected number of fields", chainFile, reader.getLineNumber());
throwChainFileParseException("Block line has unexpected number of fields", sourceName, reader.getLineNumber());
}
int size = Integer.parseInt(blockFields[0]);
chain.addBlock(fromBlockStart, toBlockStart, size);
Expand All @@ -408,7 +419,7 @@ private static Chain loadChain(final BufferedLineReader reader, final String cha
return chain;
}

private static void throwChainFileParseException(final String message, final String chainFile, final int lineNumber) {
throw new SAMException(message + " in chain file " + chainFile + " at line " + lineNumber);
private static void throwChainFileParseException(final String message, final String sourceName, final int lineNumber) {
throw new SAMException(message + " in chain file " + sourceName + " at line " + lineNumber);
}
}
22 changes: 15 additions & 7 deletions src/main/java/htsjdk/samtools/liftover/LiftOver.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,10 @@

import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.OverlapDetector;
import htsjdk.samtools.util.*;

import java.io.File;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -39,6 +37,7 @@
import java.util.Map;
import java.util.Set;


/**
* Java port of UCSC liftOver. Only the most basic liftOver functionality is implemented.
* Internally coordinates are 0-based, half-open. The API is standard Picard 1-based, inclusive.
Expand Down Expand Up @@ -84,10 +83,19 @@ public long getFailedIntervalsBelowThreshold() {
/**
* Load UCSC chain file in order to lift over Intervals.
*/
public LiftOver(File chainFile) {
IOUtil.assertFileIsReadable(chainFile);
chains = Chain.loadChains(chainFile);
public LiftOver(File chainFile){
this(Chain.loadChains(chainFile));
}

/**
* Load UCSC chain data from an input stream in order to lift over Intervals.
* All chains are read from the stream while this constructor runs.
* NOTE(review): nothing in this constructor closes the stream or the reader wrapped around it;
* callers presumably remain responsible for closing chainFileInputStream -- confirm.
* @param chainFileInputStream stream of text in UCSC chain format.
* @param sourceName name describing where the stream came from; handed to the chain loader, which uses it in error messages.
*/
public LiftOver(InputStream chainFileInputStream, String sourceName) {
this(Chain.loadChains(new BufferedLineReader(chainFileInputStream), sourceName));
}

private LiftOver(OverlapDetector<Chain> chains) {
this.chains = chains;
for (final Chain chain : this.chains.getAll()) {
final String from = chain.fromSequenceName;
final String to = chain.toSequenceName;
Expand Down
21 changes: 16 additions & 5 deletions src/test/java/htsjdk/samtools/liftover/LiftOverTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,7 @@
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.File;
import java.io.PrintWriter;
import java.io.*;
import java.util.*;
import java.util.stream.Stream;

Expand All @@ -44,14 +43,21 @@ public class LiftOverTest extends HtsjdkTest {
private static final File CHAIN_FILE = new File(TEST_DATA_DIR, "hg18ToHg19.over.chain");

private LiftOver liftOver;
Map<String, Set<String>> contigMap;
private Map<String, Set<String>> contigMap;
private LiftOver liftOverFromInputStream;

/**
 * Builds the file-backed LiftOver shared by the tests in this class and caches its contig map.
 */
@BeforeClass
public void initLiftOver() {
    final LiftOver fromChainFile = new LiftOver(CHAIN_FILE);
    liftOver = fromChainFile;
    contigMap = fromChainFile.getContigMap();
}

/**
 * Builds a LiftOver from an InputStream over the same chain file used by {@code initLiftOver},
 * so the stream-based constructor can be checked against the same expectations.
 *
 * @throws IOException if the chain file cannot be opened or the stream cannot be closed.
 */
@BeforeClass
public void initLiftOverFromInputStream() throws IOException {
    // The LiftOver(InputStream, String) constructor loads every chain before it returns,
    // so the stream is no longer needed afterwards and can be closed here instead of leaked.
    try (InputStream chainFileInputStream = new FileInputStream(CHAIN_FILE)) {
        liftOverFromInputStream = new LiftOver(chainFileInputStream, CHAIN_FILE.toString());
    }
}

@Test(dataProvider = "testIntervals")
public void testBasic(final Interval in, final Interval expected) {
Assert.assertEquals(liftOver.liftOver(in), expected);
Expand Down Expand Up @@ -461,21 +467,26 @@ public void testWriteChain() throws Exception {
File outFile = File.createTempFile("test.", ".chain");
outFile.deleteOnExit();
PrintWriter pw = new PrintWriter(outFile);
final Map<Integer, Chain> originalChainMap = new TreeMap<Integer, Chain>();
final Map<Integer, Chain> originalChainMap = new TreeMap<>();
for (final Chain chain : chains.getAll()) {
chain.write(pw);
originalChainMap.put(chain.id, chain);
}
pw.close();

final OverlapDetector<Chain> newChains = Chain.loadChains(outFile);
final Map<Integer, Chain> newChainMap = new TreeMap<Integer, Chain>();
final Map<Integer, Chain> newChainMap = new TreeMap<>();
for (final Chain chain : newChains.getAll()) {
newChainMap.put(chain.id, chain);
}
Assert.assertEquals(newChainMap, originalChainMap);
}

/**
 * Checks that the LiftOver built from an InputStream lifts each test interval to the expected result.
 *
 * @param in interval to lift over.
 * @param expected interval the lift over is expected to produce.
 */
@Test(dataProvider = "testIntervals")
public void loadLiftOverFromInputStream(final Interval in, final Interval expected) {
    final Interval lifted = liftOverFromInputStream.liftOver(in);
    Assert.assertEquals(lifted, expected);
}

@Test(dataProvider = "testIntervals")
public void testGetContigMap(final Interval in, final Interval expected) {
if (expected != null) {
Expand Down

0 comments on commit 46b1a00

Please sign in to comment.