Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tribble/Tabix index path support #810

Merged
merged 9 commits into from
Mar 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/main/java/htsjdk/tribble/TabixFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
*/
package htsjdk.tribble;

import htsjdk.samtools.seekablestream.SeekableStreamFactory;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.readers.*;
Expand Down
45 changes: 33 additions & 12 deletions src/main/java/htsjdk/tribble/Tribble.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import htsjdk.tribble.util.TabixUtils;

import java.io.File;
import java.nio.file.Path;

/**
* Common, tribble wide constants and static functions
Expand All @@ -37,49 +38,69 @@ private Tribble() { } // can't be instantiated
public final static String STANDARD_INDEX_EXTENSION = ".idx";

/**
* Return the name of the index file for the provided vcf {@code filename}
* Return the name of the index file for the provided {@code filename}
* Does not actually create an index
* @param filename name of the vcf file
* @param filename name of the file
* @return non-null String representing the index filename
*/
public static String indexFile(final String filename) {
return indexFile(filename, STANDARD_INDEX_EXTENSION);
}

/**
* Return the File of the index file for the provided vcf {@code file}
* Return the File of the index file for the provided {@code file}
* Does not actually create an index
* @param file the vcf file
* @param file the file
* @return a non-null File representing the index
*/
public static File indexFile(final File file) {
return indexFile(file.getAbsoluteFile(), STANDARD_INDEX_EXTENSION);
}

/**
* Return the name of the tabix index file for the provided vcf {@code filename}
* Return the name of the index file for the provided {@code path}
* Does not actually create an index
* @param filename name of the vcf file
* @param path the path
* @return Path representing the index filename
*/
public static Path indexPath(final Path path) {
return path.getFileSystem().getPath(indexFile(path.toAbsolutePath().toString()));
}

/**
* Return the name of the tabix index file for the provided {@code filename}
* Does not actually create an index
* @param filename name of the file
* @return non-null String representing the index filename
*/
public static String tabixIndexFile(final String filename) {
return indexFile(filename, TabixUtils.STANDARD_INDEX_EXTENSION);
}

/**
* Return the File of the tabix index file for the provided vcf {@code file}
* Return the File of the tabix index file for the provided {@code file}
* Does not actually create an index
* @param file the vcf file
* @param file the file
* @return a non-null File representing the index
*/
public static File tabixIndexFile(final File file) {
return indexFile(file.getAbsoluteFile(), TabixUtils.STANDARD_INDEX_EXTENSION);
}

/**
* Return the name of the index file for the provided vcf {@code filename} and {@code extension}
* Return the name of the tabix index file for the provided {@code path}
* Does not actually create an index
* @param path the path
* @return Path representing the index filename
*/
public static Path tabixIndexPath(final Path path) {
return path.getFileSystem().getPath(tabixIndexFile(path.toAbsolutePath().toString()));
}

/**
* Return the name of the index file for the provided {@code filename} and {@code extension}
* Does not actually create an index
* @param filename name of the vcf file
* @param filename name of the file
* @param extension the extension to use for the index
* @return non-null String representing the index filename
*/
Expand All @@ -88,9 +109,9 @@ private static String indexFile(final String filename, final String extension) {
}

/**
* Return the File of the index file for the provided vcf {@code file} and {@code extension}
* Return the File of the index file for the provided {@code file} and {@code extension}
* Does not actually create an index
* @param file the vcf file
* @param file the file
* @param extension the extension to use for the index
* @return a non-null File representing the index
*/
Expand Down
2 changes: 0 additions & 2 deletions src/main/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,9 @@
import htsjdk.tribble.util.ParsingUtils;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.channels.SeekableByteChannel;
import java.util.ArrayList;
Expand Down
72 changes: 52 additions & 20 deletions src/main/java/htsjdk/tribble/index/AbstractIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,19 @@

package htsjdk.tribble.index;

import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.LittleEndianInputStream;
import htsjdk.tribble.util.LittleEndianOutputStream;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
Expand Down Expand Up @@ -67,11 +71,12 @@ public enum IndexType {
private final static long NO_TS = -1L;

protected int version; // Our version value
protected File indexedFile = null; // The file we've created this index for
protected Path indexedPath = null; // The file we've created this index for
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is technically a breaking change. I'm going to say it's fine though. I don't believe there are any subclasses of AbstractIndex in the wild that we're going to be breaking.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I hadn't realized that it breaks compatibility. If we do not want to break compatibility, I can keep the field with the deprecated annotation and set it when the constructor used is the one taking a File. Let me know if I should do that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, it's tricky. Changing any field that's visible to a subclass of a non-final class is technically a breaking change, since someone could be relying on it if they implement a subclass. It's why I'm so strongly in favor of making classes final and of making variables private, with accessors if they need to be accessed.

protected long indexedFileSize = NO_FILE_SIZE; // The size of the indexed file
protected long indexedFileTS = NO_TS; // The timestamp
protected String indexedFileMD5 = NO_MD5; // The MD5 value, generally not filled in (expensive to calc)
protected int flags;
protected final Log logger = Log.getInstance(this.getClass());

public boolean hasFileSize() {
return indexedFileSize != NO_FILE_SIZE;
Expand Down Expand Up @@ -116,8 +121,8 @@ public boolean equalsIgnoreProperties(final Object obj) {
return false;
}

if (indexedFile != other.indexedFile && (indexedFile == null || !indexedFile.equals(other.indexedFile))) {
System.err.printf("equals indexedFile: this %s != other %s%n", indexedFile, other.indexedFile);
if (indexedPath != other.indexedPath && (indexedPath == null || !indexedPath.equals(other.indexedPath))) {
System.err.printf("equals indexedPath: this %s != other %s%n", indexedPath, other.indexedPath);
return false;
}

Expand Down Expand Up @@ -159,18 +164,27 @@ public AbstractIndex() {
* @param featureFile the feature file to create an index from
*/
public AbstractIndex(final String featureFile) {
this(new File(featureFile));
this();
try {
this.indexedPath = IOUtil.getPath(featureFile).toAbsolutePath();
} catch (IOException e) {
throw new IllegalArgumentException("IO error: " + e.getMessage(), e);
}
}

public AbstractIndex(final File featureFile) {
this(featureFile.toPath());
}

public AbstractIndex(final Path featurePath) {
this();
this.indexedFile = featureFile;
this.indexedPath = featurePath.toAbsolutePath();
}

public AbstractIndex(final AbstractIndex parent) {
this();
this.version = parent.version;
this.indexedFile = parent.indexedFile;
this.indexedPath = parent.indexedPath;
this.indexedFileSize = parent.indexedFileSize;
this.indexedFileTS = parent.indexedFileTS;
this.indexedFileMD5 = parent.indexedFileMD5;
Expand Down Expand Up @@ -200,8 +214,18 @@ public boolean isCurrentVersion() {
return version == VERSION;
}

/**
* Gets the indexed file.
* @throws UnsupportedOperationException if the path cannot be represented as a file.
* @deprecated on 03/2017. Use {@link #getIndexedPath()} instead.
*/
@Deprecated
public File getIndexedFile() {
return indexedFile;
return getIndexedPath().toFile();
}

public Path getIndexedPath() {
return indexedPath;
}

public long getIndexedFileSize() {
Expand Down Expand Up @@ -234,10 +258,14 @@ public boolean containsChromosome(final String chr) {
}

public void finalizeIndex() {
// these two functions must be called now because the file may be being written during on the fly indexing
if (indexedFile != null) {
this.indexedFileSize = indexedFile.length();
this.indexedFileTS = indexedFile.lastModified();
try {
// these two functions must be called now because the file may be being written during on the fly indexing
if (indexedPath != null) {
this.indexedFileSize = Files.size(indexedPath);
this.indexedFileTS = Files.getLastModifiedTime(indexedPath).toMillis();
}
} catch (IOException e) {
throw new RuntimeIOException(e);
}
}

Expand All @@ -251,7 +279,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException
dos.writeInt(MAGIC_NUMBER);
dos.writeInt(getType());
dos.writeInt(version);
dos.writeString(indexedFile.getAbsolutePath());
dos.writeString(indexedPath.toUri().toString());
dos.writeLong(indexedFileSize);
dos.writeLong(indexedFileTS);
dos.writeString(indexedFileMD5);
Expand All @@ -274,7 +302,7 @@ private void writeHeader(final LittleEndianOutputStream dos) throws IOException
private void readHeader(final LittleEndianInputStream dis) throws IOException {

version = dis.readInt();
indexedFile = new File(dis.readString());
indexedPath = IOUtil.getPath(dis.readString());
indexedFileSize = dis.readLong();
indexedFileTS = dis.readLong();
indexedFileMD5 = dis.readString();
Expand Down Expand Up @@ -349,18 +377,22 @@ public void write(final LittleEndianOutputStream stream) throws IOException {
}

@Override
public void write(final File idxFile) throws IOException {
try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)))) {
public void write(final Path idxPath) throws IOException {
try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(Files.newOutputStream(idxPath)))) {
write(idxStream);
}
}

@Override
public void writeBasedOnFeatureFile(final File featureFile) throws IOException {
if (!featureFile.isFile()) return;
write(Tribble.indexFile(featureFile));
public void writeBasedOnFeaturePath(final Path featurePath) throws IOException {
if (!Files.isRegularFile(featurePath)) {
logger.warn("Index not written into ", featurePath);
return;
}
write(Tribble.indexPath(featurePath));
}


public void read(final LittleEndianInputStream dis) throws IOException {
try {
readHeader(dis);
Expand All @@ -386,7 +418,7 @@ public void read(final LittleEndianInputStream dis) throws IOException {
}

protected void printIndexInfo() {
System.out.println(String.format("Index for %s with %d indices", indexedFile, chrIndices.size()));
System.out.println(String.format("Index for %s with %d indices", indexedPath, chrIndices.size()));
final BlockStats stats = getBlockStats(true);
System.out.println(String.format(" total blocks %d", stats.total));
System.out.println(String.format(" total empty blocks %d", stats.empty));
Expand Down
23 changes: 13 additions & 10 deletions src/main/java/htsjdk/tribble/index/DynamicIndexCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import htsjdk.tribble.util.MathUtils;

import java.io.File;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
Expand All @@ -56,13 +57,15 @@ public class DynamicIndexCreator extends TribbleIndexCreator {
MathUtils.RunningStat stats = new MathUtils.RunningStat();
long basesSeen = 0;
Feature lastFeature = null;
File inputFile;

public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
public DynamicIndexCreator(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) {
this.iba = iba;
// get a list of index creators
this.inputFile = inputFile;
creators = getIndexCreators(inputFile,iba);
creators = getIndexCreators(inputPath, iba);
}

public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
this(inputFile.toPath(), iba);
}

@Override
Expand Down Expand Up @@ -90,19 +93,19 @@ public Index finalizeIndex(final long finalFilePosition) {

/**
* create a list of index creators (initialized) representing the common index types we'd suspect they'd like to use
* @param inputFile the input file to use to create the indexes
* @param inputPath the input path to use to create the indexes
* @return a map of index type to the best index for that balancing approach
*/
private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final Path inputPath, final IndexFactory.IndexBalanceApproach iba) {
final Map<IndexFactory.IndexType,TribbleIndexCreator> creators = new HashMap<IndexFactory.IndexType,TribbleIndexCreator>();

if (iba == IndexFactory.IndexBalanceApproach.FOR_SIZE) {
// add a linear index with the default bin size
final LinearIndexCreator linearNormal = new LinearIndexCreator(inputFile, LinearIndexCreator.DEFAULT_BIN_WIDTH);
final LinearIndexCreator linearNormal = new LinearIndexCreator(inputPath, LinearIndexCreator.DEFAULT_BIN_WIDTH);
creators.put(IndexFactory.IndexType.LINEAR,linearNormal);

// create a tree index with the default size
final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputFile, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputPath, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeNormal);
}

Expand All @@ -111,12 +114,12 @@ private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final F
if (iba == IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) {
// create a linear index with a small bin size
final LinearIndexCreator linearSmallBin =
new LinearIndexCreator(inputFile, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4));
new LinearIndexCreator(inputPath, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4));
creators.put(IndexFactory.IndexType.LINEAR,linearSmallBin);

// create a tree index with a small index size
final IntervalIndexCreator treeSmallBin =
new IntervalIndexCreator(inputFile, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8));
new IntervalIndexCreator(inputPath, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8));
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeSmallBin);
}

Expand Down
Loading