Skip to content

Commit

Permalink
Add an IntervalCodec that is useful for sorting large sets of Interv…
Browse files Browse the repository at this point in the history
…als (#1288)

* Added IntervalCodec that is useful for sorting large sets of Intervals.
* Made IntervalCoordinateComparator public
* Adding an IntervalListWriter class to write Intervals.
  • Loading branch information
nh13 authored and lbergelson committed Feb 20, 2019
1 parent d771b30 commit e8e0a6f
Show file tree
Hide file tree
Showing 7 changed files with 454 additions and 90 deletions.
2 changes: 1 addition & 1 deletion src/main/java/htsjdk/samtools/BAMRecordCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ public SAMRecord decode() {
final int recordLength;
try {
recordLength = this.binaryCodec.readInt();
} catch (RuntimeEOFException e) {
} catch (final RuntimeEOFException e) {
return null;
}

Expand Down
119 changes: 119 additions & 0 deletions src/main/java/htsjdk/samtools/util/IntervalCodec.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* The MIT License
*
* Copyright (c) 2019 Nils Homer
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;

import htsjdk.samtools.SAMSequenceDictionary;
import java.io.InputStream;
import java.io.OutputStream;

/**
 * A {@link SortingCollection.Codec} for {@link Interval}s, useful for sorting large sets of
 * intervals that must be spilled to disk.
 *
 * <p>Each interval is written as: the index of its contig in the sequence dictionary (int),
 * start (int), end (int), negative-strand flag (boolean), a has-name flag (boolean) and, only
 * when the name is present, the name as a null-terminated string. Contigs are stored by index,
 * so the same dictionary must be used for encoding and decoding.
 */
public class IntervalCodec implements SortingCollection.Codec<Interval> {

    private final SAMSequenceDictionary dict;

    private final BinaryCodec binaryCodec = new BinaryCodec();

    /**
     * Creates a new binary codec to read or write.
     * @param dict the sequence dictionary associated with the intervals.
     */
    public IntervalCodec(final SAMSequenceDictionary dict) {
        this.dict = dict;
    }

    /**
     * Returns a new codec that shares this codec's sequence dictionary but has its own
     * {@link BinaryCodec}; no input or output stream is carried over.
     */
    @Override
    public IntervalCodec clone() {
        return new IntervalCodec(this.dict);
    }


    /**
     * Sets the output stream that records will be written to.
     */
    @Override
    public void setOutputStream(final OutputStream os) {
        this.binaryCodec.setOutputStream(os);
    }

    /**
     * Sets the output stream that records will be written to.
     * @param os the stream to write to.
     * @param filename the file name passed on to the underlying binary codec.
     */
    public void setOutputStream(final OutputStream os, final String filename) {
        this.binaryCodec.setOutputStream(os);
        this.binaryCodec.setOutputFileName(filename);
    }

    /**
     * Sets the input stream that records will be read from.
     */
    @Override
    public void setInputStream(final InputStream is) {
        this.binaryCodec.setInputStream(is);
    }

    /**
     * Sets the input stream that records will be read from.
     * @param is the stream to read from.
     * @param filename the file name passed on to the underlying binary codec.
     */
    public void setInputStream(final InputStream is, final String filename) {
        this.binaryCodec.setInputStream(is);
        this.binaryCodec.setInputFileName(filename);
    }

    /**
     * Writes the interval to the output stream.
     * @param interval the interval to write.
     * @throws IllegalArgumentException if the interval's contig is not in the sequence dictionary.
     */
    @Override
    public void encode(final Interval interval) {
        final String contig = interval.getContig();
        final int sequenceIndex = this.dict.getSequenceIndex(contig);
        // Fail here rather than writing an index that decode() cannot map back to a contig.
        if (sequenceIndex < 0) {
            throw new IllegalArgumentException(
                    "Cannot encode interval " + contig + ":" + interval.getStart() + "-" + interval.getEnd()
                            + ": contig is not present in the sequence dictionary");
        }

        final String name = interval.getName();
        final boolean hasName = name != null;

        this.binaryCodec.writeInt(sequenceIndex);
        this.binaryCodec.writeInt(interval.getStart());
        this.binaryCodec.writeInt(interval.getEnd());
        this.binaryCodec.writeBoolean(interval.isNegativeStrand());
        // The flag tells decode() whether a name follows; a null name is written as nothing at all.
        this.binaryCodec.writeBoolean(hasName);
        if (hasName) {
            // decode() reads this back with readNullTerminatedString().
            this.binaryCodec.writeString(name, false, true);
        }
    }

    /**
     * Reads an interval from the input stream.
     * @return null if no more intervals, otherwise the next interval.
     * @throws IllegalStateException if the stored sequence index has no entry in the sequence dictionary.
     */
    @Override
    public Interval decode() {
        final int sequenceIndex;
        try {
            sequenceIndex = this.binaryCodec.readInt();
        } catch (final RuntimeEOFException e) {
            // End of input while reading the first field of a record: no more intervals.
            return null;
        }

        if (this.dict.getSequence(sequenceIndex) == null) {
            throw new IllegalStateException(
                    "Decoded sequence index " + sequenceIndex + " is not present in the sequence dictionary");
        }
        final String contig = this.dict.getSequence(sequenceIndex).getSequenceName();

        // Fields are read in exactly the order encode() wrote them.
        final int start = this.binaryCodec.readInt();
        final int end = this.binaryCodec.readInt();
        final boolean negativeStrand = this.binaryCodec.readBoolean();
        final boolean hasName = this.binaryCodec.readBoolean();
        final String name = hasName ? this.binaryCodec.readNullTerminatedString() : null;

        return new Interval(contig, start, end, negativeStrand, name);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* The MIT License
*
* Copyright (c) 2019 Nils Homer
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.samtools.util;

import htsjdk.samtools.SAMFileHeader;

import java.io.Serializable;
import java.util.Comparator;

/**
 * Comparator that orders intervals based on their sequence index, by coordinate
 * then by strand and finally by name.
 *
 * <p>Keys are compared in this order: sequence index of the contig in the header, start, end,
 * strand (positive before negative) and name (a null name sorts before any non-null name).
 * Numeric keys are compared with {@link Integer#compare(int, int)} rather than subtraction,
 * so the result cannot change sign through integer overflow.
 */
public class IntervalCoordinateComparator implements Comparator<Interval>, Serializable {
    private static final long serialVersionUID = 1L;

    private final SAMFileHeader header;

    /** Constructs a comparator using the supplied sequence header. */
    public IntervalCoordinateComparator(final SAMFileHeader header) {
        this.header = header;
    }

    /**
     * Compares two intervals by sequence index, start, end, strand and name, in that order.
     *
     * @param lhs the left-hand interval.
     * @param rhs the right-hand interval.
     * @return a negative value, zero or a positive value as {@code lhs} sorts before, equal to
     *         or after {@code rhs}.
     */
    @Override
    public int compare(final Interval lhs, final Interval rhs) {
        final int lhsIndex = this.header.getSequenceIndex(lhs.getContig());
        final int rhsIndex = this.header.getSequenceIndex(rhs.getContig());

        int retval = Integer.compare(lhsIndex, rhsIndex);
        if (retval != 0) {
            return retval;
        }

        retval = Integer.compare(lhs.getStart(), rhs.getStart());
        if (retval != 0) {
            return retval;
        }

        retval = Integer.compare(lhs.getEnd(), rhs.getEnd());
        if (retval != 0) {
            return retval;
        }

        // Positive strand sorts before negative strand.
        if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) {
            return -1;
        }
        if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) {
            return 1;
        }

        // Names break the final tie; a missing name sorts first.
        final String lhsName = lhs.getName();
        final String rhsName = rhs.getName();
        if (lhsName == null) {
            return (rhsName == null) ? 0 : -1;
        }
        if (rhsName == null) {
            return 1;
        }
        return lhsName.compareTo(rhsName);
    }
}
109 changes: 20 additions & 89 deletions src/main/java/htsjdk/samtools/util/IntervalList.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,7 @@
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTextHeaderCodec;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.io.*;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
Expand Down Expand Up @@ -596,44 +592,32 @@ public static IntervalList fromReader(final BufferedReader in) {
}

/**
* Writes out the list of intervals to the supplied file.
* Writes out the list of intervals to the supplied path.
*
* @param file a file to write to. If exists it will be overwritten.
* @param path a path to write to. If exists it will be overwritten.
*/
public void write(final File file) {
try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(file)) {
final FormatUtil format = new FormatUtil();

// Write out the header
if (this.header != null) {
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
codec.encode(out, this.header);
}

// Write out the intervals
for (final Interval interval : this) {
out.write(interval.getContig());
out.write('\t');
out.write(format.format(interval.getStart()));
out.write('\t');
out.write(format.format(interval.getEnd()));
out.write('\t');
out.write(interval.isPositiveStrand() ? '+' : '-');
out.write('\t');
if (interval.getName() != null) {
out.write(interval.getName());
} else {
out.write(".");
public void write(final Path path) {
try {
try (IntervalListWriter writer = new IntervalListWriter(path, this.header)) {
for (final Interval interval : this) {
writer.write(interval);
}
out.newLine();
}

out.flush();
} catch (final IOException ioe) {
throw new SAMException("Error writing out interval list to file: " + file.getAbsolutePath(), ioe);
}
catch (final IOException ioe) {
throw new SAMException("Error writing out interval list to file: " + path.toAbsolutePath(), ioe);
}
}

/**
 * Writes out the list of intervals to the supplied file by delegating to the
 * {@link Path}-based overload.
 *
 * @param file a file to write to. If exists it will be overwritten.
 */
public void write(final File file) {
    final Path target = file.toPath();
    this.write(target);
}

/**
* A utility function for generating the intersection of two IntervalLists, checks for equal dictionaries.
*
Expand Down Expand Up @@ -899,56 +883,3 @@ public int hashCode() {
return result;
}
}

/**
 * Comparator that orders intervals based on their sequence index, by coordinate
 * then by strand and finally by name.
 *
 * <p>Keys are compared in this order: sequence index of the contig in the header, start, end,
 * strand (positive before negative) and name (a null name sorts before any non-null name).
 * Numeric keys are compared with {@link Integer#compare(int, int)} rather than subtraction,
 * so the result cannot change sign through integer overflow.
 */
class IntervalCoordinateComparator implements Comparator<Interval>, Serializable {
    private static final long serialVersionUID = 1L;

    private final SAMFileHeader header;

    /**
     * Constructs a comparator using the supplied sequence header.
     */
    IntervalCoordinateComparator(final SAMFileHeader header) {
        this.header = header;
    }

    /**
     * Compares two intervals by sequence index, start, end, strand and name, in that order.
     *
     * @param lhs the left-hand interval.
     * @param rhs the right-hand interval.
     * @return a negative value, zero or a positive value as {@code lhs} sorts before, equal to
     *         or after {@code rhs}.
     */
    @Override
    public int compare(final Interval lhs, final Interval rhs) {
        final int lhsIndex = this.header.getSequenceIndex(lhs.getContig());
        final int rhsIndex = this.header.getSequenceIndex(rhs.getContig());

        int retval = Integer.compare(lhsIndex, rhsIndex);
        if (retval != 0) {
            return retval;
        }

        retval = Integer.compare(lhs.getStart(), rhs.getStart());
        if (retval != 0) {
            return retval;
        }

        retval = Integer.compare(lhs.getEnd(), rhs.getEnd());
        if (retval != 0) {
            return retval;
        }

        // Positive strand sorts before negative strand.
        if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) {
            return -1;
        }
        if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) {
            return 1;
        }

        // Names break the final tie; a missing name sorts first.
        final String lhsName = lhs.getName();
        final String rhsName = rhs.getName();
        if (lhsName == null) {
            return (rhsName == null) ? 0 : -1;
        }
        if (rhsName == null) {
            return 1;
        }
        return lhsName.compareTo(rhsName);
    }
}
Loading

0 comments on commit e8e0a6f

Please sign in to comment.