-
Notifications
You must be signed in to change notification settings - Fork 242
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add an IntervalCodec that is useful for sorting large sets of Interv…
…als (#1288) * Added IntervalCodec that is useful for sorting large sets of Intervals. * Made IntervalCoordinateComparator public * Adding an IntervalListWriter class to write Intervals.
- Loading branch information
1 parent
d771b30
commit e8e0a6f
Showing
7 changed files
with
454 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
/* | ||
* The MIT License | ||
* | ||
* Copyright (c) 2019 Nils Homer | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
* THE SOFTWARE. | ||
*/ | ||
package htsjdk.samtools.util; | ||
|
||
import htsjdk.samtools.SAMSequenceDictionary; | ||
import java.io.InputStream; | ||
import java.io.OutputStream; | ||
|
||
public class IntervalCodec implements SortingCollection.Codec<Interval> { | ||
|
||
private final SAMSequenceDictionary dict; | ||
|
||
private final BinaryCodec binaryCodec = new BinaryCodec(); | ||
|
||
/** | ||
* Creates a new binary codec to read or write. | ||
* @param dict the sequence dictionary associated with the intervals. | ||
*/ | ||
public IntervalCodec(final SAMSequenceDictionary dict) { | ||
this.dict = dict; | ||
} | ||
|
||
@Override | ||
public IntervalCodec clone() { | ||
return new IntervalCodec(this.dict); | ||
} | ||
|
||
|
||
/** | ||
* Sets the output stream that records will be written to. | ||
*/ | ||
@Override | ||
public void setOutputStream(final OutputStream os) { | ||
this.binaryCodec.setOutputStream(os); | ||
} | ||
|
||
/** | ||
* Sets the output stream that records will be written to. | ||
*/ | ||
public void setOutputStream(final OutputStream os, final String filename) { | ||
this.binaryCodec.setOutputStream(os); | ||
this.binaryCodec.setOutputFileName(filename); | ||
} | ||
|
||
/** | ||
* Sets the input stream that records will be read from. | ||
*/ | ||
@Override | ||
public void setInputStream(final InputStream is) { | ||
this.binaryCodec.setInputStream(is); | ||
} | ||
|
||
/** | ||
* Sets the input stream that records will be read from. | ||
*/ | ||
public void setInputStream(final InputStream is, final String filename) { | ||
this.binaryCodec.setInputStream(is); | ||
this.binaryCodec.setInputFileName(filename); | ||
} | ||
|
||
/** | ||
* Writes the interval to the output stream. | ||
* @param interval the interval to write. | ||
*/ | ||
@Override | ||
public void encode(final Interval interval) { | ||
final String name = interval.getName(); | ||
this.binaryCodec.writeInt(this.dict.getSequenceIndex(interval.getContig())); | ||
this.binaryCodec.writeInt(interval.getStart()); | ||
this.binaryCodec.writeInt(interval.getEnd()); | ||
this.binaryCodec.writeBoolean(interval.isNegativeStrand()); | ||
this.binaryCodec.writeBoolean(name != null); | ||
if (name != null) { | ||
this.binaryCodec.writeString(name, false, true); | ||
} | ||
} | ||
|
||
/** | ||
* Reads an interval from the input stream. | ||
* @return null if no more intervals, otherwise the next interval. | ||
*/ | ||
@Override | ||
public Interval decode() { | ||
final int sequenceIndex; | ||
try { | ||
sequenceIndex = this.binaryCodec.readInt(); | ||
} catch (final RuntimeEOFException e) { | ||
return null; | ||
} | ||
return new Interval( | ||
this.dict.getSequence(sequenceIndex).getSequenceName(), | ||
this.binaryCodec.readInt(), | ||
this.binaryCodec.readInt(), | ||
this.binaryCodec.readBoolean(), | ||
(this.binaryCodec.readBoolean()) ? this.binaryCodec.readNullTerminatedString() : null | ||
); | ||
} | ||
} |
80 changes: 80 additions & 0 deletions
80
src/main/java/htsjdk/samtools/util/IntervalCoordinateComparator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
/* | ||
* The MIT License | ||
* | ||
* Copyright (c) 2019 Nils Homer | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
* in the Software without restriction, including without limitation the rights | ||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
* copies of the Software, and to permit persons to whom the Software is | ||
* furnished to do so, subject to the following conditions: | ||
* | ||
* The above copyright notice and this permission notice shall be included in | ||
* all copies or substantial portions of the Software. | ||
* | ||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
* THE SOFTWARE. | ||
*/ | ||
package htsjdk.samtools.util; | ||
|
||
import htsjdk.samtools.SAMFileHeader; | ||
|
||
import java.io.Serializable; | ||
import java.util.Comparator; | ||
|
||
/** | ||
* Comparator that orders intervals based on their sequence index, by coordinate | ||
* then by strand and finally by name. | ||
*/ | ||
public class IntervalCoordinateComparator implements Comparator<Interval>, Serializable { | ||
private static final long serialVersionUID = 1L; | ||
|
||
private final SAMFileHeader header; | ||
|
||
/** Constructs a comparator using the supplied sequence header. */ | ||
public IntervalCoordinateComparator(final SAMFileHeader header) { | ||
this.header = header; | ||
} | ||
|
||
@Override | ||
public int compare(final Interval lhs, final Interval rhs) { | ||
final int lhsIndex = this.header.getSequenceIndex(lhs.getContig()); | ||
final int rhsIndex = this.header.getSequenceIndex(rhs.getContig()); | ||
int retval = lhsIndex - rhsIndex; | ||
|
||
if (retval == 0) { | ||
retval = lhs.getStart() - rhs.getStart(); | ||
} | ||
if (retval == 0) { | ||
retval = lhs.getEnd() - rhs.getEnd(); | ||
} | ||
if (retval == 0) { | ||
if (lhs.isPositiveStrand() && rhs.isNegativeStrand()) { | ||
retval = -1; | ||
} else if (lhs.isNegativeStrand() && rhs.isPositiveStrand()) { | ||
retval = 1; | ||
} | ||
} | ||
if (retval == 0) { | ||
if (lhs.getName() == null) { | ||
if (rhs.getName() == null) { | ||
return 0; | ||
} else { | ||
return -1; | ||
} | ||
} else if (rhs.getName() == null) { | ||
return 1; | ||
} else { | ||
return lhs.getName().compareTo(rhs.getName()); | ||
} | ||
} | ||
|
||
return retval; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.