Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Addresses part of issue #18, Allele class refactoring. #1370

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jacocoTestReport {
}

dependencies {
compile "org.apache.commons:commons-lang3:3.9"
compile "org.apache.commons:commons-jexl:2.1.1"
compile "commons-logging:commons-logging:1.1.1"
compile "org.xerial.snappy:snappy-java:1.1.4"
Expand Down
123 changes: 123 additions & 0 deletions src/main/java/htsjdk/samtools/util/SequenceUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import htsjdk.samtools.*;
import htsjdk.samtools.fastq.FastqConstants;
import htsjdk.utils.ValidationUtils;
import org.apache.commons.lang3.ArrayUtils;

import java.io.File;
import java.math.BigInteger;
Expand All @@ -34,6 +35,7 @@
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -130,6 +132,113 @@ public static boolean basesEqual(final byte lhs, final byte rhs) {
return (bases[lhs] == bases[rhs]);
}

/**
 * Compares two bases by the value each one maps to in the {@code bases} lookup table.
 * <p>
 * It returns 0 if both bases map to the same table value, i.e. they represent the same
 * nucleotide or the same combination of nucleotides if ambiguous.
 * </p>
 * <p>
 * Otherwise it returns a negative or a positive value depending on what each base represents.
 * More concretely, if 'A', 'C', 'G' and 'T' are equivalent to 1, 2, 4 and 8, then the sortable
 * value associated to a valid IUPAC code is the sum of those basic nucleotide values across all
 * the nucleotides the code represents. Codes with a lower value precede codes with a larger value.
 * </p>
 *
 * @param lhs the "left" base to compare.
 * @param rhs the "right" base to compare.
 * @return 0 if both bases map to the same value, a negative value if {@code lhs} precedes
 *         {@code rhs}, a positive value otherwise. Only the sign is meaningful.
 * @throws UnsupportedOperationException if either base is negative or not less than
 *         {@code BASES_ARRAY_LENGTH}.
 */
public static int compareBases(final byte lhs, final byte rhs) {
    if (lhs < 0 || lhs >= BASES_ARRAY_LENGTH) {
        // Report the offending operand (the left one), not the right one.
        throw new UnsupportedOperationException("bad base code: " + lhs);
    }
    if (rhs < 0 || rhs >= BASES_ARRAY_LENGTH) {
        throw new UnsupportedOperationException("bad base code: " + rhs);
    }
    return Byte.compare(bases[lhs], bases[rhs]);
}

/**
 * Tells whether two base sequences are equivalent.
 * <p>
 * Elements are compared through the {@code bases} lookup table rather than by raw byte value,
 * so case is ignored: "AaTtcCGg" is considered equal to "AAttCcgG".
 * </p>
 * <p>
 * Sequences of different length are never equal. Any element that is negative or not less than
 * {@code BASES_ARRAY_LENGTH}, in either sequence, makes the result {@code false} even if
 * the remaining bases match.
 * </p>
 *
 * @param lhs first base sequence to compare.
 * @param rhs second base sequence to compare.
 * @return {@code true} iff both sequences have the same length and every pair of elements at the
 *         same position is in range and maps to the same lookup-table value.
 * @throws NullPointerException if either array is {@code null}.
 */
public static boolean equals(final byte[] lhs, final byte[] rhs) {
    final int length = lhs.length;
    if (length != rhs.length) {
        return false;
    }
    for (int pos = 0; pos < length; pos++) {
        final byte left = lhs[pos];
        final byte right = rhs[pos];
        final boolean leftInRange = left >= 0 && left < BASES_ARRAY_LENGTH;
        final boolean rightInRange = right >= 0 && right < BASES_ARRAY_LENGTH;
        // The table is only consulted once both indexes are known to be safe.
        if (!leftInRange || !rightInRange || bases[left] != bases[right]) {
            return false;
        }
    }
    return true;
}

/**
 * Calculates a hash-code that is the same for sequences whose elements map to the same
 * values in the {@code bases} lookup table (e.g. sequences that only differ in case).
 * <p>
 * Elements that are negative or not less than {@code BASES_ARRAY_LENGTH} contribute 0,
 * so differences between such values are ignored. This applies to every position, including the
 * first one.
 * </p>
 * <p>
 * The result of this method is consistent with {@link #equals(byte[], byte[])} so that:
 * </p>
 * <p><code>
 * equals(X, Y) --&gt; hashCode(X) == hashCode(Y)
 * </code></p>
 *
 * @param b the target base array.
 * @return any possible integer value; 0 for an empty array.
 * @throws NullPointerException if {@code b} is {@code null}.
 */
public static int hashCode(final byte[] b) {
    // Starting from 0 makes the first iteration yield exactly the first element's table value,
    // so the result matches the usual 31-based polynomial hash over the mapped values.
    int accumulator = 0;
    for (final byte base : b) {
        accumulator *= 31;
        if (base >= 0 && base < BASES_ARRAY_LENGTH) {
            accumulator += bases[base];
        }
    }
    return accumulator;
}

/**
 * Calculates a hash-code for a single base from the value it maps to in the {@code bases}
 * lookup table, so that bases mapping to the same value (e.g. bases that only differ in case)
 * share the same hash-code.
 * <p>
 * Bases comparing equal under {@link #basesEqual(byte, byte)} get the same hash-code.
 * </p>
 *
 * @param base the target base.
 * @return the lookup-table value for {@code base}, or 0 if {@code base} is negative or not less
 *         than {@code BASES_ARRAY_LENGTH}.
 */
public static int hashCode(final byte base) {
    if (base >= 0 && base < BASES_ARRAY_LENGTH) {
        return bases[base];
    }
    return 0;
}

/**
* Efficiently compare two IUPAC base codes, one coming from a read sequence and the other coming from
* a reference sequence, using the reference code as a 'pattern' that the read base must match.
Expand Down Expand Up @@ -274,6 +383,19 @@ public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, fi
}
}

/**
 * Checks whether a byte is a valid IUPAC base code.
 * <p>
 * A base is considered valid when it is a legal index into the {@code bases} lookup table
 * and the table holds a non-zero value for it.
 * </p>
 *
 * @param base the base to test.
 * @return {@code true} iff {@code base} is in {@code [0, BASES_ARRAY_LENGTH)} and maps to a
 *         non-zero lookup-table value.
 */
public static boolean isValidIUPAC(final byte base) {
    // The upper bound is exclusive: BASES_ARRAY_LENGTH itself is not a valid table index.
    return base >= 0 && base < BASES_ARRAY_LENGTH && bases[base] != 0;
}

/**
 * Checks whether every element of an array passes {@link #isValidIUPAC(byte)}.
 *
 * @param bases the bases to test; note that this parameter shadows the class-level lookup
 *              table of the same name within this method.
 * @return {@code true} iff all elements are valid; {@code true} for an empty array.
 * @throws NullPointerException if {@code bases} is {@code null}.
 */
public static boolean areValidIUPACs(final byte[] bases) {
    for (int i = 0; i < bases.length; i++) {
        if (!isValidIUPAC(bases[i])) {
            return false;
        }
    }
    return true;
}

public static class SequenceListsDifferException extends SAMException {
public SequenceListsDifferException() {
}
Expand Down Expand Up @@ -1117,4 +1239,5 @@ static public void getRandomBases(Random random, final int length, final byte[]
bases[i] = VALID_BASES_UPPER[random.nextInt(VALID_BASES_UPPER.length)];
}
}

}
19 changes: 19 additions & 0 deletions src/main/java/htsjdk/tribble/util/ParsingUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ public static <T> String join(String separator, Collection<T> objects) {
* @param list
* @param <T>
* @return
* @deprecated consider using {@link List#sort}
*/
public static <T extends Comparable> List<T> sortList(Collection<T> list) {
ArrayList<T> ret = new ArrayList<>();
Expand All @@ -138,6 +139,24 @@ public static <T extends Comparable> List<T> sortList(Collection<T> list) {
return ret;
}

/**
 * Copies the elements of a collection into a new list sorted with the given comparator.
 * <p>
 * The input collection is not modified.
 * </p>
 *
 * @param list the collection to copy and sort.
 * @param comparator the comparator that defines the order of the elements in the result.
 * @param <T> the element type of the returned list.
 * @return a new, sorted list; never {@code null}.
 * @throws NullPointerException if {@code list} or {@code comparator} is {@code null}.
 */
public static <T> List<T> sortList(final Collection<? extends T> list, final Comparator<? super T> comparator) {
    // Checked before copying: a null comparator would otherwise silently mean "natural order".
    Objects.requireNonNull(comparator);
    final List<T> sortedCopy = new ArrayList<>(list);
    Collections.sort(sortedCopy, comparator);
    return sortedCopy;
}

public static <T extends Comparable<T>, V> String sortedString(Map<T, V> c) {
List<T> t = new ArrayList<>(c.keySet());
Collections.sort(t);
Expand Down
174 changes: 174 additions & 0 deletions src/main/java/htsjdk/variant/variantcontext/AbstractAllele.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
package htsjdk.variant.variantcontext;

/**
 * Provides most common implementations for {@link Allele} methods.
 * <p>
 * Every query method defaults to the "negative" answer ({@code false}, {@code null}, zero bases)
 * and every conversion defaults to throwing {@link UnsupportedOperationException}; subclasses
 * override only what applies to them. Note that {@link #encodeAsString()} throws by default and
 * that {@link #encodeAsBytes()}, {@link #getDisplayString()}, {@link #getDisplayBases()} and
 * {@link #toString()} are all derived from it.
 * </p>
 */
abstract class AbstractAllele implements Allele {

    AbstractAllele() {
    }

    /**
     * Default: this allele cannot be turned into an alternative allele.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Allele asAlternative() {
        throw new UnsupportedOperationException("cannot be alternative: " + this);
    }

    /**
     * Default: this allele cannot be turned into a reference allele.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Allele asReference() {
        throw new UnsupportedOperationException("cannot be reference: " + this);
    }

    /**
     * Returns the bytes of {@link #encodeAsString()}.
     * <p>
     * The charset is fixed (UTF-8) so that the result does not depend on the JVM's default charset.
     * </p>
     */
    @Override
    public byte[] encodeAsBytes() {
        return encodeAsString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    }

    /** Default: {@code false}. */
    @Override
    public boolean isAlternative() {
        return false;
    }

    /** Default: no bases. */
    @Override
    public int numberOfBases() {
        return 0;
    }

    /**
     * Default: there are no bases, so any index is out of bounds.
     *
     * @throws IndexOutOfBoundsException always.
     */
    @Override
    public byte baseAt(final int index) {
        throw new IndexOutOfBoundsException();
    }

    /**
     * Default: not supported.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Allele extend(final byte[] tail) {
        throw new UnsupportedOperationException();
    }

    /** Default: {@code false}. */
    @Override
    public boolean isBreakend() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isPairedBreakend() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isContigInsertion() {
        return false;
    }

    /** Default: {@code null}. */
    @Override
    public StructuralVariantType getStructuralVariantType() {
        return null;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isStructural() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isNoCall() {
        return false;
    }

    /**
     * Default: {@code false}.
     * <p>
     * NOTE(review): {@link #isNoCall()} also defaults to {@code false}; presumably each subclass
     * overrides one of the two - confirm against the concrete allele classes.
     * </p>
     */
    @Override
    public boolean isCalled() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isReference() {
        return false;
    }

    /** Always the negation of {@link #isReference()}. */
    @Override
    public boolean isNonReference() {
        return !isReference();
    }

    /** Default: {@code false}. */
    @Override
    public boolean isSymbolic() {
        return false;
    }

    /** Default: {@code null}. */
    @Override
    public String getSymbolicID() {
        return null;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isInline() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isBreakpoint() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isSingleBreakend() {
        return false;
    }

    /** Default: {@code null}. */
    @Override
    public Breakend asBreakend() {
        return null;
    }

    /**
     * Default: not supported; subclasses are expected to override it, as several other
     * methods of this class delegate to it.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public String encodeAsString() {
        throw new UnsupportedOperationException();
    }

    /** Default: {@code null}. */
    @Override
    public String getContigID() {
        return null;
    }

    /** Default: {@code false}. */
    @Override
    public boolean hasContigID() {
        return false;
    }

    /**
     * Builds a string out of the bases reported by {@link #numberOfBases()} and
     * {@link #baseAt(int)}, casting each base byte to a {@code char}.
     *
     * @return never {@code null}; the empty string when there are no bases.
     */
    @Override
    public String getBaseString() {
        final int numberOfBases = numberOfBases();
        if (numberOfBases == 0) {
            return "";
        }
        if (numberOfBases == 1) {
            // Single-base alleles do not need a builder.
            return String.valueOf((char) baseAt(0));
        }
        final StringBuilder builder = new StringBuilder(numberOfBases);
        for (int i = 0; i < numberOfBases; i++) {
            builder.append((char) baseAt(i));
        }
        return builder.toString();
    }

    /** Default: same as {@link #encodeAsString()}. */
    @Override
    public String getDisplayString() {
        return encodeAsString();
    }

    /** Default: same as {@link #encodeAsBytes()}. */
    @Override
    public byte[] getDisplayBases() {
        return encodeAsBytes();
    }

    /** Default: {@code false}. */
    @Override
    public boolean isSpanDeletion() {
        return false;
    }

    /** Default: {@code false}. */
    @Override
    public boolean isUnspecifiedAlternative() {
        return false;
    }

    /**
     * Returns {@link #encodeAsString()}, followed by {@code "*"} when {@link #isReference()}
     * is {@code true}.
     */
    @Override
    public String toString() {
        return encodeAsString() + (isReference() ? "*" : "");
    }
}
Loading