Skip to content

Commit

Permalink
Merge branch 'master' into TASK-298-CRAM_implementation_reports_valid…
Browse files Browse the repository at this point in the history
…ation_errors_at_container_granularity

# Conflicts:
#	src/main/java/htsjdk/samtools/CRAMIterator.java
  • Loading branch information
Anton Mazur committed Oct 30, 2018
2 parents cba9220 + c484241 commit cb13b65
Show file tree
Hide file tree
Showing 211 changed files with 13,144 additions and 2,563 deletions.
15 changes: 14 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,20 @@ cache:
jdk:
- oraclejdk8
- openjdk8
script: ./gradlew test jacocoTestReport;
matrix:
fast_finish: true
allow_failures:
- env: TEST_TYPE=FTP
include:
- jdk: oraclejdk8
env: TEST_TYPE=FTP
script:
- if [[ $TEST_TYPE == "FTP" ]]; then
./gradlew testFTP jacocoTestReport;
else
./gradlew test jacocoTestReport;
fi

after_success:
- bash <(curl -s https://codecov.io/bash)
- echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'";
Expand Down
33 changes: 22 additions & 11 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ plugins {
id 'maven'
id 'signing'
id 'jacoco'
id 'com.palantir.git-version' version '0.5.1'
id 'com.github.johnrengelman.shadow' version '1.2.3'
id 'com.github.maiflai.scalatest' version '0.15'
id 'com.palantir.git-version' version '0.11.0'
id 'com.github.johnrengelman.shadow' version '2.0.4'
id 'com.github.maiflai.scalatest' version '0.22'
}

repositories {
Expand All @@ -36,13 +36,14 @@ dependencies {
compile "org.xerial.snappy:snappy-java:1.1.4"
compile "org.apache.commons:commons-compress:1.4.1"
compile "org.tukaani:xz:1.5"
compile "gov.nih.nlm.ncbi:ngs-java:1.2.4"
compile "gov.nih.nlm.ncbi:ngs-java:2.9.0"

testCompile "org.scala-lang:scala-library:2.12.1"
testCompile "org.scalatest:scalatest_2.12:3.0.1"
testRuntime 'org.pegdown:pegdown:1.4.2' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.9.9"
testCompile "org.scala-lang:scala-library:2.12.6"
testCompile "org.scalatest:scalatest_2.12:3.0.5"
testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.14.3"
testCompile "com.google.jimfs:jimfs:1.1"
testCompile "com.google.guava:guava:26.0-jre"
}

sourceCompatibility = 1.8
Expand Down Expand Up @@ -100,11 +101,22 @@ test {
tags {
exclude "slow"
exclude "broken"
exclude "ftp"
if (System.env.CI == "false") exclude "sra"
if (!OperatingSystem.current().isUnix()) exclude "unix"
}
} dependsOn findScalaAndJavaTypes


// Separate Test task for the tests tagged "ftp", which the default `test` task excludes.
task testFTP(type: Test) {
description = "Runs the tests that require connection to a remote ftp server"
tags {
// Select only the tests tagged "ftp" ...
include "ftp"
// ... and still leave out anything tagged "slow" or "broken".
exclude "slow"
exclude "broken"
}
}

task testSRA(type: Test) {
description = "Run the SRA tests"
jvmArgs += '-Dsamjdk.sra_libraries_download=true'
Expand All @@ -115,9 +127,8 @@ task testSRA(type: Test) {
}
}

task wrapper(type: Wrapper) {
description = "Regenerate the gradle wrapper"
gradleVersion = '3.2.1'
wrapper {
gradleVersion = '4.8.1'
}

// This is a hack to disable the java 8 default javadoc lint until we fix the html formatting
Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
3 changes: 1 addition & 2 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#Fri Jan 20 17:10:11 EST 2017
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-4.8.1-bin.zip
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-3.2.1-all.zip
23 changes: 13 additions & 10 deletions gradlew
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/usr/bin/env sh

##############################################################################
##
Expand Down Expand Up @@ -33,11 +33,11 @@ DEFAULT_JVM_OPTS=""
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD="maximum"

warn ( ) {
warn () {
echo "$*"
}

die ( ) {
die () {
echo
echo "$*"
echo
Expand Down Expand Up @@ -154,16 +154,19 @@ if $cygwin ; then
esac
fi

# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
function splitJvmOpts() {
JVM_OPTS=("$@")
# Escape application args
# save: print every argument re-quoted for the shell, so that the captured output
# (APP_ARGS) can be expanded again by the `eval set --` below.
# For each argument the sed script:
#   - replaces every ' with '\''
#   - puts a ' in front of the first line
#   - appends "' \" to the last line
# The trailing echo " " ends the list after the last line-continuation.
save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
APP_ARGS=$(save "$@")

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [[ "$(uname)" == "Darwin" ]] && [[ "$HOME" == "$PWD" ]]; then
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi

exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
exec "$JAVACMD" "$@"
4 changes: 2 additions & 2 deletions src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
*/
package htsjdk.samtools;

import javax.xml.bind.annotation.XmlTransient;

import java.io.Serializable;
import java.util.LinkedHashMap;
import java.util.Map;
Expand All @@ -33,7 +33,7 @@
* Base class for the various concrete records in a SAM header, providing uniform
* access to the attributes.
*/
@XmlTransient /* don't consider this class for XML-serialization */

public abstract class AbstractSAMHeaderRecord implements Serializable {
public static final long serialVersionUID = 1L;

Expand Down
13 changes: 9 additions & 4 deletions src/main/java/htsjdk/samtools/BAMFileWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
/**
* Concrete implementation of SAMFileWriter for writing gzipped BAM files.
*/
class BAMFileWriter extends SAMFileWriterImpl {
public class BAMFileWriter extends SAMFileWriterImpl {

private final BinaryCodec outputBinaryCodec;
private BAMRecordCodec bamRecordCodec = null;
Expand Down Expand Up @@ -78,7 +78,7 @@ protected BAMFileWriter(final OutputStream os, final File file, final int compre
}

protected BAMFileWriter(final OutputStream os, final String absoluteFilename, final int compressionLevel, final DeflaterFactory deflaterFactory) {
blockCompressedOutputStream = new BlockCompressedOutputStream(os, null, compressionLevel, deflaterFactory);
blockCompressedOutputStream = new BlockCompressedOutputStream(os, (Path)null, compressionLevel, deflaterFactory);
outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream);
outputBinaryCodec.setOutputFileName(absoluteFilename);
}
Expand Down Expand Up @@ -200,8 +200,13 @@ protected static void writeHeader(final BinaryCodec outputBinaryCodec, final SAM
writeHeader(outputBinaryCodec, samFileHeader, headerString);
}

protected static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) {
final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, null);
/**
* Write a BAM file header to an output stream in block compressed BAM format.
* @param outputStream the stream to write the BAM header to
* @param samFileHeader the header to write
*/
public static void writeHeader(final OutputStream outputStream, final SAMFileHeader samFileHeader) {
final BlockCompressedOutputStream blockCompressedOutputStream = new BlockCompressedOutputStream(outputStream, (Path)null);
final BinaryCodec outputBinaryCodec = new BinaryCodec(blockCompressedOutputStream);
writeHeader(outputBinaryCodec, samFileHeader);
try {
Expand Down
116 changes: 111 additions & 5 deletions src/main/java/htsjdk/samtools/BAMRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

import static htsjdk.samtools.SAMTag.CG;

/**
* Wrapper class for binary BAM records.
Expand All @@ -39,6 +40,28 @@ public class BAMRecord extends SAMRecord {
*/
private static final int READ_NAME_OFFSET = 0;

/**
 * Number of bytes one cigar operator occupies in the binary record. Multiplying a number of
 * operators (equivalently, the length of the int[] form of a cigar) by this value gives the
 * size in bytes of the encoded cigar, which is how this class sizes its byte buffers.
 */
static public final short CIGAR_SIZE_MULTIPLIER = 4;

/**
 * Maximal number of cigar operators that can be represented normally in the cigar part of the bam record.
 * Records that have larger cigars will have their Cigars encoded to int[] and placed in the CG tag in the attributes (BAM only)
 * This should happen upon encoding. In place of the Cigar a sentinel value will be placed:
 * {@code <READLENGTH>S<REFERENCELENGTH>N}
 * <p>
 * When a BAM record is decoded, the sentinel cigar informs of the existence of the CG tag, which is decoded and removed.
 * The sentinel value is then replaced with the actual cigar (in memory).
 */
public final static int MAX_CIGAR_OPERATORS = 0xffff;

/**
 * Upper bound on the length of a single cigar element: the largest value that fits in 28 bits.
 * NOTE(review): presumably 28 bits because the BAM encoding keeps the operator code in the
 * remaining 4 bits of each 32-bit cigar word; confirm against the SAM/BAM specification.
 */
public final static int MAX_CIGAR_ELEMENT_LENGTH = (1 << 28) - 1;
/**
 * Number of operators in "Sentinel" cigar xSyN
 */
private final static int LONG_CIGAR_SENTINEL_LENGTH = 2;

/**
* Variable-length part of BAMRecord. Lazily decoded.
*/
Expand Down Expand Up @@ -203,6 +226,12 @@ protected void setAttribute(final short tag, final Object value, final boolean i
*/
@Override
public void clearAttributes() {
// If there's a long cigar, the CG might be "hiding" in the attributes, and
// if the original attributes haven't been parsed yet, we will lose the long cigar.
// by "getting" the cigar prior to clearing the attributes, we protect against that.
if (!mAttributesDecoded) {
getCigar();
}
mAttributesDecoded = true;
mBinaryDataStale = true;
super.clearAttributes();
Expand Down Expand Up @@ -244,10 +273,15 @@ public int getReadNameLength() {
public Cigar getCigar() {
if (mRestOfBinaryData != null && !mCigarDecoded) {
final int cigarOffset = readNameSize();
final ByteBuffer byteBuffer = ByteBuffer.wrap(mRestOfBinaryData, cigarOffset, cigarSize());
final ByteBuffer byteBuffer = ByteBuffer.wrap(mRestOfBinaryData, cigarOffset, cigarSize());
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
super.initializeCigar(BinaryCigarCodec.decode(byteBuffer));
mCigarDecoded = true;

if (getCigarLength() == LONG_CIGAR_SENTINEL_LENGTH && isSentinelCigar(super.getCigar(), getReadLength())) {
extractCigarFromCGAttribute(super.getCigar());
}

if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(validateCigar(-1L), -1, getValidationStringency());
Expand All @@ -256,6 +290,72 @@ public Cigar getCigar() {
return super.getCigar();
}

/**
 * Tells whether the given cigar has the shape of the "sentinel cigar": the placeholder that
 * signals the real cigar was too long for the BAM cigar field and must be read from the CG tag
 * (introduced in SAM v1.6).
 *
 * @param cigar      the cigar to inspect
 * @param readLength the read length the leading soft clip is compared against; a value of 0
 *                   disables that comparison
 * @return {@code true} if the cigar consists of exactly two elements, an S followed by an N,
 *         and the S element's length equals {@code readLength} (or {@code readLength} is 0)
 */
static boolean isSentinelCigar(final Cigar cigar, final int readLength) {
    if (cigar.numCigarElements() != 2) {
        return false;
    }
    if (cigar.getCigarElement(1).getOperator() != CigarOperator.N) {
        return false;
    }
    if (cigar.getCigarElement(0).getOperator() != CigarOperator.S) {
        return false;
    }
    // Implicit assumption: readLength equals the length of the read in the cigar, unless readLength == 0.
    return readLength == 0 || cigar.getCigarElement(0).getLength() == readLength;
}


/**
 * Swaps the in-memory sentinel cigar for the real cigar carried in the CG attribute.
 * <p>
 * A cigar with more than 64K operators cannot be stored in the BAM cigar field, so a sentinel
 * cigar is stored there and the real one travels in the CG tag as an int[]. This method decodes
 * that array, checks it against the sentinel, installs it as the record's cigar and then drops
 * the CG attribute. If the record has no CG attribute the method returns without changes.
 *
 * @param sentinelCigar the placeholder cigar decoded from the record's cigar field
 * @throws IllegalStateException if the decoded cigar is short enough that it should not have
 *                               been placed in CG, or if its reference length or read length
 *                               differs from the sentinel's
 */
private void extractCigarFromCGAttribute(final Cigar sentinelCigar) throws IllegalStateException {
    final int[] encodedCigar = (int[]) getAttribute(SAMTagUtil.getSingleton().CG);
    if (encodedCigar == null) {
        return;
    }

    // Lay the ints out as little-endian bytes so the binary cigar codec can read them.
    final ByteBuffer rawCigar = ByteBuffer.allocate(encodedCigar.length * CIGAR_SIZE_MULTIPLIER);
    rawCigar.order(ByteOrder.LITTLE_ENDIAN);
    rawCigar.asIntBuffer().put(encodedCigar);

    final Cigar longCigar = BinaryCigarCodec.decode(rawCigar);

    // Sanity check: a cigar that fits in the regular field has no business being in CG.
    if (longCigar.numCigarElements() <= MAX_CIGAR_OPERATORS) {
        final String tooShort = String.format(
                "Only Cigar with > %d operators should be placed in CG tag. Found %d operators. \n Here's the Cigar:\n%s",
                MAX_CIGAR_OPERATORS,
                longCigar.getCigarElements().size(),
                longCigar.toString());
        throw new IllegalStateException(tooShort);
    }

    // The sentinel's N element must span the same reference length as the real cigar.
    if (longCigar.getReferenceLength() != sentinelCigar.getReferenceLength()) {
        final String referenceMismatch = String.format(
                "Sentinel cigar and %s cigar should have the same reference length. Found %d and %d.\n Here's the Cigar:\n%s",
                CG.name(),
                sentinelCigar.getReferenceLength(),
                longCigar.getReferenceLength(),
                longCigar.toString());
        throw new IllegalStateException(referenceMismatch);
    }

    // Likewise the sentinel's S element must cover the same read length.
    if (longCigar.getReadLength() != sentinelCigar.getReadLength()) {
        final String readMismatch = String.format(
                "Sentinel cigar and %s cigar should have the same read length. Found %d and %d.\n Here's the Cigar:\n%s",
                CG.name(),
                sentinelCigar.getReadLength(),
                longCigar.getReadLength(),
                longCigar.toString());
        throw new IllegalStateException(readMismatch);
    }

    // initializeCigar rather than setCigar, so as to not clobber the indexingBin.
    initializeCigar(longCigar);

    // The cigar now lives in the record itself; remove the CG attribute.
    setAttribute(SAMTagUtil.getSingleton().CG, null);
}

/**
* Avoids decoding CIGAR in order to get length.
*/
Expand Down Expand Up @@ -308,11 +408,17 @@ private void decodeAttributes() {
if (mAttributesDecoded) {
return;
}

mAttributesDecoded = true;
final int tagsOffset = readNameSize() + cigarSize() + basesSize() + qualsSize();
final int tagsSize = mRestOfBinaryData.length - tagsOffset;
final SAMBinaryTagAndValue attributes = BinaryTagCodec.readTags(mRestOfBinaryData, tagsOffset, tagsSize, getValidationStringency());
setAttributes(attributes);

// if there's a CG tag, we should getCigar() so that the CG tag has a chance of turning into the CIGAR
if (hasAttribute(CG.name())) {
getCigar();
}
}

private byte[] decodeBaseQualities() {
Expand All @@ -334,7 +440,7 @@ private byte[] decodeBaseQualities() {

private String decodeReadName() {
    // The stored name length counts the terminating null byte; leave it out of the decoded string.
    // (A second, token-identical return statement followed this one and was unreachable; it is removed.)
    return StringUtil.bytesToString(mRestOfBinaryData, READ_NAME_OFFSET, mReadNameLength - 1);
}

private byte[] decodeReadBases() {
Expand All @@ -344,7 +450,7 @@ private byte[] decodeReadBases() {
final int basesOffset = readNameSize() + cigarSize();
try {
return SAMUtils.compressedBasesToBytes(mReadLength, mRestOfBinaryData, basesOffset);
} catch ( final IllegalArgumentException ex ) {
} catch (final IllegalArgumentException ex) {
final String msg = ex.getMessage() + " in read: " + getReadName();
throw new IllegalStateException(msg, ex);
}
Expand All @@ -358,11 +464,11 @@ private int readNameSize() {
}

private int cigarSize() {
    // Size in bytes of the encoded cigar: one CIGAR_SIZE_MULTIPLIER-byte word per operator.
    // (The stale `mCigarLength * 4` return that preceded this one made it unreachable; the
    // constant has the same value, so the result is unchanged.)
    return mCigarLength * CIGAR_SIZE_MULTIPLIER;
}

private int basesSize() {
    // Half the read length, rounded up.
    // (A second, token-identical return statement followed this one and was unreachable; it is removed.)
    return (mReadLength + 1) / 2;
}

private int qualsSize() {
Expand Down
Loading

0 comments on commit cb13b65

Please sign in to comment.