Skip to content

Commit

Permalink
Adding support for 0-length B arrays in SAM files to conform to 1.6 s…
Browse files Browse the repository at this point in the history
…pec (#1194)

* Adding support for 0-length B arrays in SAM files to conform to 1.6 spec.
* adding explicit test dependency on a recent version of guava, we
already have a transitive test dependency on an older version so this shouldn't be
a big deal (unless of course it breaks something unexpected)
* adding new method for combining Dataproviders to TestNGUtils
  • Loading branch information
lbergelson authored Oct 24, 2018
1 parent 334800e commit 44baddf
Show file tree
Hide file tree
Showing 13 changed files with 721 additions and 29 deletions.
1 change: 1 addition & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ dependencies {
testRuntime 'org.pegdown:pegdown:1.6.0' // Necessary for generating HTML reports with ScalaTest
testCompile "org.testng:testng:6.14.3"
testCompile "com.google.jimfs:jimfs:1.1"
testCompile "com.google.guava:guava:26.0-jre"
}

sourceCompatibility = 1.8
Expand Down
5 changes: 2 additions & 3 deletions src/main/java/htsjdk/samtools/SAMLineParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,9 @@ private void reportErrorParsingLine(final String reason) {
private void reportErrorParsingLine(final Exception e) {
final String errorMessage = makeErrorString(e.getMessage());
if (validationStringency == ValidationStringency.STRICT) {
throw new SAMFormatException(errorMessage);
throw new SAMFormatException(errorMessage, e);
} else if (validationStringency == ValidationStringency.LENIENT) {
System.err
.println("Ignoring SAM validation error due to lenient parsing:");
System.err.println("Ignoring SAM validation error due to lenient parsing:");
System.err.println(errorMessage);
}
}
Expand Down
3 changes: 0 additions & 3 deletions src/main/java/htsjdk/samtools/SAMRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -1419,9 +1419,6 @@ public Object getAttribute(final short tag) {
* String values are not validated to ensure that they conform to SAM spec.
*/
public void setAttribute(final String tag, final Object value) {
if (value != null && value.getClass().isArray() && Array.getLength(value) == 0) {
throw new IllegalArgumentException("Empty value passed for tag " + tag);
}
setAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag), value);
}

Expand Down
33 changes: 18 additions & 15 deletions src/main/java/htsjdk/samtools/TextTagCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ public class TextTagCodec {
// 3 fields for non-empty strings 2 fields if the string is empty.
private static final int NUM_TAG_FIELDS = 3;

private static final String[] EMPTY_STRING_ARRAY = new String[0];

/**
* This is really a local variable of decode(), but allocated here to reduce allocations.
*/
Expand All @@ -52,7 +54,7 @@ public class TextTagCodec {
/**
* Convert in-memory representation of tag to SAM text representation.
* @param tagName Two-character tag name.
* @param value Tag value as approriate Object subclass.
* @param value Tag value as appropriate Object subclass.
* @return SAM text String representation, i.e. name:type:value
*/
public String encode(final String tagName, Object value) {
Expand All @@ -71,7 +73,7 @@ public String encode(final String tagName, Object value) {
// H should never happen anymore.
value = StringUtil.bytesToHexString((byte[])value);
} else if (tagType == 'B') {
value = getArrayType(value, false) + "," + encodeArrayValue(value);
value = getArrayType(value, false) + encodeArrayValue(value);
} else if (tagType == 'i') {
final long longVal = ((Number) value).longValue();
// as the spec says: [-2^31, 2^32)
Expand All @@ -83,7 +85,7 @@ public String encode(final String tagName, Object value) {
return sb.toString();
}

private char getArrayType(final Object array, final boolean isUnsigned) {
private static char getArrayType(final Object array, final boolean isUnsigned) {
final char type;
final Class<?> componentType = array.getClass().getComponentType();
if (componentType == Float.TYPE) {
Expand All @@ -97,18 +99,18 @@ private char getArrayType(final Object array, final boolean isUnsigned) {
return (isUnsigned? Character.toUpperCase(type): type);
}

private String encodeArrayValue(final Object value) {
final StringBuilder ret = new StringBuilder(Array.get(value, 0).toString());
private static String encodeArrayValue(final Object value) {
final int length = Array.getLength(value);
for (int i = 1; i < length; ++i) {
final StringBuilder ret = new StringBuilder();
for (int i = 0; i < length; ++i) {
ret.append(',');
ret.append(Array.get(value, i).toString());
}
return ret.toString();

}

private long[] widenToUnsigned(final Object array) {
private static long[] widenToUnsigned(final Object array) {
final Class<?> componentType = array.getClass().getComponentType();
final long mask;
if (componentType == Byte.TYPE) mask = 0xffL;
Expand All @@ -127,7 +129,7 @@ String encodeUnsignedArray(final String tagName, final Object array) {
throw new IllegalArgumentException("Non-array passed to encodeUnsignedArray: " + array.getClass());
}
final long[] widened = widenToUnsigned(array);
return tagName + ":B:" + getArrayType(array, true) + "," + encodeArrayValue(widened);
return tagName + ":B:" + getArrayType(array, true) + encodeArrayValue(widened);
}

/**
Expand Down Expand Up @@ -175,7 +177,7 @@ public Object setValue(final Object o) {
};
}

private Object convertStringToObject(final String type, final String stringVal) {
private static Object convertStringToObject(final String type, final String stringVal) {
if (type.equals("Z")) {
return stringVal;
} else if (type.equals("A")) {
Expand Down Expand Up @@ -219,17 +221,18 @@ else if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
}
}

private Object covertStringArrayToObject(final String stringVal) {
private static Object covertStringArrayToObject(final String stringVal) {
final String[] elementTypeAndValue = new String[2];
if (StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ',') != 2) {
throw new SAMFormatException("Tag of type B should have an element type followed by comma");
}

final int numberOfTokens = StringUtil.splitConcatenateExcessTokens(stringVal, elementTypeAndValue, ',');

if (elementTypeAndValue[0].length() != 1) {
throw new SAMFormatException("Unrecognized element type for array tag value: " + elementTypeAndValue[0]);
}

final char elementType = elementTypeAndValue[0].charAt(0);
final String[] stringValues = elementTypeAndValue[1].split(",");
if (stringValues.length == 0) throw new SAMFormatException("Tag of type B should have at least one element");

final String[] stringValues = elementTypeAndValue[1] != null ? elementTypeAndValue[1].split(",") : EMPTY_STRING_ARRAY;
if (elementType == 'f') {
final float[] ret = new float[stringValues.length];
for (int i = 0; i < stringValues.length; ++i) {
Expand Down
86 changes: 81 additions & 5 deletions src/test/java/htsjdk/samtools/SAMRecordUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,25 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.TestUtil;
import htsjdk.utils.TestNGUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.*;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.lang.reflect.Array;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;

public class SAMRecordUnitTest extends HtsjdkTest {

private static final String ARRAY_TAG = "xa";

@DataProvider(name = "serializationTestData")
public Object[][] getSerializationTestData() {
return new Object[][] {
Expand Down Expand Up @@ -1169,4 +1173,76 @@ private Object[][] hasAttributeTestData() throws IOException {
public void testHasAttribute(final SAMRecord samRecord, final String tag, final boolean expectedHasAttribute) {
Assert.assertEquals(samRecord.hasAttribute(tag), expectedHasAttribute);
}

@Test
public void test_setAttribute_empty_array() {
final SAMFileHeader header = new SAMFileHeader();
final SAMRecord record = new SAMRecord(header);
Assert.assertNull(record.getStringAttribute(ARRAY_TAG));
record.setAttribute(ARRAY_TAG, new int[0]);
Assert.assertNotNull(record.getSignedIntArrayAttribute(ARRAY_TAG));
Assert.assertEquals(record.getSignedIntArrayAttribute(ARRAY_TAG), new int[0]);
Assert.assertEquals(record.getAttribute(ARRAY_TAG), new char[0]);
record.setAttribute(ARRAY_TAG, null);
Assert.assertNull(record.getStringAttribute(ARRAY_TAG));
}

private static Object[][] getEmptyArrays() {
return new Object[][]{
{new int[0], int[].class},
{new short[0], short[].class},
{new byte[0], byte[].class},
{new float[0], float[].class},
};
}

private static Object[][] getFileExtensions(){
return new Object[][]{
{BamFileIoUtils.BAM_FILE_EXTENSION}, {IOUtil.SAM_FILE_EXTENSION}, {CramIO.CRAM_FILE_EXTENSION}
};
}

@DataProvider
public Object[][] getEmptyArraysAndExtensions(){
return TestNGUtils.cartesianProduct(getEmptyArrays(), getFileExtensions());
}

@Test(dataProvider = "getEmptyArraysAndExtensions")
public void testWriteSamWithEmptyArray(Object emptyArray, Class<?> arrayClass, String fileExtension) throws IOException {
Assert.assertEquals(emptyArray.getClass(), arrayClass);
Assert.assertEquals(Array.getLength(emptyArray), 0);

final SAMRecordSetBuilder samRecords = new SAMRecordSetBuilder();
samRecords.addFrag("Read", 0, 100, false);
final SAMRecord record = samRecords.getRecords().iterator().next();
record.setAttribute(ARRAY_TAG, emptyArray);
checkArrayIsEmpty(ARRAY_TAG, record, arrayClass);

final Path tmp = Files.createTempFile("tmp", fileExtension);
IOUtil.deleteOnExit(tmp);

final SAMFileWriterFactory writerFactory = new SAMFileWriterFactory()
.setCreateMd5File(false)
.setCreateIndex(false);
final Path reference = IOUtil.getPath("src/test/resources/htsjdk/samtools/one-contig.fasta");
try (final SAMFileWriter samFileWriter = writerFactory.makeWriter(samRecords.getHeader(), false, tmp, reference)) {
samFileWriter.addAlignment(record);
}

try (final SamReader reader = SamReaderFactory.makeDefault()
.referenceSequence(reference)
.open(tmp)) {
final SAMRecordIterator iterator = reader.iterator();
Assert.assertTrue(iterator.hasNext());
final SAMRecord recordFromDisk = iterator.next();
checkArrayIsEmpty(ARRAY_TAG, recordFromDisk, arrayClass);
}
}

private static void checkArrayIsEmpty(String arrayTag, SAMRecord recordFromDisk, Class<?> expectedClass) {
final Object attribute = recordFromDisk.getAttribute(arrayTag);
Assert.assertNotNull(attribute);
Assert.assertEquals(attribute.getClass(), expectedClass);
Assert.assertEquals(Array.getLength(attribute), 0);
}
}
20 changes: 20 additions & 0 deletions src/test/java/htsjdk/samtools/SAMTextReaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

public class SAMTextReaderTest extends HtsjdkTest {
private static final String ARRAY_TAG = "xa";

// Simple input, spot check that parsed correctly, and make sure nothing blows up.
@Test
public void testBasic() throws Exception {
Expand Down Expand Up @@ -135,4 +138,21 @@ public void testTagWithColon() {
Assert.assertEquals(recFromText.getAttribute(SAMTag.CQ.name()), valueWithColons);
CloserUtil.close(reader);
}

@DataProvider
public Object[][] getRecordsWithArrays(){
final String recordBase = "Read\t4\tchr1\t1\t0\t*\t*\t0\t0\tG\t%\t";
return new Object[][]{
{recordBase + ARRAY_TAG + ":B:i", new int[0]},
{recordBase + ARRAY_TAG + ":B:i,", new int[0]},
{recordBase + ARRAY_TAG + ":B:i,1,2,3,", new int[]{1,2,3}},
};
}

@Test(dataProvider = "getRecordsWithArrays")
public void testSamRecordCanHandleArrays(String samRecord, Object array){
final SAMLineParser samLineParser = new SAMLineParser(new SAMFileHeader());
final SAMRecord record = samLineParser.parseLine(samRecord);
Assert.assertEquals(record.getAttribute(ARRAY_TAG), array);
}
}
14 changes: 11 additions & 3 deletions src/test/java/htsjdk/samtools/SAMTextWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@

public class SAMTextWriterTest extends HtsjdkTest {

private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
private SAMRecordSetBuilder getSamRecordSet(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder);
ret.addPair("readB", 20, 200, 300);
ret.addPair("readA", 20, 100, 150);
Expand All @@ -45,7 +45,7 @@ private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileH

@Test
public void testNullHeader() throws Exception {
final SAMRecordSetBuilder recordSetBuilder = getSAMReader(true, SAMFileHeader.SortOrder.coordinate);
final SAMRecordSetBuilder recordSetBuilder = getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate);
for (final SAMRecord rec : recordSetBuilder.getRecords()) {
rec.setHeader(null);
}
Expand Down Expand Up @@ -77,7 +77,7 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder) throws Exception
}

private void doTest(final SamFlagField samFlagField) throws Exception {
doTest(getSAMReader(true, SAMFileHeader.SortOrder.coordinate), samFlagField);
doTest(getSamRecordSet(true, SAMFileHeader.SortOrder.coordinate), samFlagField);
}

private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagField samFlagField) throws Exception {
Expand Down Expand Up @@ -128,4 +128,12 @@ private void doTest(final SAMRecordSetBuilder recordSetBuilder, final SamFlagFie
Assert.assertFalse(newSAMIt.hasNext());
inputSAM.close();
}

@Test
public void testEmptyArrayAttributeHasNoCommaWhenWrittenToSAM(){
final SAMFileHeader header = new SAMFileHeader();
final SAMRecord record = new SAMRecord(header);
record.setAttribute("xa", new int[0]);
Assert.assertTrue(record.getSAMString().endsWith("xa:B:i\n"));
}
}
Loading

0 comments on commit 44baddf

Please sign in to comment.