Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MergeSamFiles accept SO:UNKNOWN #1069

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions src/main/java/htsjdk/samtools/SAMFileHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ public SortOrder getSortOrder() {
try {
return SortOrder.valueOf(so);
} catch (IllegalArgumentException e) {
log.warn("Found non conforming header SO tag: " + so + ". Treating as 'unknown'.");
log.warn("Found non-conforming header SO tag: " + so + ". Treating as 'unknown'.");
sortOrder = SortOrder.unknown;
}
}
Expand Down Expand Up @@ -324,12 +324,18 @@ public void setAttribute(final String key, final Object value) {
*/
@Override
public void setAttribute(final String key, final String value) {
String tempVal = value;
if (key.equals(SORT_ORDER_TAG)) {
this.sortOrder = null;
try {
tempVal = SortOrder.valueOf(value).toString();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In setSortOrder() the code uses .name() instead of .toString() which I think is clearer; either way the code should be consistent.

Also, it seems odd that setSortOrder() sets sortOrder to non-null, if the argument is a valid SortOrder, but this method sets it to null. Maybe this method should just call setSortOrder() when key is SORT_ORDER_TAG?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pshapiro4broad that's how the old implementation also does it...it let's the super-class deal with it...take a look at htsjdk.samtools.SAMFileHeader#getSortOrder, I think that might clarify things up (and note that this.sortOrder is private)

} catch (IllegalArgumentException e) {
tempVal = SortOrder.unknown.toString();
}
} else if (key.equals(GROUP_ORDER_TAG)) {
this.groupOrder = null;
}
super.setAttribute(key, value);
super.setAttribute(key, tempVal);
}

/**
Expand Down
30 changes: 26 additions & 4 deletions src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
*/
package htsjdk.samtools;

import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringUtil;
import htsjdk.samtools.util.Log;

import java.io.BufferedWriter;
import java.io.IOException;
Expand Down Expand Up @@ -69,6 +71,7 @@ public class SAMTextHeaderCodec {
private static final Pattern FIELD_SEPARATOR_RE = Pattern.compile(FIELD_SEPARATOR);

public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR;
private static final Log log = Log.getInstance(SAMTextHeaderCodec.class);

void setWriter(final BufferedWriter writer) {
this.writer = writer;
Expand Down Expand Up @@ -231,10 +234,10 @@ private void parseHDLine(final ParsedHeaderLine parsedHeaderLine) {

final String soString = parsedHeaderLine.getValue(SAMFileHeader.SORT_ORDER_TAG);
try {
if (soString != null) SAMFileHeader.SortOrder.valueOf(soString);
if (soString != null) SortOrder.valueOf(soString);
} catch (IllegalArgumentException e) {
reportErrorParsingLine(HEADER_LINE_START + parsedHeaderLine.getHeaderRecordType() +
" line has non-conforming SO tag value: "+ soString + ".",
" line has non-conforming SO tag value: " + soString + ".",
SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE, null);
}

Expand Down Expand Up @@ -323,11 +326,30 @@ private class ParsedHeaderLine {
SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED, null);
continue;
}
validateSortOrderValue(keyAndValue);
mKeyValuePairs.put(keyAndValue[0], keyAndValue[1]);
}
lineValid = true;
}

private void validateSortOrderValue(String[] value) {
if (SAMFileHeader.SORT_ORDER_TAG.equals(value[0])) {
try {
SortOrder.valueOf(value[1]);
} catch (IllegalArgumentException e) {
if (validationStringency == ValidationStringency.STRICT) {
throw new SAMFormatException("Found non-conforming header SO tag: "
+ value[1]
+ ", exiting because VALIDATION_STRINGENCY=STRICT");
} else if (validationStringency == ValidationStringency.LENIENT) {
log.warn("Found non-conforming header SO tag: "
+ value[1] + ". Treating as 'unknown'.");
}
value[1] = SortOrder.unknown.toString();
}
}
}

/**
* True if the line is recognized as one of the valid HeaderRecordTypes.
*/
Expand Down Expand Up @@ -462,7 +484,7 @@ protected String getPGLine(final SAMProgramRecord programRecord) {
private void writeRGLine(final SAMReadGroupRecord readGroup) {
println(getRGLine(readGroup));
}

protected String getRGLine(final SAMReadGroupRecord readGroup) {
final String[] fields = new String[2 + readGroup.getAttributes().size()];
fields[0] = HEADER_LINE_START + HeaderRecordType.RG;
Expand Down Expand Up @@ -525,4 +547,4 @@ public void setValidationStringency(final ValidationStringency validationStringe
}
this.validationStringency = validationStringency;
}
}
}
91 changes: 90 additions & 1 deletion src/test/java/htsjdk/samtools/SAMFileHeaderTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,17 @@
package htsjdk.samtools;

import htsjdk.HtsjdkTest;
import htsjdk.samtools.util.BufferedLineReader;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.util.Arrays;

public class SAMFileHeaderTest extends HtsjdkTest {

@Test
public void testSortOrder() {
public void testSortOrderManualSetting() {
final SAMFileHeader header = new SAMFileHeader();

header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
Expand All @@ -45,6 +47,21 @@ public void testSortOrder() {
header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "UNKNOWN");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "uNknOWn");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "cOoRdinate");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG),
SAMFileHeader.SortOrder.unknown.name());
}

@Test
Expand Down Expand Up @@ -81,4 +98,76 @@ public void testGetSequenceIfNameIsNotFound() {

Assert.assertNull(header.getSequence("chr2"));
}

@Test
public void testWrongTag() {
String[] testData = new String[]{
"@hd\tVN:1.0\tSO:unsorted\n",
"@sq\tSN:chrM\tLN:16571\n",
"@rg\tID:1\tSM:sample1\n",
"@pg\tID:1\tPN:A\n",
"@co\tVN:1.0\tSO:unsorted\n"
};
for (String stringHeader : testData) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
SAMFileHeader header = codec.decode(BufferedLineReader.fromString(stringHeader), null);
String validationErrors = header.getValidationErrors().toString();
Assert.assertTrue(validationErrors.contains("Unrecognized header record type"));
}

}

@DataProvider(name = "DataForWrongTagTests")
public Object[][] dataForWrongTagTests() {
return new Object[][] {
{"@HD\tVN:1.0\tSO:UNSORTED\n"},
{"@HD\tVN:1.0\tSO:FALSE\n"},
{"@HD\tVN:1.0\tSO:COORDINATE\n"},
{"@HD\tVN:1.0\tSO:uNknOWn\n"},
{"@HD\tVN:1.0\tSO:cOoRdinate\n"},
};
}

@Test(dataProvider = "DataForWrongTagTests")
public void testSortOrderCodecSetting(String hdr) {
String validString = "@HD\tVN:1.0\tSO:unknown\n";

SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
SAMFileHeader header = codec.decode(BufferedLineReader.fromString(validString), null);

header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.coordinate);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.coordinate.name());

header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unsorted);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unsorted.name());

header.setAttribute(SAMFileHeader.SORT_ORDER_TAG, "badname");
Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.unknown);
Assert.assertEquals(header.getAttribute(SAMFileHeader.SORT_ORDER_TAG), SAMFileHeader.SortOrder.unknown.name());

header = codec.decode(BufferedLineReader.fromString(hdr), null);
Assert.assertTrue(header.getSortOrder().toString().equals("unknown"));
}

@Test(dataProvider = "DataForWrongTagTests", expectedExceptions = SAMFormatException.class)
public void testValidationStringencyStrict(String stringHeader) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
codec.setValidationStringency(ValidationStringency.STRICT);
codec.decode(BufferedLineReader.fromString(stringHeader), null);
}

@Test(dataProvider = "DataForWrongTagTests")
public void testValidationStringencyLenientAndSilent(String stringHeader) {
SAMTextHeaderCodec codec = new SAMTextHeaderCodec();

codec.setValidationStringency(ValidationStringency.LENIENT);
SAMFileHeader headerLenient = codec.decode(BufferedLineReader.fromString(stringHeader), null);
Assert.assertTrue(headerLenient.getSortOrder().equals(SAMFileHeader.SortOrder.unknown));

codec.setValidationStringency(ValidationStringency.SILENT);
SAMFileHeader headerSilent = codec.decode(BufferedLineReader.fromString(stringHeader), null);
Assert.assertTrue(headerSilent.getSortOrder().equals(SAMFileHeader.SortOrder.unknown));
}
}
7 changes: 0 additions & 7 deletions src/test/java/htsjdk/samtools/ValidateSamFileTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -552,12 +552,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException {
"@RG\tID:0\tSM:Hi,Mom!\n" +
"E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";

final String SOTagCorrectlyProcessTestData =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this removed?

Copy link

@IanaKabakova IanaKabakova Jul 18, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for your reply and sorry for the delay. We've removed this test case since as @a-filipp0v said, it's redundant after our implementation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure I understand...if it's just "Redundant" but not exactly equals, I'd ask that it be left in.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This case does not pass now, since we no longer treat SO:NOTKNOWN as error as test expects.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

got it

"@HD\tVN:1.0\tSO:NOTKNOWN\n" +
"@SQ\tSN:chr1\tLN:101\n" +
"@RG\tID:0\tSM:Hi,Mom!\n" +
"E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";

final String GOTagCorrectlyProcessTestData =
"@HD\tVN:1.0\tGO:NOTKNOWN\n" +
"@SQ\tSN:chr1\tLN:101\n" +
Expand All @@ -568,7 +562,6 @@ public Object[][] tagCorrectlyProcessData() throws IOException {
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE},
{E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH},
{U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH},
{SOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE},
{GOTagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.HEADER_TAG_NON_CONFORMING_VALUE}
};
}
Expand Down