Skip to content

Commit

Permalink
- implementing the spec change in samtools/hts-specs#333
Browse files Browse the repository at this point in the history
  • Loading branch information
Yossi Farjoun committed Sep 17, 2018
1 parent e0bf651 commit 34b13e0
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 6 deletions.
15 changes: 9 additions & 6 deletions src/main/java/htsjdk/samtools/SAMSequenceRecord.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea


/**
* This is not a valid sequence name, because it is reserved in the MRNM field of SAM text format
* This is not a valid sequence name, because it is reserved in the RNEXT field of SAM text format
* to mean "same reference as RNAME field."
*/
public static final String RESERVED_MRNM_SEQUENCE_NAME = "=";
public static final String RESERVED_RNEXT_SEQUENCE_NAME = "=";

/**
* The standard tags are stored in text header without type information, because the type of these tags is known.
Expand All @@ -71,10 +71,13 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
SPECIES_TAG));

// Split on any whitespace
private static Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("\\s");
private static final Pattern SEQUENCE_NAME_SPLITTER = Pattern.compile("[\\s]");
// These are the chars matched by \\s.
private static final char[] WHITESPACE_CHARS = {' ', '\t', '\n', '\013', '\f', '\r'}; // \013 is vertical tab

// Matches any single character that is rejected in a sequence name in addition to whitespace:
// backslash, comma, double quote, single quote, backtick, and the bracketing characters ( ) < > { } [ ].
// Declared final (like the fields above) so the compiled pattern cannot be swapped out at runtime.
private static final Pattern ILLEGAL_RNAME_CHARS = Pattern.compile("[\\\\,\"\'`()<>{}\\]\\[]");

/** a (private) empty constructor is required for JAXB.XML-serialisation */
@SuppressWarnings("unused")
private SAMSequenceRecord() {
Expand All @@ -91,7 +94,7 @@ public SAMSequenceRecord(final String name) {

public SAMSequenceRecord(final String name, final int sequenceLength) {
if (name != null) {
if (SEQUENCE_NAME_SPLITTER.matcher(name).find()) {
if (SEQUENCE_NAME_SPLITTER.matcher(name).find() || ILLEGAL_RNAME_CHARS.matcher(name).find()) {
throw new SAMException("Sequence name contains invalid character: " + name);
}
validateSequenceName(name);
Expand Down Expand Up @@ -230,8 +233,8 @@ public static String truncateSequenceName(final String sequenceName) {
* Throw an exception if the sequence name is not valid.
*/
public static void validateSequenceName(final String name) {
if (RESERVED_MRNM_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_MRNM_SEQUENCE_NAME + "' is not a valid sequence name");
if (RESERVED_RNEXT_SEQUENCE_NAME.equals(name)) {
throw new SAMException("'" + RESERVED_RNEXT_SEQUENCE_NAME + "' is not a valid sequence name");
}
}

Expand Down
21 changes: 21 additions & 0 deletions src/test/java/htsjdk/samtools/SAMSequenceRecordTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,25 @@ public void testIsSameSequence(final SAMSequenceRecord rec1 , final SAMSequenceR
Assert.assertEquals(rec1.isSameSequence(rec2), isSame);
}
}

/**
 * Supplies sequence names that must be rejected when used to construct a {@code SAMSequenceRecord}.
 * Each row holds a single name ending in one offending character: a whitespace character, or one
 * of backslash, comma, double quote, single quote, backtick, ( ) < > { } [ ].
 *
 * @return one single-element row per illegal sequence name
 */
@DataProvider
public Object[][] illegalSequenceNames() {
    return new Object[][]{
            {"space "},
            {"tab\t"},
            {"comma,"},
            {"lbrace["},
            {"rbrace]"},
            {"slash\\"},
            {"smaller<"},
            // Previously "bigger<", which repeated the '<' case and left '>' untested.
            {"bigger>"},
            {"lparen("},
            {"rparen)"},
            {"lbracket{"},
            {"rbracket}"},
            {"dquote\""},
            {"squote'"},
            {"backtick`"}
    };
}

/**
 * Verifies that constructing a record with a name from the {@code illegalSequenceNames}
 * provider fails with a {@code SAMException}.
 *
 * @param sequenceName a sequence name expected to be rejected
 */
@Test(dataProvider = "illegalSequenceNames", expectedExceptions = SAMException.class)
public void testIllegalSequenceNames(final String sequenceName) {
    // The name is the input under test; the length is just a fixed placeholder value.
    final int placeholderLength = 100;
    new SAMSequenceRecord(sequenceName, placeholderLength);
}
}

0 comments on commit 34b13e0

Please sign in to comment.