Skip to content

Commit

Permalink
Add separate map for ID header lines
Browse files Browse the repository at this point in the history
  • Loading branch information
mjhipp committed Feb 10, 2021
1 parent 4af15f6 commit 569ccf1
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 19 deletions.
61 changes: 51 additions & 10 deletions src/main/java/htsjdk/variant/vcf/VCFHeader.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand Down Expand Up @@ -75,6 +76,7 @@ public enum HEADER_FIELDS {
// Lookup tables for header lines by type. All are LinkedHashMaps, so iteration follows insertion order.
// NOTE(review): the INFO/FORMAT/FILTER/contig maps are presumably keyed by line ID -- confirm against
// the code that populates them.
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new LinkedHashMap<>();
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new LinkedHashMap<>();
private final Map<String, VCFFilterHeaderLine> mFilterMetaData = new LinkedHashMap<>();
// Two-level lookup for ID header lines: outer key is the line's key (field type), inner key is the line's ID.
private final Map<String, Map<String, VCFSimpleHeaderLine>> mIdMetaData = new LinkedHashMap<>();
// Lines that fit none of the typed maps, keyed by the line's key.
private final Map<String, VCFHeaderLine> mOtherMetaData = new LinkedHashMap<>();
private final Map<String, VCFContigHeaderLine> contigMetaData = new LinkedHashMap<>();

Expand Down Expand Up @@ -374,8 +376,7 @@ private boolean addMetadataLineLookupEntry(final VCFHeaderLine line) {
} else if ( line instanceof VCFContigHeaderLine ) {
return addContigMetaDataLineLookupEntry((VCFContigHeaderLine) line);
} else if ( line instanceof VCFSimpleHeaderLine ){
final VCFSimpleHeaderLine simpleLine = (VCFSimpleHeaderLine) line;
return addMetaDataLineMapLookupEntry(mOtherMetaData, simpleLine.getKey() + ":" + simpleLine.getID(), simpleLine);
return addIdMetaDataLineLookupEntry((VCFSimpleHeaderLine) line);
} else {
return addMetaDataLineMapLookupEntry(mOtherMetaData, line.getKey(), line);
}
Expand Down Expand Up @@ -404,6 +405,24 @@ private boolean addContigMetaDataLineLookupEntry(final VCFContigHeaderLine line)
return true;
}

/**
 * Add a simple header line to the lookup for ID header lines (mIdMetaData), which is keyed
 * first by the line's key (field type) and then by its ID. If there's already an ID line with
 * the same field type and ID, the existing line is kept and this one is not added.
 *
 * Note: does not add the ID line to the master list of header lines in mMetaData --
 * this must be done separately if desired.
 *
 * @param line ID header line to add
 * @return true if line was added to the lookup of ID lines, otherwise false
 */
private boolean addIdMetaDataLineLookupEntry(final VCFSimpleHeaderLine line) {
    // Fetch the per-field-type map, creating it the first time this key/field type is seen.
    final Map<String, VCFSimpleHeaderLine> linesForKey =
            mIdMetaData.computeIfAbsent(line.getKey(), unused -> new LinkedHashMap<>());
    return addMetaDataLineMapLookupEntry(linesForKey, line.getID(), line);
}

/**
* Add a header line to the provided map at a given key. If the key already exists, it will not be replaced.
* If it does already exist and GeneralUtils.DEBUG_MODE_ENABLED is true, it will issue warnings about duplicates,
Expand Down Expand Up @@ -582,32 +601,54 @@ public boolean hasFilterLine(final String id) {
}

/**
 * Looks up a header line by key.
 *
 * First checks for a VCFHeaderLine stored under the given key in mOtherMetaData.
 * If none is found, treats the key as an ID-line field type and returns the first ID header
 * line that was added for that field type.
 *
 * @param key the header key name
 * @return the meta data line, or null if there is none
 */
public VCFHeaderLine getOtherHeaderLine(final String key) {
    if (mOtherMetaData.containsKey(key)) {
        return mOtherMetaData.get(key);
    }

    final Map<String, VCFSimpleHeaderLine> idLinesForKey = mIdMetaData.get(key);
    if (idLinesForKey == null || idLinesForKey.isEmpty()) {
        return null;
    }
    // The per-key maps are LinkedHashMaps, so the first value is the first line added for this key.
    return idLinesForKey.values().iterator().next();
}

/**
 * Returns the other HeaderLines in their original ordering.
 *
 * @return the values of mOtherMetaData; this is the map's own values view, not a copy
 */
public Collection<VCFHeaderLine> getOtherHeaderLines() {
    final Collection<VCFHeaderLine> otherLines = mOtherMetaData.values();
    return otherLines;
}

/**
* @param key the header key or field type
* @param id the header id
* @return the meta data line, or null if there is none
*/
public VCFSimpleHeaderLine getOtherHeaderLine(final String key, final String id) {
final VCFHeaderLine line = mOtherMetaData.get(key + ":" + id);
if (line instanceof VCFSimpleHeaderLine) {
return (VCFSimpleHeaderLine) line;
public VCFSimpleHeaderLine getIdHeaderLine(final String key, final String id) {
if (mIdMetaData.containsKey(key)) {
return mIdMetaData.get(key).get(id);
} else {
return null;
}
}

/**
* Returns the other HeaderLines in their original ordering
* Returns the ID HeaderLines, in order by:
* - First key/field type to be added
* - Within key/field types, in original order
*/
public Collection<VCFHeaderLine> getOtherHeaderLines() {
return mOtherMetaData.values();
public Collection<VCFSimpleHeaderLine> getIdHeaderLines() {
final Collection<VCFSimpleHeaderLine> lines = new LinkedList<>();
mIdMetaData.forEach((k, v) -> lines.addAll(v.values()));
return lines;
}

/**
Expand Down
18 changes: 9 additions & 9 deletions src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -312,17 +312,17 @@ public void testVCFHeaderAddOtherLine() {
}

@Test
public void testVcfHeaderAddSimpleHeaderLine() {
public void testVcfHeaderAddIdHeaderLine() {
final VCFHeader header = getHiSeqVCFHeader();
final VCFSimpleHeaderLine delLine = new VCFSimpleHeaderLine("ALT", "DEL", "Deletion relative to the reference");
final VCFSimpleHeaderLine insLine = new VCFSimpleHeaderLine("ALT", "INS", "Insertion of novel sequence relative to the reference");
header.addMetaDataLine(delLine);
header.addMetaDataLine(insLine);

Assert.assertTrue(header.getOtherHeaderLines().contains(delLine), "DEL line not found in other header lines");
Assert.assertTrue(header.getOtherHeaderLines().contains(insLine), "INS line not found in other header lines");
Assert.assertNotNull(header.getOtherHeaderLine("ALT", "DEL"), "Lookup for ALT:DEL by key failed");
Assert.assertNotNull(header.getOtherHeaderLine("ALT", "INS"), "Lookup for ALT:INS by key failed");
Assert.assertTrue(header.getIdHeaderLines().contains(delLine), "DEL line not found in ID header lines");
Assert.assertTrue(header.getIdHeaderLines().contains(insLine), "INS line not found in ID header lines");
Assert.assertNotNull(header.getIdHeaderLine("ALT", "DEL"), "Lookup for ALT/DEL by key failed");
Assert.assertNotNull(header.getIdHeaderLine("ALT", "INS"), "Lookup for ALT/INS by key failed");
}

@Test
Expand Down Expand Up @@ -384,16 +384,16 @@ public void testVCFHeaderAddDuplicateHeaderLine() {
}

@Test
public void testVCFHeaderAddDuplicateSimpleHeaderLine() {
public void testVCFHeaderAddDuplicateIdHeaderLine() {
final VCFHeader header = getHiSeqVCFHeader();
final VCFSimpleHeaderLine delLine = new VCFSimpleHeaderLine("ALT", "DEL", "Deletion relative to the reference");
header.addMetaDataLine(delLine);
Assert.assertTrue(header.getOtherHeaderLines().contains(delLine), "DEL line not found in other header lines");
Assert.assertTrue(header.getIdHeaderLines().contains(delLine), "DEL line not found in ID header lines");

final int numHeaderLinesBefore = header.getOtherHeaderLines().size();
final int numHeaderLinesBefore = header.getIdHeaderLines().size();
// readd the same header line
header.addMetaDataLine(delLine);
final int numHeaderLinesAfter = header.getOtherHeaderLines().size();
final int numHeaderLinesAfter = header.getIdHeaderLines().size();

// assert that we have the same number of other header lines before and after
Assert.assertEquals(numHeaderLinesBefore, numHeaderLinesAfter);
Expand Down

0 comments on commit 569ccf1

Please sign in to comment.