diff --git a/build.gradle b/build.gradle index 7562a0aff5..970ab9c27a 100644 --- a/build.gradle +++ b/build.gradle @@ -58,7 +58,13 @@ group = 'com.github.samtools' defaultTasks 'jar' +tasks.withType(JavaCompile) { + options.encoding = 'UTF-8' +} +tasks.withType(Javadoc) { + options.addStringOption('encoding', 'UTF-8') +} jar { manifest { diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java index 742b1c8929..1bdeb36305 100644 --- a/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java +++ b/src/main/java/htsjdk/variant/variantcontext/writer/VCFWriter.java @@ -173,6 +173,8 @@ public static VCFHeader writeHeader(VCFHeader header, final String streamNameForError) { try { + rejectVCFV43Headers(header); + // the file format field needs to be written first writer.write(versionLine + "\n"); @@ -258,10 +260,21 @@ public void add(final VariantContext context) { @Override public void setHeader(final VCFHeader header) { + rejectVCFV43Headers(header); + if (outputHasBeenWritten) { throw new IllegalStateException("The header cannot be modified after the header or variants have been written to the output stream."); } this.mHeader = doNotWriteGenotypes ? 
new VCFHeader(header.getMetaDataInSortedOrder()) : header; this.vcfEncoder = new VCFEncoder(this.mHeader, this.allowMissingFieldsInHeader, this.writeFullFormatField); } + + // writing vcf v4.3 is not implemented + private static void rejectVCFV43Headers(final VCFHeader targetHeader) { + if (targetHeader.getVCFHeaderVersion() != null && targetHeader.getVCFHeaderVersion().isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { + throw new IllegalArgumentException(String.format("Writing VCF version %s is not implemented", targetHeader.getVCFHeaderVersion())); + } + + + } } diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index 1b89929dae..1f264d8eac 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -33,6 +33,7 @@ import htsjdk.tribble.TribbleException; import htsjdk.tribble.index.tabix.TabixFormat; import htsjdk.tribble.util.ParsingUtils; +import htsjdk.utils.ValidationUtils; import htsjdk.variant.utils.GeneralUtils; import htsjdk.variant.variantcontext.*; @@ -44,7 +45,6 @@ import java.util.*; import java.util.zip.GZIPInputStream; - public abstract class AbstractVCFCodec extends AsciiFeatureCodec implements NameAwareCodec { public final static int MAX_ALLELE_SIZE_BEFORE_WARNING = (int)Math.pow(2, 20); @@ -54,6 +54,11 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec protected VCFHeader header = null; protected VCFHeaderVersion version = null; + private final static VCFTextTransformer percentEncodingTextTransformer = new VCFPercentEncodedTextTransformer(); + private final static VCFTextTransformer passThruTextTransformer = new VCFPassThruTextTransformer(); + //by default, we use the passThruTextTransformer (assume pre v4.3) + private VCFTextTransformer vcfTextTransformer = passThruTextTransformer; + // a mapping of the allele protected Map> alleleMap = new HashMap>(3); @@ -196,8 +201,13 @@ protected 
VCFHeader parseHeaderFromLines( final List headerStrings, fina final VCFContigHeaderLine contig = new VCFContigHeaderLine(str.substring(9), version, VCFConstants.CONTIG_HEADER_START.substring(2), contigCounter++); metaData.add(contig); } else if ( str.startsWith(VCFConstants.ALT_HEADER_START) ) { - final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description"), Collections.emptyList()); - metaData.add(alt); + metaData.add(getAltHeaderLine(str.substring(VCFConstants.ALT_HEADER_OFFSET), version)); + } else if ( str.startsWith(VCFConstants.PEDIGREE_HEADER_START) ) { + metaData.add(getPedigreeHeaderLine(str.substring(VCFConstants.PEDIGREE_HEADER_OFFSET), version)); + } else if ( str.startsWith(VCFConstants.META_HEADER_START) ) { + metaData.add(getMetaHeaderLine(str.substring(VCFConstants.META_HEADER_OFFSET), version)); + } else if ( str.startsWith(VCFConstants.SAMPLE_HEADER_START) ) { + metaData.add(getSampleHeaderLine(str.substring(VCFConstants.SAMPLE_HEADER_OFFSET), version)); } else { int equals = str.indexOf('='); if ( equals != -1 ) @@ -206,9 +216,7 @@ protected VCFHeader parseHeaderFromLines( final List headerStrings, fina } } - this.header = new VCFHeader(metaData, sampleNames); - if ( doOnTheFlyModifications ) - this.header = VCFStandardHeaderLines.repairStandardHeaderLines(this.header); + setVCFHeader(new VCFHeader(version, metaData, sampleNames), version); return this.header; } @@ -230,21 +238,76 @@ public VCFHeaderVersion getVersion() { /** * Explicitly set the VCFHeader on this codec. This will overwrite the header read from the file * and the version state stored in this instance; conversely, reading the header from a file will - * overwrite whatever is set here. The returned header may not be identical to the header argument - * since the header lines may be "repaired" (i.e., rewritten) if doOnTheFlyModifications is set. + * overwrite whatever is set here. 
+ * + * @param newHeader the new header for this codec + * @param newVersion the VCF version for the new header + * @return the actual header for this codec. The returned header may not be identical to the header + * argument since the header lines may be "repaired" (i.e., rewritten) if doOnTheFlyModifications is set. + * @throws TribbleException if the requested header version is not compatible with the existing version */ - public VCFHeader setVCFHeader(final VCFHeader header, final VCFHeaderVersion version) { - this.version = version; - + public VCFHeader setVCFHeader(final VCFHeader newHeader, final VCFHeaderVersion newVersion) { + validateHeaderVersionTransition(newHeader, newVersion); if (this.doOnTheFlyModifications) { - this.header = VCFStandardHeaderLines.repairStandardHeaderLines(header); + final VCFHeader repairedHeader = VCFStandardHeaderLines.repairStandardHeaderLines(newHeader); + // validate the new header after repair to ensure the resulting header version is + // still compatible with the current version + validateHeaderVersionTransition(repairedHeader, newVersion); + this.header = repairedHeader; } else { - this.header = header; + this.header = newHeader; } + this.version = newVersion; + this.vcfTextTransformer = getTextTransformerForVCFVersion(newVersion); + return this.header; } + /** + * Create and return a VCFAltHeaderLine object from a header line string that conforms to the {@code sourceVersion} + * @param headerLineString VCF header line being parsed without the leading "##ALT=" + * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header + * line object should be valid for this header version. 
+ * @return a VCFAltHeaderLine object + */ + public VCFAltHeaderLine getAltHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) { + return new VCFAltHeaderLine(headerLineString, sourceVersion); + } + + /** + * Create and return a VCFPedigreeHeaderLine object from a header line string that conforms to the {@code sourceVersion} + * @param headerLineString VCF header line being parsed without the leading "##PEDIGREE=" + * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header + * line object should be valid for this header version. + * @return a VCFPedigreeHeaderLine object + */ + public VCFPedigreeHeaderLine getPedigreeHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) { + return new VCFPedigreeHeaderLine(headerLineString, sourceVersion); + } + + /** + * Create and return a VCFMetaHeaderLine object from a header line string that conforms to the {@code sourceVersion} + * @param headerLineString VCF header line being parsed without the leading "##META=" + * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header + * line object should be valid for this header version. + * @return a VCFMetaHeaderLine object + */ + public VCFMetaHeaderLine getMetaHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) { + return new VCFMetaHeaderLine(headerLineString, sourceVersion); + } + + /** + * Create and return a VCFSampleHeaderLine object from a header line string that conforms to the {@code sourceVersion} + * @param headerLineString VCF header line being parsed without the leading "##SAMPLE=" + * @param sourceVersion the VCF header version of the source from which the line was retrieved. The resulting header + * line object should be valid for this header version. 
+ * @return a VCFSampleHeaderLine object + */ + public VCFSampleHeaderLine getSampleHeaderLine(final String headerLineString, final VCFHeaderVersion sourceVersion) { + return new VCFSampleHeaderLine(headerLineString, sourceVersion); + } + /** * the fast decode function * @param line the line of text for the record @@ -264,6 +327,40 @@ public VariantContext decode(String line) { return decodeLine(line, true); } + /** + * Throw if a new version/header is not compatible with the existing version/header. Generally, any version + * before v4.2 can be up-converted to v4.2, but not to v4.3. Once a header is established as v4.3, it + * cannot be up or down converted, and it must remain at v4.3. + * @param newHeader + * @param newVersion + * @throws TribbleException if the header conversion is not valid + */ + private void validateHeaderVersionTransition(final VCFHeader newHeader, final VCFHeaderVersion newVersion) { + ValidationUtils.nonNull(newHeader); + ValidationUtils.nonNull(newVersion); + + VCFHeader.validateVersionTransition(version, newVersion); + + // If this codec currently has no header (this happens when the header is being established for + // the first time during file parsing), establish an initial header and version, and bypass + // validation. + if (header != null && newHeader.getVCFHeaderVersion() != null) { + VCFHeader.validateVersionTransition(header.getVCFHeaderVersion(), newHeader.getVCFHeaderVersion()); + } + } + + /** + * For v4.3 and later, attribute values can contain embedded percent-encoded characters which must be decoded + * on read. Return a version-aware text transformer that can decode encoded text. 
+ * @param targetVersion the version for which a transformer is being requested + * @return a {@link VCFTextTransformer} suitable for the targetVersion + */ + private VCFTextTransformer getTextTransformerForVCFVersion(final VCFHeaderVersion targetVersion) { + return targetVersion != null && targetVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3) ? + percentEncodingTextTransformer : + passThruTextTransformer; + } + private VariantContext decodeLine(final String line, final boolean includeGenotypes) { // the same line reader is not used for parsing the header and parsing lines, if we see a #, we've seen a header line if (line.startsWith(VCFHeader.HEADER_INDICATOR)) return null; @@ -429,14 +526,14 @@ private Map parseInfo(String infoField) { // split on the INFO field separator List infoValueSplit = ParsingUtils.split(valueString, VCFConstants.INFO_FIELD_ARRAY_SEPARATOR_CHAR); if ( infoValueSplit.size() == 1 ) { - value = infoValueSplit.get(0); + value = vcfTextTransformer.decodeText(infoValueSplit.get(0)); final VCFInfoHeaderLine headerLine = header.getInfoHeaderLine(key); if ( headerLine != null && headerLine.getType() == VCFHeaderLineType.Flag && value.equals("0") ) { // deal with the case where a flag field has =0, such as DB=0, by skipping the add continue; } } else { - value = infoValueSplit; + value = vcfTextTransformer.decodeText(infoValueSplit); } } else { key = infoFields.get(i); @@ -675,6 +772,7 @@ public LazyGenotypesContext.LazyData createGenotypeMap(final String str, boolean PlIsSet = false; for (int genotypeOffset = 1; genotypeOffset < nParts; genotypeOffset++) { List genotypeValues = ParsingUtils.split(genotypeParts[genotypeOffset], VCFConstants.GENOTYPE_FIELD_SEPARATOR_CHAR); + genotypeValues = vcfTextTransformer.decodeText(genotypeValues); final String sampleName = sampleNameIterator.next(); final GenotypeBuilder gb = new GenotypeBuilder(sampleName); diff --git a/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java 
b/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java new file mode 100644 index 0000000000..71c4850f07 --- /dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFAltHeaderLine.java @@ -0,0 +1,22 @@ +package htsjdk.variant.vcf; + +import java.util.*; + +/** + * A class representing ALT fields in the VCF header + */ +public class VCFAltHeaderLine extends VCFSimpleHeaderLine { + private static final long serialVersionUID = 1L; + + private static List expectedTags = Collections.unmodifiableList( + new ArrayList(2) {{ + add(ID_ATTRIBUTE); + add(DESCRIPTION_ATTRIBUTE); + }} + ); + + public VCFAltHeaderLine(final String line, final VCFHeaderVersion version) { + super(VCFConstants.ALT_HEADER_KEY, new VCF4Parser().parseLine(line, expectedTags)); + } + +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFCodec.java b/src/main/java/htsjdk/variant/vcf/VCFCodec.java index 6e5d3b7d2e..42f07150d1 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/VCFCodec.java @@ -98,7 +98,7 @@ public Object readActualHeader(final LineIterator lineIterator) { version = VCFHeaderVersion.toHeaderVersion(lineFields[1]); if ( ! 
version.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_0) ) throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4; please use the VCF3 codec for " + lineFields[1]); - if ( version != VCFHeaderVersion.VCF4_0 && version != VCFHeaderVersion.VCF4_1 && version != VCFHeaderVersion.VCF4_2 ) + if ( version != VCFHeaderVersion.VCF4_0 && version != VCFHeaderVersion.VCF4_1 && version != VCFHeaderVersion.VCF4_2 && version != VCFHeaderVersion.VCF4_3) throw new TribbleException.InvalidHeader("This codec is strictly for VCFv4 and does not support " + lineFields[1]); } headerStrings.add(lineIterator.next()); diff --git a/src/main/java/htsjdk/variant/vcf/VCFConstants.java b/src/main/java/htsjdk/variant/vcf/VCFConstants.java index 6a52d1df0e..64fdf2bc8e 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFConstants.java +++ b/src/main/java/htsjdk/variant/vcf/VCFConstants.java @@ -89,10 +89,25 @@ public final class VCFConstants { public static final String FILTER_HEADER_START = "##FILTER"; public static final String FORMAT_HEADER_START = "##FORMAT"; public static final String INFO_HEADER_START = "##INFO"; - public static final String ALT_HEADER_START = "##ALT"; + public static final String ALT_HEADER_KEY = "ALT"; + public static final String ALT_HEADER_START = VCFHeader.METADATA_INDICATOR + ALT_HEADER_KEY ; public static final String CONTIG_HEADER_KEY = "contig"; public static final String CONTIG_HEADER_START = "##" + CONTIG_HEADER_KEY; + public static final int ALT_HEADER_OFFSET = ALT_HEADER_START.length() + 1; + + public static final String PEDIGREE_HEADER_KEY = "PEDIGREE"; + public static final String PEDIGREE_HEADER_START = VCFHeader.METADATA_INDICATOR + PEDIGREE_HEADER_KEY; + public static final int PEDIGREE_HEADER_OFFSET = PEDIGREE_HEADER_START.length() + 1; + + public static final String SAMPLE_HEADER_KEY = "SAMPLE"; + public static final String SAMPLE_HEADER_START = VCFHeader.METADATA_INDICATOR + SAMPLE_HEADER_KEY; + public static final int SAMPLE_HEADER_OFFSET = 
SAMPLE_HEADER_START.length() + 1; + + public static final String META_HEADER_KEY = "META"; + public static final String META_HEADER_START = VCFHeader.METADATA_INDICATOR + META_HEADER_KEY; + public static final int META_HEADER_OFFSET = META_HEADER_START.length() + 1; + // old indel alleles public static final char DELETION_ALLELE_v3 = 'D'; public static final char INSERTION_ALLELE_v3 = 'I'; diff --git a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java index 96dd104ae9..48c5dfc6b8 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java +++ b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java @@ -25,7 +25,7 @@ public class VCFEncoder { /** - * The encoding used for VCF files: ISO-8859-1 + * The encoding used for VCF files: ISO-8859-1. When writing VCF4.3 is implemented, this should change to UTF-8. */ public static final Charset VCF_CHARSET = Charset.forName("ISO-8859-1"); private static final String QUAL_FORMAT_STRING = "%.2f"; diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeader.java b/src/main/java/htsjdk/variant/vcf/VCFHeader.java index c5cab7434b..c023e5bbf2 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFHeader.java +++ b/src/main/java/htsjdk/variant/vcf/VCFHeader.java @@ -29,6 +29,7 @@ import htsjdk.samtools.SAMSequenceRecord; import htsjdk.tribble.TribbleException; import htsjdk.tribble.util.ParsingUtils; +import htsjdk.utils.ValidationUtils; import htsjdk.variant.utils.GeneralUtils; import htsjdk.variant.variantcontext.VariantContextComparator; @@ -62,6 +63,13 @@ public enum HEADER_FIELDS { CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO } + /** + * The VCF version for this header; once a header version is established, it can only be + * changed subject to version transition rules defined by + * {@link #validateVersionTransition(VCFHeaderVersion, VCFHeaderVersion)} + */ + private VCFHeaderVersion vcfHeaderVersion; + // the associated meta data private final Set mMetaData = new LinkedHashSet(); private 
final Map mInfoMetaData = new LinkedHashMap(); @@ -126,7 +134,7 @@ public VCFHeader(final VCFHeader toCopy) { } /** - * create a VCF header, given a list of meta data and auxillary tags + * create a VCF header, given a list of meta data and auxiliary tags * * @param metaData the meta data associated with this header * @param genotypeSampleNames the sample names @@ -135,6 +143,19 @@ public VCFHeader(final Set metaData, final Set genotypeSa this(metaData, new ArrayList(genotypeSampleNames)); } + /** + * create a VCF header, given a target version, a list of meta data and auxiliary tags + * + * @param vcfHeaderVersion the vcf header version for this header, can not be null + * @param metaData the meta data associated with this header + * @param genotypeSampleNames the sample names + */ + public VCFHeader(final VCFHeaderVersion vcfHeaderVersion, final Set metaData, final Set genotypeSampleNames) { + this(metaData, new ArrayList(genotypeSampleNames)); + ValidationUtils.nonNull(vcfHeaderVersion); + setVCFHeaderVersion(vcfHeaderVersion); + } + public VCFHeader(final Set metaData, final List genotypeSampleNames) { this(metaData); @@ -146,6 +167,52 @@ public VCFHeader(final Set metaData, final List genotypeS buildVCFReaderMaps(genotypeSampleNames); } + /** + * Establish the header version for this header. If the header version has already been established + * for this header, the new version will be subject to version transition validation. + * @param vcfHeaderVersion + * @throws TribbleException if the requested header version is not compatible with the existing version + */ + public void setVCFHeaderVersion(final VCFHeaderVersion vcfHeaderVersion) { + validateVersionTransition(this.vcfHeaderVersion, vcfHeaderVersion); + this.vcfHeaderVersion = vcfHeaderVersion; + } + + /** + * Throw if {@code fromVersion} is not compatible with a {@code toVersion}. Generally, any version before + * version 4.2 can be up-converted to version 4.2, but not to version 4.3. 
Once a header is established as + * version 4.3, it cannot be up or down converted, and it must remain at version 4.3. + * @param fromVersion current version. May be null, in which case {@code toVersion} can be any version + * @param toVersion new version. Cannot be null. + * @throws TribbleException if {@code fromVersion} is not compatible with {@code toVersion} + */ + public static void validateVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) { + ValidationUtils.nonNull(toVersion); + + final String errorMessageFormatString = "VCF cannot be automatically promoted from %s to %s"; + + // fromVersion can be null, in which case anything goes (any transition from null is legal) + if (fromVersion != null) { + if (toVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { + if (!fromVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { + // we're trying to go from pre-v4.3 to v4.3+ + throw new TribbleException(String.format(errorMessageFormatString, fromVersion, toVersion)); + } + + } else if (fromVersion.equals(VCFHeaderVersion.VCF4_3)) { + // we're trying to go from v4.3 to pre-v4.3 + throw new TribbleException(String.format(errorMessageFormatString, fromVersion, toVersion)); + } + } + } + + /** + * @return the VCFHeaderVersion for this header. Can be null. + */ + public VCFHeaderVersion getVCFHeaderVersion() { + return vcfHeaderVersion; + } + /** * Tell this VCF header to use pre-calculated sample name ordering and the * sample name -> offset map. 
This assumes that all VariantContext created @@ -397,9 +464,14 @@ public Set getMetaDataInSortedOrder() { return makeGetMetaDataSet(new TreeSet(mMetaData)); } - private static Set makeGetMetaDataSet(final Set headerLinesInSomeOrder) { + private Set makeGetMetaDataSet(final Set headerLinesInSomeOrder) { final Set lines = new LinkedHashSet(); - lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString())); + if (vcfHeaderVersion != null && vcfHeaderVersion.isAtLeastAsRecentAs(VCFHeaderVersion.VCF4_3)) { + // always propagate version 4.3+ to prevent these header lines from magically being back-versioned to < 4.3 + lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_3.getFormatString(), VCFHeaderVersion.VCF4_3.getVersionString())); + } else { + lines.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_2.getFormatString(), VCFHeaderVersion.VCF4_2.getVersionString())); + } lines.addAll(headerLinesInSomeOrder); return Collections.unmodifiableSet(lines); } diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java index 54213a67ac..6c83574fee 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java +++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderLineTranslator.java @@ -37,15 +37,17 @@ * A class for translating between vcf header versions */ public class VCFHeaderLineTranslator { - private static Map mapping; + private static final Map mapping; static { - mapping = new HashMap(); - mapping.put(VCFHeaderVersion.VCF4_0,new VCF4Parser()); - mapping.put(VCFHeaderVersion.VCF4_1,new VCF4Parser()); - mapping.put(VCFHeaderVersion.VCF4_2,new VCF4Parser()); - mapping.put(VCFHeaderVersion.VCF3_3,new VCF3Parser()); - mapping.put(VCFHeaderVersion.VCF3_2,new VCF3Parser()); + final Map map = new HashMap<>(); + map.put(VCFHeaderVersion.VCF4_0, new VCF4Parser()); + map.put(VCFHeaderVersion.VCF4_1, new VCF4Parser()); + map.put(VCFHeaderVersion.VCF4_2, 
new VCF4Parser()); + map.put(VCFHeaderVersion.VCF4_3, new VCF4Parser()); + map.put(VCFHeaderVersion.VCF3_3, new VCF3Parser()); + map.put(VCFHeaderVersion.VCF3_2, new VCF3Parser()); + mapping = Collections.unmodifiableMap(map); } public static Map parseLine(VCFHeaderVersion version, String valueLine, List expectedTagOrder) { diff --git a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java index b45d4230df..43f43c65c3 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java +++ b/src/main/java/htsjdk/variant/vcf/VCFHeaderVersion.java @@ -31,11 +31,13 @@ * information that identifies each header version */ public enum VCFHeaderVersion { - VCF3_2("VCRv3.2","format"), - VCF3_3("VCFv3.3","fileformat"), - VCF4_0("VCFv4.0","fileformat"), - VCF4_1("VCFv4.1","fileformat"), - VCF4_2("VCFv4.2","fileformat"); + // Keep this list in increasing (ordinal) order, since isAtLeastAsRecentAs depends on it + VCF3_2("VCRv3.2", "format"), + VCF3_3("VCFv3.3", "fileformat"), + VCF4_0("VCFv4.0", "fileformat"), + VCF4_1("VCFv4.1", "fileformat"), + VCF4_2("VCFv4.2", "fileformat"), + VCF4_3("VCFv4.3", "fileformat"); private final String versionString; private final String formatString; @@ -113,19 +115,7 @@ private static String clean(String s) { * @return true if this version is at least as recent as the target version, false otherwise */ public boolean isAtLeastAsRecentAs(final VCFHeaderVersion target) { - switch (target) { - case VCF4_2: - return this == VCF4_2; - case VCF4_1: - return this == VCF4_1 || this == VCF4_2; - case VCF4_0: - return this != VCF3_2 && this != VCF3_3; - case VCF3_3: - return this != VCF3_2; - case VCF3_2: - default: - return true; - } + return this.ordinal() >= target.ordinal(); } public String getVersionString() { diff --git a/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java new file mode 100644 index 0000000000..991faa806f --- 
/dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFMetaHeaderLine.java @@ -0,0 +1,13 @@ +package htsjdk.variant.vcf; + +/** + * A class representing META fields in the VCF header + */ +public class VCFMetaHeaderLine extends VCFSimpleHeaderLine { + private static final long serialVersionUID = 1L; + + public VCFMetaHeaderLine(final String line, final VCFHeaderVersion version) { + super(VCFConstants.META_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null)); + } + +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java b/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java new file mode 100644 index 0000000000..24abed8eb0 --- /dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFPassThruTextTransformer.java @@ -0,0 +1,30 @@ +package htsjdk.variant.vcf; + +import java.util.List; + +/** + * A no-op implementation of VCFTextTransformer for pre-v43 VCFs, when such encodings are not supported and + * no transformation need be done. + */ +public class VCFPassThruTextTransformer implements VCFTextTransformer { + + /** + * No-op decoder for a single string + * @param rawPart the raw string to be decoded + * @return the raw string with no transformation done + */ + @Override + public String decodeText(final String rawPart) { + return rawPart; + } + + /** + * No-op decoder for lists of strings + * @param rawParts a list of raw strings + * @return the list of raw strings with no transformations done + */ + @Override + public List decodeText(final List rawParts) { + return rawParts; + } +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java new file mode 100644 index 0000000000..33f163e8dc --- /dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFPedigreeHeaderLine.java @@ -0,0 +1,13 @@ +package htsjdk.variant.vcf; + +/** + * A class representing PEDIGREE fields in the VCF header + */ +public class VCFPedigreeHeaderLine extends VCFSimpleHeaderLine { + 
private static final long serialVersionUID = 1L; + + public VCFPedigreeHeaderLine(String line, VCFHeaderVersion version) { + super(VCFConstants.PEDIGREE_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null)); + } + +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java b/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java new file mode 100644 index 0000000000..4c8015eaa5 --- /dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFPercentEncodedTextTransformer.java @@ -0,0 +1,74 @@ +package htsjdk.variant.vcf; + +import htsjdk.tribble.TribbleException; + +import java.util.List; +import java.util.stream.Collectors; + +/** + * Text transformer for attribute values embedded in VCF. VCF version 4.3 supports percent-encoding + * of characters that have special meaning in VCF. + */ +public class VCFPercentEncodedTextTransformer implements VCFTextTransformer { + final static private String ENCODING_SENTINEL_STRING = "%"; + final static private char ENCODING_SENTNEL_CHAR = '%'; + final static private int ENCODING_BASE_RADIX = 16; + + /** + * Transform a single string, replacing % encoded values with their corresponding text. + * + * @param rawPart the raw string to be decoded + * @return the decoded string + * @throws TribbleException if the the encoding is uninterpretable + */ + @Override + public String decodeText(final String rawPart) { + return decodePercentEncodedChars(rawPart); + } + + /** + * Transform a list of strings, replacing % encoded values with their corresponding text in each string. + * + * @param rawParts a list of raw strings + * @return a list of decoded strings + * @throws TribbleException if the the encoding is uninterpretable + */ + @Override + public List decodeText(final List rawParts) { + return rawParts.stream().map(this::decodeText).collect(Collectors.toList()); + } + + /** + * Transform input strings containing embedded percent=encoded characters. 
For example, when given the + * string '%3D%41' will return the string '=A'. + * + * @param rawText a string containing zero or more embedded encodings + * @return a string with all encoded characters replaced with the corresponding character + * @throws TribbleException if the the encoding is uninterpretable + */ + protected static String decodePercentEncodedChars(final String rawText) { + if (rawText.contains(ENCODING_SENTINEL_STRING)) { + StringBuilder builder = new StringBuilder(rawText.length()); + for (int i = 0; i < rawText.length(); i++) { + final char c = rawText.charAt(i); + if (c == ENCODING_SENTNEL_CHAR && ((i + 2) < rawText.length())) { + try { + final char[] trans = Character.toChars(Integer.parseInt(rawText.substring(i + 1, i + 3), ENCODING_BASE_RADIX)); + if (trans.length != 1) { + throw new TribbleException(String.format("escape sequence '%c' corresponds to an invalid encoding in '%s'", c, rawText)); + } + builder.append(trans[0]); + i += 2; + } catch (IllegalArgumentException e) { + builder.append(c); + } + } else { + builder.append(c); + } + } + return builder.toString(); + } + return rawText; + } + +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java new file mode 100644 index 0000000000..973a976baa --- /dev/null +++ b/src/main/java/htsjdk/variant/vcf/VCFSampleHeaderLine.java @@ -0,0 +1,13 @@ +package htsjdk.variant.vcf; + +/** + * A class representing SAMPLE fields in the VCF header + */ +public class VCFSampleHeaderLine extends VCFSimpleHeaderLine { + private static final long serialVersionUID = 1L; + + public VCFSampleHeaderLine(String line, VCFHeaderVersion version) { + super(VCFConstants.SAMPLE_HEADER_KEY, VCFHeaderLineTranslator.parseLine(version, line, null)); + } + +} diff --git a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java index 9082f965cc..12b45e5bc9 100644 --- 
a/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java +++ b/src/main/java/htsjdk/variant/vcf/VCFSimpleHeaderLine.java @@ -41,6 +41,9 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin private String name; private Map genericFields = new LinkedHashMap(); + public static final String ID_ATTRIBUTE = "ID"; + public static final String DESCRIPTION_ATTRIBUTE = "Description"; + /** * create a VCF filter header line * @@ -51,7 +54,7 @@ public class VCFSimpleHeaderLine extends VCFHeaderLine implements VCFIDHeaderLin public VCFSimpleHeaderLine(String key, String name, String description) { super(key, ""); Map map = new LinkedHashMap(1); - map.put("Description", description); + map.put(DESCRIPTION_ATTRIBUTE, description); initialize(name, map); } @@ -84,7 +87,7 @@ public VCFSimpleHeaderLine(final String line, final VCFHeaderVersion version, fi public VCFSimpleHeaderLine(final String key, final Map mapping) { super(key, ""); - name = mapping.get("ID"); + name = mapping.get(ID_ATTRIBUTE); initialize(name, mapping); } @@ -111,7 +114,7 @@ protected void initialize(String name, Map genericFields) { @Override protected String toStringEncoding() { Map map = new LinkedHashMap(); - map.put("ID", name); + map.put(ID_ATTRIBUTE, name); map.putAll(genericFields); return getKey() + "=" + VCFHeaderLine.toStringEncoding(map); } diff --git a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java index 035cdd3c39..6e9e713a20 100644 --- a/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java +++ b/src/main/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java @@ -59,9 +59,9 @@ public class VCFStandardHeaderLines { * Walks over the VCF header and repairs the standard VCF header lines in it, returning a freshly * allocated {@link VCFHeader} with standard VCF header lines repaired as necessary. 
/**
 * Contract for decoding attribute values embedded in VCF text. VCF version 4.3 supports
 * percent-encoding of characters that have special meaning in VCF; before v4.3 such encodings are
 * not supported and no transformation needs to be done.
 */
public interface VCFTextTransformer {
    /**
     * Transform a single string.
     *
     * @param rawPart the raw string to be decoded
     * @return the decoded string
     * @throws TribbleException if the encoding is uninterpretable
     */
    String decodeText(final String rawPart);

    /**
     * Transform a list of strings.
     *
     * @param rawParts a list of raw strings
     * @return a list of decoded strings
     * @throws TribbleException if the encoding is uninterpretable
     */
    List<String> decodeText(final List<String> rawParts);

}
return new LinkedHashSet<>(map.values()); } + // Reject attempts to merge a VCFv4.3 header with any other version + private static void enforceHeaderVersionMergePolicy( + final Set headerVersions, + final VCFHeaderVersion candidateVersion) { + if (candidateVersion != null) { + headerVersions.add(candidateVersion); + if (headerVersions.size() > 1 && headerVersions.contains(VCFHeaderVersion.VCF4_3)) { + throw new IllegalArgumentException( + String.format("Attempt to merge version %s header with incompatible header version %s", + VCFHeaderVersion.VCF4_3.getVersionString(), + headerVersions.stream() + .filter(hv -> !hv.equals(VCFHeaderVersion.VCF4_3)) + .map(VCFHeaderVersion::getVersionString) + .collect(Collectors.joining(" ")))); + } + } + } + /** * Add / replace the contig header lines in the VCFHeader with the in the reference file and master reference dictionary * diff --git a/src/test/java/htsjdk/variant/VariantBaseTest.java b/src/test/java/htsjdk/variant/VariantBaseTest.java index 7a3417b522..dc59309e7b 100644 --- a/src/test/java/htsjdk/variant/VariantBaseTest.java +++ b/src/test/java/htsjdk/variant/VariantBaseTest.java @@ -28,13 +28,18 @@ import htsjdk.HtsjdkTest; import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.util.Tuple; +import htsjdk.utils.ValidationUtils; import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFFileReader; +import htsjdk.variant.vcf.VCFHeader; import org.testng.Assert; import java.io.File; import java.io.IOException; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.HashSet; @@ -124,6 +129,23 @@ public static SAMSequenceDictionary createArtificialSequenceDictionary() { return new SAMSequenceDictionary(contigs); } + /** + * Reads an entire VCF into memory, returning both its VCFHeader and all VariantContext records in + 
* the vcf. + * + * For unit/integration testing purposes only! Do not call this method from actual tools! + * + * @param vcfPath Path of file to be loaded + * @return A Tuple with the VCFHeader as the first element, and a List of all VariantContexts from the VCF + * as the second element + */ + public static Tuple> readEntireVCFIntoMemory(final Path vcfPath) { + ValidationUtils.nonNull(vcfPath); + try ( final VCFFileReader vcfReader = new VCFFileReader(vcfPath, false) ){ + return new Tuple<>(vcfReader.getFileHeader(), vcfReader.iterator().toList()); + } + } + /** * Asserts that the two provided VariantContext objects are equal. * diff --git a/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java new file mode 100644 index 0000000000..7b5ff4526c --- /dev/null +++ b/src/test/java/htsjdk/variant/vcf/VCFCodec43FeaturesTest.java @@ -0,0 +1,257 @@ +package htsjdk.variant.vcf; + +import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.Interval; +import htsjdk.samtools.util.TestUtil; +import htsjdk.samtools.util.Tuple; +import htsjdk.tribble.index.Index; +import htsjdk.tribble.index.IndexFactory; +import htsjdk.tribble.util.TabixUtils; +import htsjdk.variant.VariantBaseTest; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; +import java.util.stream.Collectors; + +/* + ************************************************************************************************** + * IMPORTANT NOTE: this class contains string constants that contain embedded non-ASCII characters + * used for testing VCF UTF-8 support. 
Edit with care. + ************************************************************************************************** + */ +public class VCFCodec43FeaturesTest extends VariantBaseTest { + private static final Path TEST_PATH = Paths.get("src/test/resources/htsjdk/variant/vcf43/"); + private static final Path TEST_43_FILE = TEST_PATH.resolve("all43Features.vcf"); + private static final Path TEST_43_GZ_FILE = TEST_PATH.resolve("all43FeaturesCompressed.vcf.gz"); + // NOTE: these test file contain UTF8-encoded characters, and must be edited using a + // UTF8-encoding aware text editor + private static final Path TEST_43_UTF8_FILE = TEST_PATH.resolve("all43Features.utf8.vcf"); + private static final Path TEST_43_UTF8_GZ_FILE = TEST_PATH.resolve("all43FeaturesCompressed.utf8.vcf.gz"); + + @DataProvider(name="all43Files") + private Object[][] allVCF43Files() { + return new Object[][] { + // a .vcf, .vcf.gz, .vcf with UTF8 chars, and .vcf.gz with UTF8 chars + { TEST_43_FILE }, + { TEST_43_UTF8_FILE }, + { TEST_43_GZ_FILE }, + { TEST_43_UTF8_GZ_FILE } + }; + } + + @Test(dataProvider="all43Files") + public void testReadAllVCF43Features(final Path testFile) { + final Tuple> entireVCF = readEntireVCFIntoMemory(testFile); + + Assert.assertEquals(entireVCF.a.getMetaDataInInputOrder().size(), 70); + Assert.assertEquals(entireVCF.b.size(), 25); + } + + @Test(dataProvider="all43Files") + public void testVCF43SampleLine(final Path testFile) { + // ##SAMPLE= + final VCFSampleHeaderLine sampleLine = getHeaderLineFromTestFile( + testFile, + "SAMPLE", + "NA19238", + hl -> (VCFSampleHeaderLine) hl); + + Assert.assertEquals(sampleLine.getGenericFieldValue("Assay"), "WholeGenome"); + Assert.assertEquals(sampleLine.getGenericFieldValue("Ethnicity"), "AFR"); + Assert.assertEquals(sampleLine.getGenericFieldValue("Disease"), "None"); + Assert.assertEquals(sampleLine.getGenericFieldValue("Description"), "Test NA19238 SAMPLE header line"); + 
Assert.assertEquals(sampleLine.getGenericFieldValue("DOI"), "http://someurl"); + Assert.assertEquals(sampleLine.getGenericFieldValue("ExtraSampleField"), "extra sample"); + } + + @Test(dataProvider="all43Files") + public void testVCF43AltLine(final Path testFile) { + // ##ALT= + final VCFAltHeaderLine altLine = getHeaderLineFromTestFile( + testFile, + "ALT", + "DEL", + hl -> (VCFAltHeaderLine) hl); + + Assert.assertEquals(altLine.getGenericFieldValue("Description"), "Deletion"); + Assert.assertEquals(altLine.getGenericFieldValue("ExtraAltField"), "extra alt"); + } + + @Test(dataProvider="all43Files") + public void testVCF43PedigreeLine(final Path testFile) { + // ##PEDIGREE= + final VCFPedigreeHeaderLine pedigreeLine = getHeaderLineFromTestFile( + testFile, + "PEDIGREE", + "ChildID", + hl -> (VCFPedigreeHeaderLine) hl); + + Assert.assertEquals(pedigreeLine.getGenericFieldValue("Father"), "FatherID"); + Assert.assertEquals(pedigreeLine.getGenericFieldValue("Mother"), "MotherID"); + Assert.assertEquals(pedigreeLine.getGenericFieldValue("ExtraPedigreeField"), "extra pedigree"); + } + + @Test(dataProvider="all43Files") + public void testVCF43MetaLine(final Path testFile) { + // ##META= + final VCFMetaHeaderLine metaLine = getHeaderLineFromTestFile( + testFile, + "META", + "Assay", + hl -> (VCFMetaHeaderLine) hl); + + Assert.assertEquals(metaLine.getGenericFieldValue("Type"), "String"); + Assert.assertEquals(metaLine.getGenericFieldValue("ExtraMetaField"), "extra meta"); + } + + @Test(dataProvider="all43Files") + public void testVCF43PercentEncoding(final Path testFile) { + final Tuple> entireVCF = readEntireVCFIntoMemory(testFile); + + // 1 327 . 
T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE + // AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth + final VariantContext vc = entireVCF.b.get(0); + Assert.assertEquals(vc.getContig(), "1"); + Assert.assertEquals(vc.getStart(), 327); + // set=fil%3AteredInBoth + Assert.assertEquals(vc.getCommonInfo().getAttribute("set"), "fil:teredInBoth"); + } + + @Test(dataProvider="all43Files") + public void testSymbolicAlternateAllele(final Path testFile) { + final Tuple> entireVCF = readEntireVCFIntoMemory(testFile); + + // 1 327 . T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE + // AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth + final VariantContext vc = entireVCF.b.get(0); + Assert.assertEquals(vc.getContig(), "1"); + Assert.assertEquals(vc.getStart(), 327); + + final Allele symbolicAlternateAllele = vc.getAlternateAllele(0); + Assert.assertTrue(symbolicAlternateAllele.isSymbolic()); + Assert.assertTrue(symbolicAlternateAllele.isNonRefAllele()); + Assert.assertTrue(symbolicAlternateAllele.isNonReference()); + Assert.assertEquals(symbolicAlternateAllele, Allele.create(Allele.UNSPECIFIED_ALTERNATE_ALLELE_STRING)); + } + + @DataProvider(name="all43IndexableFiles") + private Object[][] allVCF43IndexableFiles() { + return new Object[][] { + { TEST_43_GZ_FILE }, + { TEST_43_UTF8_GZ_FILE } + }; + } + + @Test(dataProvider="all43IndexableFiles") + public void testVCF43IndexRoundTripQuery(final Path testFile) throws IOException { + final File tempDir = TestUtil.getTempDirectory("VCF43Codec", "indextest"); + tempDir.deleteOnExit(); + + // copy our input vcf to a temp location, and create a tabix index + Files.copy(testFile, (new File (tempDir, testFile.toFile().getName())).toPath()); + final File vcfFileCopy = new File(tempDir, testFile.toFile().getName()); + final Index index = IndexFactory.createIndex(vcfFileCopy, new VCFCodec(), 
IndexFactory.IndexType.TABIX); + final File indexFile = new File(tempDir, vcfFileCopy.getName() + TabixUtils.STANDARD_INDEX_EXTENSION); + index.write(indexFile); + Assert.assertTrue(indexFile.exists()); + + // query for a variant located after any variants containing percent encoded or UTF8 chars + // 22 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0 + // .00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 + // 1/0:53:99 + try (final VCFFileReader vcfReader = new VCFFileReader(vcfFileCopy, true); + final CloseableIterator vcIt = vcfReader.query(new Interval("22", 327, 327 ))) { + final List vcs = new ArrayList(1); + while (vcIt.hasNext()) { + vcs.add(vcIt.next()); + } + Assert.assertEquals(vcs.size(), 1); + Assert.assertEquals(vcs.get(0).getContig(), "22"); + Assert.assertEquals(vcs.get(0).getStart(), 327); + } + } + + // + // UTF8-specific tests + // + + @Test + public void testVCF43ReadUTF8Attributes() { + final Tuple> entireVCF = readEntireVCFIntoMemory(TEST_43_UTF8_FILE); + final List headerLines = getIDHeaderLinesWithKey(entireVCF.a, "COMMENT"); + + //##COMMENT= + Assert.assertEquals(headerLines.get(0).getValue(), + "This file has 6 embedded UTF8 chars - 1 right here (ä), 3 in the second ALT line, and 2 in the second vc's set attribute value."); + } + + @Test + public void testVCF43AltLineWithUTF8Chars() { + final Tuple> entireVCF = readEntireVCFIntoMemory(TEST_43_UTF8_FILE); + final List headerLines = getIDHeaderLinesWithKey(entireVCF.a,"ALT"); + + //##ALT= + final VCFAltHeaderLine altLine = headerLines + .stream() + .map(hl -> ((VCFAltHeaderLine) hl)) + .filter(hl -> hl.getID().equals("DUP")) + .findFirst() + .get(); + Assert.assertEquals(altLine.getGenericFieldValue("Description"), "Duplication"); + Assert.assertEquals(altLine.getGenericFieldValue("ExtraAltField"), "äääa"); + } + + @Test + public void testVCF43PercentEncodingWithUTF8() { + final Tuple> entireVCF = 
readEntireVCFIntoMemory(TEST_43_UTF8_FILE); + + //2 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE + // AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=ääa + // GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 + final VariantContext vc = entireVCF.b.get(1); + Assert.assertEquals(vc.getContig(), "2"); + Assert.assertEquals(vc.getStart(), 327); + + Assert.assertEquals(vc.getCommonInfo().getAttribute("set"), "ääa"); + } + + // given a vcf file, extract a header line with the given key and ID, cast to the target + // header line type (T) via the transformer function + private static T getHeaderLineFromTestFile( + final Path testVCFFile, + final String key, + final String ID, + Function headerLineCastTransformer) + { + final Tuple> entireVCF = readEntireVCFIntoMemory(testVCFFile); + final List headerLines = getIDHeaderLinesWithKey(entireVCF.a, key); + return headerLines + .stream() + .map(headerLineCastTransformer) + .filter(hl -> hl.getID().equals(ID)) + .findFirst() + .get(); + } + + private static List getIDHeaderLinesWithKey(final VCFHeader header, final String key) { + final List headerLines = + header.getMetaDataInInputOrder() + .stream() + .filter(hl -> hl.getKey().equals(key)) + .collect(Collectors.toList()); + return headerLines; + } + +} diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java index edc712b483..7d7c43000e 100644 --- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java +++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java @@ -351,6 +351,51 @@ public void testVCFHeaderAddDuplicateHeaderLine() { Assert.assertEquals(numHeaderLinesBefore, numHeaderLinesAfter); } + @DataProvider(name="validHeaderVersionTransitions") + public Object[][] validHeaderVersionTransitions() { + // v4.3 can never transition, all other version transitions are allowed + return new Object[][] { + {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_0}, + 
{VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_1}, + {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_2}, + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_1}, + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_2}, + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_2}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_3} + }; + } + + @DataProvider(name="invalidHeaderVersionTransitions") + public Object[][] invalidHeaderVersionTransitions() { + // v4.3 can never transition with, all other version transitions are allowed + return new Object[][] { + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_0}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_1}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2}, + {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF4_3}, + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_3}, + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_3}, + }; + } + + @Test(dataProvider="validHeaderVersionTransitions") + public void testValidHeaderVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) { + doHeaderTransition(fromVersion, toVersion); + } + + @Test(dataProvider="invalidHeaderVersionTransitions", expectedExceptions = TribbleException.class) + public void testInvalidHeaderVersionTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) { + doHeaderTransition(fromVersion, toVersion); + } + + private void doHeaderTransition(final VCFHeaderVersion fromVersion, final VCFHeaderVersion toVersion) { + final VCFHeader vcfHeader = + fromVersion == null ? 
+ new VCFHeader() : + new VCFHeader(fromVersion, Collections.EMPTY_SET, Collections.EMPTY_SET); + vcfHeader.setVCFHeaderVersion(toVersion); + } + @Test public void testVCFHeaderSerialization() throws Exception { final VCFFileReader reader = new VCFFileReader(new File("src/test/resources/htsjdk/variant/HiSeq.10000.vcf"), false); diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderVersionTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderVersionTest.java new file mode 100644 index 0000000000..57b7163f31 --- /dev/null +++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderVersionTest.java @@ -0,0 +1,43 @@ +package htsjdk.variant.vcf; + +import htsjdk.variant.VariantBaseTest; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class VCFHeaderVersionTest extends VariantBaseTest { + @DataProvider(name="vcfVersionRelationships") + public Object[][] vcfVersionRelationships() { + return new Object[][] { + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_2, true}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_1, true}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF4_0, true}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_3, true}, + {VCFHeaderVersion.VCF4_3, VCFHeaderVersion.VCF3_2, true}, + + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_1, true}, + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF4_0, true}, + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_3, true}, + {VCFHeaderVersion.VCF4_2, VCFHeaderVersion.VCF3_2, true}, + + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF4_0, true}, + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_3, true}, + {VCFHeaderVersion.VCF4_1, VCFHeaderVersion.VCF3_2, true}, + + {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_3, true}, + {VCFHeaderVersion.VCF4_0, VCFHeaderVersion.VCF3_2, true}, + + {VCFHeaderVersion.VCF3_3, VCFHeaderVersion.VCF3_2, true}, + }; + } + + @Test(dataProvider="vcfVersionRelationships") + public void testVCFVersionRelationships( + final 
VCFHeaderVersion sourceVersion, + final VCFHeaderVersion targetVersion, + final boolean expectedIsAtLeastAsRecentAs) { + Assert.assertEquals(sourceVersion.isAtLeastAsRecentAs(targetVersion), expectedIsAtLeastAsRecentAs); + Assert.assertNotEquals(targetVersion.isAtLeastAsRecentAs(sourceVersion), expectedIsAtLeastAsRecentAs); + } + +} diff --git a/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java b/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java new file mode 100644 index 0000000000..8bb9927de0 --- /dev/null +++ b/src/test/java/htsjdk/variant/vcf/VCFTextTransformerTest.java @@ -0,0 +1,56 @@ +package htsjdk.variant.vcf; + +import htsjdk.HtsjdkTest; +import htsjdk.tribble.TribbleException; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +public class VCFTextTransformerTest extends HtsjdkTest { + + @DataProvider(name="validPercentEncodings") + public Object[][] validPercentEncodings() { + return new Object[][] { + { "", ""}, + { "%3A", ":"}, + { "%3B", ";"}, + { "%3D", "="}, + { "%25", "%"}, + { "%2C", ","}, + { "%0D", "\r"}, + { "%0A", "\n"}, + { "%09", "\t"}, + { "%3AA", ":A"}, + { "abc%3A", "abc:"}, + { "%3Aabc", ":abc"}, + { "%3Aabc%3A", ":abc:"}, + + // valid text containing % encodings that are not valid, and are passed through in raw form (no decoding) + { "%3", "%3"}, + { "%d", "%d"}, + { "%a", "%a"}, + { "abcdefg%", "abcdefg%"}, + { "%3Aabcdefg%", ":abcdefg%"}, + { "abcdefg%0", "abcdefg%0"}, + { "abcdefg%1", "abcdefg%1"}, + { "abcdefg%a", "abcdefg%a"}, + { "abcdefg%d", "abcdefg%d"}, + { "abcdefg%g", "abcdefg%g"}, + { "abcdefg%gg", "abcdefg%gg"}, + { "abcdefg%-1", "abcdefg%-1"}, + }; + } + + @Test(dataProvider="validPercentEncodings") + public void testDecodeValidEncodings(final String rawText, final String decodedText) { + final VCFTextTransformer vcfTextTransformer = new VCFPercentEncodedTextTransformer(); + Assert.assertEquals(vcfTextTransformer.decodeText(rawText), 
decodedText); + } + + @Test(dataProvider = "validPercentEncodings") + public void testPassThruValidEncodings(final String rawText, final String unused) { + final VCFPassThruTextTransformer vcfPassThruTransformer = new VCFPassThruTextTransformer(); + Assert.assertEquals(vcfPassThruTransformer.decodeText(rawText), rawText); + } + +} diff --git a/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java b/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java new file mode 100644 index 0000000000..b5c35455a3 --- /dev/null +++ b/src/test/java/htsjdk/variant/vcf/VCFUtilsTest.java @@ -0,0 +1,54 @@ +package htsjdk.variant.vcf; + +import htsjdk.HtsjdkTest; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.util.*; + +public class VCFUtilsTest extends HtsjdkTest { + + @DataProvider(name="validHeaderVersionMerger") + public Object[][] validHeaderMergerVersions() { + // v4.3 can only merge with v4.3, all other version mergers are allowed + return new Object[][] { + {Arrays.asList("VCFv4.0", "VCFv4.0")}, + {Arrays.asList("VCFv4.1", "VCFv4.1")}, + {Arrays.asList("VCFv4.2", "VCFv4.2")}, + {Arrays.asList("VCFv4.3", "VCFv4.3")}, + {Arrays.asList("VCFv4.2", "VCFv4.2")}, + {Arrays.asList("VCFv4.2", "VCFv4.2", "VCFv4.2")}, + }; + } + + @DataProvider(name="invalidHeaderVersionMerger") + public Object[][] invalidHeaderVersionMerger() { + // v4.3 can only merge with v4.3, all other version mergers are allowed + return new Object[][] { + {Arrays.asList("VCFv4.0", "VCFv4.3")}, + {Arrays.asList("VCFv4.1", "VCFv4.3")}, + {Arrays.asList("VCFv4.2", "VCFv4.3")}, + {Arrays.asList("VCFv4.0", "VCFv4.0", "VCFv4.2", "VCFv4.3")}, + {Arrays.asList("VCFv4.3", "VCFv4.0", "VCFv4.1", "VCFv4.2")}, + }; + } + + @Test(dataProvider="validHeaderVersionMerger") + public void testValidHeaderVersionMerger(final List headerVersions) { + final List headersToMerge = new ArrayList<>(headerVersions.size()); + headerVersions.forEach(hv -> headersToMerge.add( + new 
VCFHeader(VCFHeaderVersion.toHeaderVersion(hv), Collections.emptySet(), Collections.emptySet())) + ); + final Set resultHeaders = VCFUtils.smartMergeHeaders(headersToMerge, true); + } + + @Test(dataProvider="invalidHeaderVersionMerger", expectedExceptions = IllegalArgumentException.class) + public void testInvalidHeaderVersionMerger(final List headerVersions) { + final List headersToMerge = new ArrayList<>(headerVersions.size()); + headerVersions.forEach(hv -> headersToMerge.add( + new VCFHeader(VCFHeaderVersion.toHeaderVersion(hv), Collections.emptySet(), Collections.emptySet())) + ); + VCFUtils.smartMergeHeaders(headersToMerge, true); + } + +} diff --git a/src/test/resources/htsjdk/variant/vcf43/all43Features.utf8.vcf b/src/test/resources/htsjdk/variant/vcf43/all43Features.utf8.vcf new file mode 100644 index 0000000000..a35ee04c28 --- /dev/null +++ b/src/test/resources/htsjdk/variant/vcf43/all43Features.utf8.vcf @@ -0,0 +1,96 @@ +##fileformat=VCFv4.3 +##COMMENT=This file has 6 embedded UTF8 chars - 1 right here (ä), 3 in the second ALT line, and 2 in the second vc's set attribute value. +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##META= +##META= +##META= +##META= +##PEDIGREE= +##PEDIGREE= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238 NA19239 NA19240 +1 327 . 
T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +2 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=ääa GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +3 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +5 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +6 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +7 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +8 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +9 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +10 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +11 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +12 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +13 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +14 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +15 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +16 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +17 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +18 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +19 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +20 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +21 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +22 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +X 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +Y 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 diff --git a/src/test/resources/htsjdk/variant/vcf43/all43Features.vcf b/src/test/resources/htsjdk/variant/vcf43/all43Features.vcf new file mode 100644 index 0000000000..bb13dc93ab --- /dev/null +++ b/src/test/resources/htsjdk/variant/vcf43/all43Features.vcf @@ -0,0 +1,96 @@ +##fileformat=VCFv4.3 +##COMMENT=This file has 0 embedded UTF8 characters, but we need this fake comment line to keep the file aligned with it's utf8 companion test file. 
+##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##ALT= +##FILTER= +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##META= +##META= +##META= +##META= +##PEDIGREE= +##PEDIGREE= +##SAMPLE= +##SAMPLE= +##SAMPLE= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238 NA19239 NA19240 +1 327 . T <*> 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=fil%3AteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +2 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +3 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +5 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +6 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +7 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +8 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +9 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +10 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +11 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +12 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +13 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +14 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +15 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +16 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +17 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +18 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +19 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +20 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +21 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +22 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +X 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 +Y 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62 1/0:37:99 1/0:53:99 diff --git a/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.utf8.vcf.gz b/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.utf8.vcf.gz new file mode 100644 index 0000000000..95eb1baa72 Binary files /dev/null and b/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.utf8.vcf.gz differ diff --git a/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.vcf.gz b/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.vcf.gz new file mode 100644 index 0000000000..d6337837a1 Binary files /dev/null and b/src/test/resources/htsjdk/variant/vcf43/all43FeaturesCompressed.vcf.gz differ diff --git a/src/test/resources/htsjdk/variant/vcf43/vcf43.vcf b/src/test/resources/htsjdk/variant/vcf43/vcf43.vcf new file mode 100644 index 0000000000..322256963e --- /dev/null +++ b/src/test/resources/htsjdk/variant/vcf43/vcf43.vcf @@ -0,0 +1,54 @@ +##fileformat=VCFv4.3 +##SAMPLE= +##SAMPLE= +##SAMPLE= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA19238 NA19239 NA19240 +1 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +2 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +3 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +4 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +5 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +6 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +7 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +8 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +9 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +10 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +11 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +12 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +13 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +14 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +15 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +16 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +17 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +18 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +19 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +20 327 . 
T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +21 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +22 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +X 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00 +Y 327 . T C 666.18 GATK_STANDARD;HARD_TO_VALIDATE AB=0.74;AC=3;AF=0.50;AN=6;DB=0;DP=936;Dels=0.00;HRun=3;MQ=34.66;MQ0=728;QD=0.71;SB=-268.74;set=filteredInBoth GT:DP:GQ 1/0:10:62.65 1/0:37:99.00 1/0:53:99.00