From 1d4a3161df70992090b488c8485f2ed87daf14a0 Mon Sep 17 00:00:00 2001 From: Yossi Farjoun Date: Tue, 16 Jul 2019 11:06:43 -0400 Subject: [PATCH] make VariantContextBuilder safer (#1344) Adds tests and fixes unsafe building patterns that were enabled by VariantContextBuilder. This required a small change in the API around adding a genotype context. Now when .genotypes() is called with a GenotypeContext object the builder will call immutable() on that object, to make sure that the user doesn't change the GC prior to the creation of the VariantContext...If folks really need to reuse the GenotypeContext objects across multiple build calls, we could add a .genotypesKeepMutable() method with a scary javaDoc note..... Another change in the API (somewhat) is that validation was added for the filters as per the hts-spec. fixes #1365 --- .../htsjdk/variant/variantcontext/Allele.java | 20 +- .../variant/variantcontext/CommonInfo.java | 4 +- .../variantcontext/GenotypesContext.java | 8 +- .../variantcontext/VariantContext.java | 178 +++++++---- .../variantcontext/VariantContextBuilder.java | 93 +++++- .../VariantContextBuilderTest.java | 288 ++++++++++++++++-- .../VariantContextTestProvider.java | 41 +-- .../writer/VariantContextWritersUnitTest.java | 1 - 8 files changed, 497 insertions(+), 136 deletions(-) diff --git a/src/main/java/htsjdk/variant/variantcontext/Allele.java b/src/main/java/htsjdk/variant/variantcontext/Allele.java index 387a8f7437..d2587a2cf0 100644 --- a/src/main/java/htsjdk/variant/variantcontext/Allele.java +++ b/src/main/java/htsjdk/variant/variantcontext/Allele.java @@ -190,16 +190,16 @@ protected Allele(final Allele allele, final boolean ignoreRefState) { this.isSymbolic = allele.isSymbolic; } - private static final Allele REF_A = new Allele("A", true); - private static final Allele ALT_A = new Allele("A", false); - private static final Allele REF_C = new Allele("C", true); - private static final Allele ALT_C = new Allele("C", false); - private static final Allele REF_G = new Allele("G", true); - private static final Allele ALT_G = new Allele("G", false); - private static final Allele REF_T = new Allele("T", true); - private static final Allele ALT_T = new Allele("T", false); - private static final Allele REF_N = new Allele("N", true); - private static final Allele ALT_N = new Allele("N", false); + public static final Allele REF_A = new Allele("A", true); + public static final Allele ALT_A = new Allele("A", false); + public static final Allele REF_C = new Allele("C", true); + public static final Allele ALT_C = new Allele("C", false); + public static final Allele REF_G = new Allele("G", true); + public static final Allele ALT_G = new Allele("G", false); + public static final Allele REF_T = new Allele("T", true); + public static final Allele ALT_T = new Allele("T", false); + public static final Allele REF_N = new Allele("N", true); + public static final Allele ALT_N = new Allele("N", false); public static final Allele SPAN_DEL = new Allele(SPAN_DEL_STRING, false); public static final Allele NO_CALL = new Allele(NO_CALL_STRING, false); public static final Allele NON_REF_ALLELE = new Allele(NON_REF_STRING, false); diff --git a/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java b/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java index c9a4843bf4..144fd963c2 100644 --- a/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java +++ b/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java @@ -106,7 +106,7 @@ public boolean filtersWereApplied() { } public boolean isFiltered() { - return filters == null ? false : !filters.isEmpty(); + return filters != null && !filters.isEmpty(); } public boolean isNotFiltered() { @@ -115,7 +115,7 @@ public boolean isNotFiltered() { public void addFilter(String filter) { if ( filters == null ) // immutable -> mutable - filters = new HashSet(); + filters = new HashSet<>(); if ( filter == null ) throw new IllegalArgumentException("BUG: Attempting to add null filter " + this); if ( getFilters().contains(filter) ) throw new IllegalArgumentException("BUG: Attempting to add duplicate filter " + filter + " at " + this); diff --git a/src/main/java/htsjdk/variant/variantcontext/GenotypesContext.java b/src/main/java/htsjdk/variant/variantcontext/GenotypesContext.java index c1468758aa..7586db64db 100644 --- a/src/main/java/htsjdk/variant/variantcontext/GenotypesContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/GenotypesContext.java @@ -26,7 +26,6 @@ package htsjdk.variant.variantcontext; import java.io.IOException; -import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.Serializable; import java.util.ArrayList; @@ -244,9 +243,9 @@ public boolean isMutable() { return ! immutable; } - public final void checkImmutability() { + public final void checkImmutability() throws UnsupportedOperationException { if ( immutable ) - throw new IllegalAccessError("GenotypeMap is currently immutable, but a mutator method was invoked on it"); + throw new UnsupportedOperationException("GenotypeMap is currently immutable, but a mutator method was invoked on it"); } // --------------------------------------------------------------------------- @@ -346,9 +345,10 @@ public boolean isEmpty() { * * @param genotype * @return + * @throws UnsupportedOperationException if the context has been made immutable */ @Override - public boolean add(final Genotype genotype) { + public boolean add(final Genotype genotype) throws UnsupportedOperationException { checkImmutability(); invalidateSampleOrdering(); diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java index 39fe66cae4..72c84c36af 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java @@ -32,7 +32,17 @@ import htsjdk.variant.vcf.*; import java.io.Serializable; -import java.util.*; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Comparator; +import java.util.EnumSet; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -215,25 +225,25 @@ public class VariantContext implements Feature, Serializable { public static final long serialVersionUID = 1L; - private final static boolean WARN_ABOUT_BAD_END = true; - private final static int MAX_ALLELE_SIZE_FOR_NON_SV = 150; + private static final boolean WARN_ABOUT_BAD_END = true; + private static final int MAX_ALLELE_SIZE_FOR_NON_SV = 150; private boolean fullyDecoded = false; protected CommonInfo commonInfo = null; - public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; + public static final double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; - public final static Set PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet()); + public static final Set PASSES_FILTERS = Collections.emptySet(); /** The location of this VariantContext */ - final protected String contig; - final protected long start; - final protected long stop; + protected final String contig; + protected final long start; + protected final long stop; private final String ID; /** The type (cached for performance reasons) of this context */ protected Type type = null; /** A set of the alleles segregating in this context */ - final protected List alleles; + protected final List alleles; /** A mapping from sampleName -> genotype objects for all genotypes associated with this context */ protected GenotypesContext genotypes = null; @@ -241,7 +251,7 @@ public class VariantContext implements Feature, Serializable { /** Counts for each of the possible Genotype types in this context */ protected int[] genotypeCounts = null; - public final static GenotypesContext NO_GENOTYPES = GenotypesContext.NO_GENOTYPES; + public static final GenotypesContext NO_GENOTYPES = GenotypesContext.NO_GENOTYPES; // a fast cached access point to the ref / alt alleles for biallelic case private Allele REF = null; @@ -253,9 +263,10 @@ public class VariantContext implements Feature, Serializable { private Boolean monomorphic = null; /* -* Determine which genotype fields are in use in the genotypes in VC -* @return an ordered list of genotype fields in use in VC. If vc has genotypes this will always include GT first -*/ + * Determine which genotype fields are in use in the genotypes in VC + * @return an ordered list of genotype fields in use in VC. If vc has genotypes this will always include GT first + */ + public List calcVCFGenotypeKeys(final VCFHeader header) { final Set keys = new HashSet<>(); @@ -306,12 +317,91 @@ public List calcVCFGenotypeKeys(final VCFHeader header) { // // --------------------------------------------------------------------------------------------------------- + //no controls and white-spaces characters, no semicolon. + public static final Pattern VALID_FILTER = Pattern.compile("^[!-:<-~]+$"); + public enum Validation { - ALLELES, - GENOTYPES + ALLELES() { + void validate(VariantContext variantContext) { + validateAlleles(variantContext); + } + }, + GENOTYPES() { + void validate(VariantContext variantContext) { + validateGenotypes(variantContext); + } + }, + FILTERS { + void validate(VariantContext variantContext) { + validateFilters(variantContext); + } + }; + + abstract void validate(VariantContext variantContext); + + + private static void validateAlleles(final VariantContext vc) { + + boolean alreadySeenRef = false; + + for (final Allele allele : vc.alleles) { + // make sure there's only one reference allele + if (allele.isReference()) { + if (alreadySeenRef) { + throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + vc.alleles + " vc=" + vc); + } + alreadySeenRef = true; + } + + if (allele.isNoCall()) { + throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + vc.alleles + " vc=" + vc); + } + } + + // make sure there's one reference allele + if (!alreadySeenRef) { + throw new IllegalArgumentException("No reference allele found in VariantContext"); + } + } + + private static void validateGenotypes(final VariantContext variantContext) { + + final ArrayList genotypes = variantContext.genotypes.getGenotypes(); + + if (genotypes == null) { + throw new IllegalStateException("Genotypes is null"); + } + + for (int i = 0; i < genotypes.size(); i++) { + final Genotype genotype = genotypes.get(i); + if (genotype.isAvailable()) { + final List alleles = genotype.getAlleles(); + for (int j = 0, size = alleles.size(); j < size; j++) { + final Allele gAllele = alleles.get(j); + if (!variantContext.hasAllele(gAllele) && gAllele.isCalled()) { + throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); + } + } + } + } + } + + private static void validateFilters(final VariantContext variantContext) { + final Set filters = variantContext.getFilters(); + if (filters == null) { + return; + } + + for (String filter : filters) { + if (!VALID_FILTER.matcher(filter).matches()) { + throw new IllegalStateException("Filter '" + filter + + "' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER); + } + } + } } - private final static EnumSet NO_VALIDATION = EnumSet.noneOf(Validation.class); + private static final EnumSet NO_VALIDATION = EnumSet.noneOf(Validation.class); // --------------------------------------------------------------------------------------------------------- // @@ -1198,7 +1288,7 @@ public void validateAlternateAlleles() { // maintain a list of non-symbolic alleles expected in the REF and ALT fields of the record // (we exclude symbolic alleles because it's commonly expected that they don't show up in the genotypes, e.g. with GATK gVCFs) - final List reportedAlleles = new ArrayList(); + final List reportedAlleles = new ArrayList<>(); for ( final Allele allele : getAlleles() ) { if ( !allele.isSymbolic() ) reportedAlleles.add(allele); @@ -1286,17 +1376,9 @@ public void validateChromosomeCounts() { // // --------------------------------------------------------------------------------------------------------- - private boolean validate(final EnumSet validationToPerform) { + private void validate(final EnumSet validationsToPerform) { validateStop(); - for (final Validation val : validationToPerform ) { - switch (val) { - case ALLELES: validateAlleles(); break; - case GENOTYPES: validateGenotypes(); break; - default: throw new IllegalArgumentException("Unexpected validation mode " + val); - } - } - - return true; + validationsToPerform.forEach(v->v.validate(this)); } /** @@ -1312,56 +1394,18 @@ private void validateStop() { + " but this VariantContext contains an END key with value " + end; if ( GeneralUtils.DEBUG_MODE_ENABLED && WARN_ABOUT_BAD_END ) { System.err.println(message); - } - else { + } else { throw new TribbleException(message); } } } else { final long length = (stop - start) + 1; - if ( ! hasSymbolicAlleles() && length != getReference().length() ) { + if (!hasSymbolicAlleles() && length != getReference().length()) { throw new IllegalStateException("BUG: GenomeLoc " + contig + ":" + start + "-" + stop + " has a size == " + length + " but the variation reference allele has length " + getReference().length() + " this = " + this); } } } - private void validateAlleles() { - - boolean alreadySeenRef = false; - - for ( final Allele allele : alleles ) { - // make sure there's only one reference allele - if ( allele.isReference() ) { - if ( alreadySeenRef ) throw new IllegalArgumentException("BUG: Received two reference tagged alleles in VariantContext " + alleles + " this=" + this); - alreadySeenRef = true; - } - - if ( allele.isNoCall() ) { - throw new IllegalArgumentException("BUG: Cannot add a no call allele to a variant context " + alleles + " this=" + this); - } - } - - // make sure there's one reference allele - if ( ! alreadySeenRef ) - throw new IllegalArgumentException("No reference allele found in VariantContext"); - } - - private void validateGenotypes() { - if ( this.genotypes == null ) throw new IllegalStateException("Genotypes is null"); - - for ( int i = 0; i < genotypes.size(); i++ ) { - Genotype genotype = genotypes.get(i); - if ( genotype.isAvailable() ) { - final List alleles = genotype.getAlleles(); - for ( int j = 0, size = alleles.size(); j < size; j++ ) { - final Allele gAllele = alleles.get(j); - if ( ! hasAllele(gAllele) && gAllele.isCalled() ) - throw new IllegalStateException("Allele in genotype " + gAllele + " not in the variant context " + alleles); - } - } - } - } - // --------------------------------------------------------------------------------------------------------- // // utility routines diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java index 921f0630cb..182b5fb9e4 100644 --- a/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java +++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextBuilder.java @@ -34,6 +34,7 @@ import java.util.Collections; import java.util.EnumSet; import java.util.HashMap; +import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -79,6 +80,7 @@ public class VariantContextBuilder { private Set filters = null; private Map attributes = null; private boolean attributesCanBeModified = false; + private boolean filtersCanBeModified = false; /** enum of what must be validated */ final private EnumSet toValidate = EnumSet.noneOf(VariantContext.Validation.class); @@ -167,12 +169,12 @@ public Map getAttributes() { * @param parent Cannot be null */ public VariantContextBuilder(final VariantContext parent) { - if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContextBuilder parent argument cannot be null in VariantContextBuilder"); + if (parent == null) { + throw new IllegalArgumentException("BUG: VariantContextBuilder parent argument cannot be null in VariantContextBuilder"); + } this.alleles = parent.getAlleles(); - this.attributes = parent.getAttributes(); - this.attributesCanBeModified = false; this.contig = parent.getContig(); - this.filters = parent.getFiltersMaybeNull(); + this.genotypes = parent.getGenotypes(); this.ID = parent.getID(); this.log10PError = parent.getLog10PError(); @@ -180,12 +182,22 @@ public VariantContextBuilder(final VariantContext parent) { this.start = parent.getStart(); this.stop = parent.getEnd(); this.fullyDecoded = parent.isFullyDecoded(); + + this.attributes(parent.getAttributes()); + if (parent.filtersWereApplied()) { + this.filters(parent.getFilters()); + } else { + this.unfiltered(); + } } public VariantContextBuilder(final VariantContextBuilder parent) { if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder"); - this.alleles = parent.alleles; + this.attributesCanBeModified = false; + this.filtersCanBeModified = false; + + this.alleles = parent.alleles; this.contig = parent.contig; this.genotypes = parent.genotypes; this.ID = parent.ID; @@ -323,12 +335,28 @@ public VariantContextBuilder rmAttributes(final List keys) { * collection, so methods that want to add / remove records require the attributes to be copied to a */ private void makeAttributesModifiable() { - if ( ! attributesCanBeModified ) { + if (!attributesCanBeModified) { this.attributesCanBeModified = true; - if (attributes == null) { - this.attributes = new HashMap(); + + final Map tempAttributes = attributes; + if (tempAttributes != null) { + this.attributes = new HashMap<>(tempAttributes); } else { - this.attributes = new HashMap(attributes); + this.attributes = new HashMap<>(); + } + } + } + + /** + * Makes the filters modifiable. + */ + private void makeFiltersModifiable() { + if (!filtersCanBeModified) { + this.filtersCanBeModified = true; + final Set tempFilters = filters; + this.filters = new HashSet<>(); + if (tempFilters != null) { + this.filters.addAll(tempFilters); } } } @@ -339,13 +367,34 @@ private void makeAttributesModifiable() { * filters can be null -> meaning there are no filters * * @param filters Set of strings to set as the filters for this builder + * This set will be copied so that external set can be + * safely changed. * @return this builder */ public VariantContextBuilder filters(final Set filters) { - this.filters = filters; + if (filters == null) { + unfiltered(); + } else { + this.filtersCanBeModified = true; + filtersAsIs(new HashSet<>(filters)); + } return this; } + /** + * This builder's filters are set to this value + * + * filters can be null -> meaning there are no filters + * + * @param filters Set of strings to set as the filters for this builder + * @return this builder + */ + private void filtersAsIs(final Set filters) { + this.filters = filters; + toValidate.add(VariantContext.Validation.FILTERS); + } + + /** * {@link #filters} * @@ -353,12 +402,18 @@ public VariantContextBuilder filters(final Set filters) { * @return this builder */ public VariantContextBuilder filters(final String ... filters) { - filters(new LinkedHashSet(Arrays.asList(filters))); + filtersAsIs(new LinkedHashSet<>(Arrays.asList(filters))); return this; } + /** Adds the given filter to the list of filters + * + * @param filter + * @return + */ public VariantContextBuilder filter(final String filter) { - if ( this.filters == null ) this.filters = new LinkedHashSet(1); + makeFiltersModifiable(); + this.filters.add(filter); return this; } @@ -369,7 +424,8 @@ public VariantContextBuilder filter(final String filter) { * @return this builder */ public VariantContextBuilder passFilters() { - return filters(VariantContext.PASSES_FILTERS); + filtersAsIs(VariantContext.PASSES_FILTERS); + return this; } /** @@ -385,6 +441,8 @@ public VariantContextBuilder unfiltered() { /** * Tells this builder that the resulting VariantContext should use this genotype's GenotypeContext. * + * Note that this method will call the immutable method on the provided genotypes object + * to ensure that the user will not modify it further. * Note that genotypes can be null -> meaning there are no genotypes * * @param genotypes GenotypeContext to use in this builder @@ -392,8 +450,10 @@ public VariantContextBuilder unfiltered() { */ public VariantContextBuilder genotypes(final GenotypesContext genotypes) { this.genotypes = genotypes; - if ( genotypes != null ) + if (genotypes != null) { + genotypes.immutable(); toValidate.add(VariantContext.Validation.GENOTYPES); + } return this; } @@ -574,7 +634,10 @@ public VariantContext make() { } public VariantContext make(final boolean leaveModifyableAsIs) { - if(!leaveModifyableAsIs) attributesCanBeModified = false; + if (!leaveModifyableAsIs) { + attributesCanBeModified = false; + filtersCanBeModified = false; + } return new VariantContext(source, ID, contig, start, stop, alleles, genotypes, log10PError, filters, attributes, diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java index f4000283ee..df309d5166 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextBuilderTest.java @@ -2,27 +2,27 @@ import htsjdk.variant.VariantBaseTest; import org.testng.Assert; -import org.testng.annotations.BeforeTest; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; public class VariantContextBuilderTest extends VariantBaseTest { - Allele Aref, C; + static final Allele Aref = Allele.REF_A; + static final Allele Tref = Allele.REF_T; + static final Allele G = Allele.ALT_G; + static final Allele C = Allele.ALT_C; - String snpLoc = "chr1"; + String snpChr = "chr1"; String snpSource = "test"; long snpLocStart = 10; long snpLocStop = 10; - @BeforeTest - public void before() { - - C = Allele.create("C"); - Aref = Allele.create("A", true); - } - @DataProvider(name = "trueFalse") public Object[][] testAttributesWorksTest() { return new Object[][]{{true}, {false}}; @@ -30,8 +30,8 @@ public Object[][] testAttributesWorksTest() { @Test(dataProvider = "trueFalse") public void testAttributeResettingWorks(final boolean leaveModifyableAsIs) { - final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); - final VariantContextBuilder root2 = new VariantContextBuilder(snpSource, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + final VariantContextBuilder root2 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); final VariantContext result1 = root1.attribute("AC", 1).make(leaveModifyableAsIs); @@ -47,19 +47,271 @@ public void testAttributeResettingWorks(final boolean leaveModifyableAsIs) { } } + @Test() - public void testAttributeResettingWorks() { - final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); - final VariantContextBuilder root2 = new VariantContextBuilder(snpSource, snpLoc, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + public void testBulkAttributeResettingWorks() { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + final VariantContext result1 = root1 + .attribute("AC", 1) + .attribute("AN", 2) + .make(); - final VariantContext result1 = root1.attribute("AC", 1).make(); + final VariantContextBuilder root2 = new VariantContextBuilder(result1); //this is a red-herring and should not change anything. final VariantContext ignored = root1.attribute("AC", 2).make(); + final VariantContext result2 = root2.make(); + + Assert.assertEquals(result1.getAttribute("AC"), result2.getAttribute("AC")); + Assert.assertEquals(result1.getAttribute("AN"), result2.getAttribute("AN")); + + final HashMap map = new HashMap<>(); + map.put("AC", 2); + map.put("AN", 4); + root2.attributes(map); + + final VariantContext result3 = root2.make(); + + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result1.getAttribute("AN"), 2); + + Assert.assertEquals(result2.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AN"), 2); + + Assert.assertEquals(result3.getAttribute("AC"), 2); + Assert.assertEquals(result3.getAttribute("AN"), 4); + } + + enum VCBuilderScheme { + NEW_ON_BUILDER { + @Override + VariantContextBuilder getOtherBuilder(final VariantContextBuilder builder, final VariantContext vc) { + return new VariantContextBuilder(builder); + } + }, + NEW_ON_VC { + @Override + VariantContextBuilder getOtherBuilder(final VariantContextBuilder builder, final VariantContext vc) { + return new VariantContextBuilder(vc); + } + }, + SAME { + @Override + VariantContextBuilder getOtherBuilder(final VariantContextBuilder builder, final VariantContext vc) { + return builder; + } + }; + + abstract VariantContextBuilder getOtherBuilder(final VariantContextBuilder builder, VariantContext vc); + } + + @DataProvider + public Object[][] builderSchemes() { + return new Object[][]{ + {VCBuilderScheme.NEW_ON_BUILDER}, + {VCBuilderScheme.NEW_ON_VC}, + {VCBuilderScheme.SAME}}; + } + + @Test(dataProvider = "builderSchemes") + public void testAttributeResettingWorks(final VCBuilderScheme builderScheme) { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + + final VariantContext result1 = root1.attribute("AC", 1).make(); + final VariantContextBuilder root2 = builderScheme.getOtherBuilder(root1, result1) + .source(snpSource) + .chr(snpChr) + .start(snpLocStart) + .stop(snpLocStop) + .alleles(Arrays.asList(Aref, C)); + + //this is a red-herring and should not change anything. + final VariantContext ignored = root1.attribute("AC", 2).make(); final VariantContext result2 = root2.attribute("AC", 1).make(); - Assert.assertEquals(result1.getAttribute("AC"), result2.getAttribute("AC")); + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AC"), 1); + } + + @Test(dataProvider = "builderSchemes") + public void testAttributeResettingWorksPreMake1(final VCBuilderScheme builderScheme) { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)) + .attribute("AC", 1); + + final VariantContext result1 = root1.make(); + + final VariantContextBuilder root2 = builderScheme.getOtherBuilder(root1, root1.make()) + .source(snpSource) + .chr(snpChr) + .start(snpLocStart) + .stop(snpLocStop) + .alleles(Arrays.asList(Aref, C)); + + //this is a red-herring and should not change anything. + final VariantContext result2 = root2.attribute("AC", 2).make(); + + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AC"), 2); + } + + + @Test(dataProvider = "builderSchemes") + public void testAttributeResettingWorksPreMake2(final VCBuilderScheme builderScheme) { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)).attribute("AC", 1); + + final VariantContextBuilder root2 = builderScheme.getOtherBuilder(root1, root1.make()) + .source(snpSource) + .chr(snpChr) + .start(snpLocStart) + .stop(snpLocStop) + .alleles(Arrays.asList(Aref, C)); + + //this is a red-herring and should not change anything. + final VariantContext result1 = root1.make(); + final VariantContext result2 = root2.attribute("AC", 2).make(); + + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AC"), 2); + } + + + @Test(dataProvider = "builderSchemes") + public void testAttributeResettingWorks2(final VCBuilderScheme builderScheme) { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)) + .attribute("AC", 1); + + final VariantContext result1 = root1.make(); + final VariantContextBuilder root2 = builderScheme.getOtherBuilder(root1, result1) + .source(snpSource) + .chr(snpChr) + .start(snpLocStart) + .stop(snpLocStop) + .alleles(Arrays.asList(Aref, C)); + + final VariantContext result2 = root2.attribute("AC", 2).make(); + + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AC"), 2); + } + + @Test(dataProvider = "builderSchemes") + public void testAttributeResettingWorks3(final VCBuilderScheme builderScheme) { + final VariantContextBuilder root1 = new VariantContextBuilder(snpSource, snpChr, snpLocStart, snpLocStop, Arrays.asList(Aref, C)); + + Map attributes = new HashMap<>(); + attributes.put("AC", 1); + + final VariantContext result1 = root1.attributes(attributes).make(); + final VariantContextBuilder root2 = builderScheme.getOtherBuilder(root1, result1) + .source(snpSource).chr(snpChr).start(snpLocStart).stop(snpLocStop).alleles(Arrays.asList(Aref, C)); + + attributes.put("AC", 2); + + final VariantContext result2 = root2.attributes(attributes).make(); + + Assert.assertEquals(result1.getAttribute("AC"), 1); + Assert.assertEquals(result2.getAttribute("AC"), 2); + } + + @Test(dataProvider = "builderSchemes") + public void testFiltersUnaffectedByClonedVariants(final VCBuilderScheme builderScheme) { + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C)).filter("TEST"); + final VariantContext vc1 = builder.make(); + final VariantContext vc2 = builderScheme.getOtherBuilder(builder, vc1).filters("TEST2").make(); + Assert.assertNotEquals(vc2.getFilters(), vc1.getFilters(), "The two lists of filters should be different"); + } + + @Test(dataProvider = "builderSchemes") + public void testFilterExternalSetUnaffectedByClonedVariantsBuilders(final VCBuilderScheme builderScheme) { + final Set filters = new HashSet<>(); + filters.add("TEST"); + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C)).filters(filters); + final VariantContext vc1 = builder.make(); + + filters.clear(); + filters.add("TEST2"); + + final VariantContext vc2 = builderScheme.getOtherBuilder(builder, vc1).filters(filters).make(); + Assert.assertNotEquals(vc2.getFilters(), vc1.getFilters(), "The two lists of filters should be different"); + } + + @Test + public void testFilterCanUseUnmodifiableSet() { + final Set filters = new HashSet<>(); + filters.add("TEST"); + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C)).filters(Collections.unmodifiableSet(filters)); + builder.filter("CanIHazAFilter?"); + final VariantContext vc1 = builder.make(); + + Assert.assertEquals(vc1.getFilters().size(),2); + } + + @DataProvider + public static Object[][] illegalFilterStrings() { + return new Object[][]{ + {"Tab\t"}, + {"newLine\n"}, + {"space "}, + {"semicolon;"}, + {"carriage return\r"} + }; + } + + @Test(dataProvider = "illegalFilterStrings", expectedExceptions = IllegalStateException.class) + public void testFilterCannotUseBadFilters(final String filter) { + final Set filters = new HashSet<>(); + filters.add(filter); + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C)).filters(Collections.unmodifiableSet(filters)); + final VariantContext vc1 = builder.make(); + + //shouldn't have gotten here + Assert.fail("a bad filter should have not been permitted: '" + filter + "'"); + } + + + @Test(dataProvider = "builderSchemes") + public void testAllelesUnaffectedByClonedVariants(final VCBuilderScheme builderScheme) { + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C)).filter("TEST"); + final VariantContext ignored = builder.make(); + final VariantContext vc2 = builderScheme.getOtherBuilder(builder, ignored).alleles(Collections.singleton(Aref)).make(); + + final VariantContext vc1 = builder.make(); + + if (builderScheme == VCBuilderScheme.SAME) { + Assert.assertEquals(vc2.getAlleles(), vc1.getAlleles(), "The two lists of alleles should be the same"); + } else { + Assert.assertNotEquals(vc2.getAlleles(), vc1.getAlleles(), "The two lists of alleles should be different"); + } + } + + @Test(dataProvider = "builderSchemes") + public void testGenotypesUnaffectedByClonedVariants(final VCBuilderScheme builderScheme) { + if (builderScheme == VCBuilderScheme.NEW_ON_VC) { + return; + } + + final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST"); + + final Genotype sample1 = GenotypeBuilder.create("sample1", Arrays.asList(Tref, C)); + final Genotype sample2 = GenotypeBuilder.create("sample2", Arrays.asList(Tref, G)); + + GenotypesContext gc = GenotypesContext.create(sample1); + builder.genotypes(gc); + try { + gc.add(sample2); + Assert.fail("shouldn't have gotten here"); + } catch (UnsupportedOperationException e) { + // The exception is expected since calling builder.genotypes(gc) should make the + // gc object immutable (to protect the builder from unexpected changes) + + // By making a new gc object and setting it to sample2 the resulting vc1 and vc2 will be different + gc = GenotypesContext.create(sample2); + } + + final VariantContext vc1 = builder.make(); + final VariantContext vc2 = builderScheme.getOtherBuilder(builder, null).genotypes(gc).make(); + Assert.assertNotEquals(vc2.getGenotypes(), vc1.getGenotypes(), "The two genotype lists should be different. only saw " + vc1.getGenotypes().toString()); } -} \ No newline at end of file +} diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java index b8476592e7..511cf74145 100644 --- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java +++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java @@ -29,7 +29,10 @@ import htsjdk.tribble.FeatureCodec; import htsjdk.tribble.FeatureCodecHeader; import htsjdk.tribble.Tribble; -import htsjdk.tribble.readers.*; +import htsjdk.tribble.readers.LineIterator; +import htsjdk.tribble.readers.LineIteratorImpl; +import htsjdk.tribble.readers.PositionalBufferedStream; +import htsjdk.tribble.readers.SynchronousLineReader; import htsjdk.variant.VariantBaseTest; import htsjdk.variant.bcf2.BCF2Codec; import htsjdk.variant.utils.GeneralUtils; @@ -82,10 +85,10 @@ public class VariantContextTestProvider extends HtsjdkTest { final private static List TWENTY_INTS = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20); private static VCFHeader syntheticHeader; - final static List TEST_DATAs = new ArrayList(); + final static List TEST_DATAs = new ArrayList<>(); private static VariantContext ROOT; - private final static List testSourceVCFs = new ArrayList(); + private final static List testSourceVCFs = new ArrayList<>(); static { testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf")); testSourceVCFs.add(new File(VariantBaseTest.variantTestDataRoot + "ex2.vcf")); @@ -143,7 +146,7 @@ public VariantContextTestData(final VCFHeader header, final VariantContextBuilde } public VariantContextTestData(final VCFHeader header, final List vcs) { - final Set samples = new HashSet(); + final Set samples = new HashSet<>(); for ( final VariantContext vc : vcs ) if ( vc.hasGenotypes() ) samples.addAll(vc.getSampleNames()); @@ -194,7 +197,7 @@ private static void makeEmpiricalTests() throws IOException { for ( final File file : testSourceVCFs ) { VCFCodec codec = new VCFCodec(); VariantContextContainer x = readAllVCs( file, codec ); - List fullyDecoded = new ArrayList(); + List fullyDecoded = new ArrayList<>(); for ( final VariantContext raw : x.getVCs() ) { if ( raw != null ) @@ -219,7 +222,7 @@ private final static void addHeaderLine(final Set metaData, final } private static void createSyntheticHeader() { - Set metaData = new TreeSet(); + Set metaData = new TreeSet<>(); addHeaderLine(metaData, "STRING1", 1, VCFHeaderLineType.String); addHeaderLine(metaData, "END", 1, VCFHeaderLineType.Integer); @@ -337,7 +340,7 @@ private static void addSymbolicAlleleTests() { } private static void addGenotypesToTestData() { - final ArrayList sites = new ArrayList(); + final ArrayList sites = new ArrayList<>(); sites.add(builder().alleles("A").make()); sites.add(builder().alleles("A", "C", "T").make()); @@ -392,7 +395,7 @@ private static void addGenotypes( final VariantContext site) { addGenotypeTests(site, homRef, het, homVar); // test no GT at all - addGenotypeTests(site, new GenotypeBuilder("noGT", new ArrayList(0)).attribute("INT1", 10).make()); + addGenotypeTests(site, new GenotypeBuilder("noGT", new ArrayList<>(0)).attribute("INT1", 10).make()); final List noCall = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL); @@ -599,12 +602,12 @@ private static void addGenotypesAndGTests() { final Allele ref = site.getReference(); // base genotype is ref/.../ref up to ploidy - final List baseGenotype = new ArrayList(ploidy); + final List baseGenotype = new ArrayList<>(ploidy); for ( int i = 0; i < ploidy; i++) baseGenotype.add(ref); final int nPLs = GenotypeLikelihoods.numLikelihoods(nAlleles, ploidy); // ada is 0, 1, ..., nAlleles - 1 - final List ada = new ArrayList(nAlleles); + final List ada = new ArrayList<>(nAlleles); for ( int i = 0; i < nAlleles - 1; i++ ) ada.add(i); // pl is 0, 1, ..., up to nPLs (complex calc of nAlleles and ploidy) @@ -651,9 +654,9 @@ public static void testReaderWriterWithMissingGenotypes(final VariantContextIOTe final EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = tester.makeWriter(tmpFile, options); - final Set samplesInVCF = new HashSet(data.header.getGenotypeSamples()); + final Set samplesInVCF = new HashSet<>(data.header.getGenotypeSamples()); final List missingSamples = Arrays.asList("MISSING1", "MISSING2"); - final List allSamples = new ArrayList(missingSamples); + final List allSamples = new ArrayList<>(missingSamples); allSamples.addAll(samplesInVCF); final VCFHeader header = new VCFHeader(data.header.getMetaDataInInputOrder(), allSamples); @@ -886,7 +889,7 @@ public static void assertEquals(final Genotype actual, final Genotype expected) } private static void assertAttributesEquals(final Map actual, Map expected) { - final Set expectedKeys = new HashSet(expected.keySet()); + final Set expectedKeys = new HashSet<>(expected.keySet()); for ( final Map.Entry act : actual.entrySet() ) { final Object actualValue = act.getValue(); @@ -949,7 +952,7 @@ public static void addComplexGenotypesTest() { final List siteAlleles = allAlleles.subList(0, nAlleles); // possible alleles for genotypes - final List possibleGenotypeAlleles = new ArrayList(siteAlleles); + final List possibleGenotypeAlleles = new ArrayList<>(siteAlleles); possibleGenotypeAlleles.add(Allele.NO_CALL); // there are n^ploidy possible genotypes @@ -960,14 +963,14 @@ public static void addComplexGenotypesTest() { // first test -- create n copies of each genotype for ( int i = 0; i < nPossibleGenotypes; i++ ) { - final List samples = new ArrayList(3); + final List samples = new ArrayList<>(3); samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); add(vb.genotypes(samples)); } // second test -- create one sample with each genotype { - final List samples = new ArrayList(nPossibleGenotypes); + final List samples = new ArrayList<>(nPossibleGenotypes); for ( int i = 0; i < nPossibleGenotypes; i++ ) { samples.add(GenotypeBuilder.create("sample" + i, possibleGenotypes.get(i))); } @@ -977,7 +980,7 @@ public static void addComplexGenotypesTest() { // test mixed ploidy for ( int i = 0; i < nPossibleGenotypes; i++ ) { for ( int ploidy = 1; ploidy < highestPloidy; ploidy++ ) { - final List samples = new ArrayList(highestPloidy); + final List samples = new ArrayList<>(highestPloidy); final List genotype = possibleGenotypes.get(i).subList(0, ploidy); samples.add(GenotypeBuilder.create("sample" + i, genotype)); add(vb.genotypes(samples)); @@ -996,8 +999,8 @@ public static void assertEquals(final VCFHeader actual, final VCFHeader expected // for some reason set.equals() is returning false but all paired elements are .equals(). Perhaps compare to is busted? //Assert.assertEquals(actual.getMetaDataInInputOrder(), expected.getMetaDataInInputOrder()); - final List actualLines = new ArrayList(actual.getMetaDataInSortedOrder()); - final List expectedLines = new ArrayList(expected.getMetaDataInSortedOrder()); + final List actualLines = new ArrayList<>(actual.getMetaDataInSortedOrder()); + final List expectedLines = new ArrayList<>(expected.getMetaDataInSortedOrder()); for ( int i = 0; i < actualLines.size(); i++ ) { Assert.assertEquals(actualLines.get(i), expectedLines.get(i), "VCF header lines"); } diff --git a/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWritersUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWritersUnitTest.java index 31b20fb8d0..8f41531b5a 100644 --- a/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWritersUnitTest.java +++ b/src/test/java/htsjdk/variant/variantcontext/writer/VariantContextWritersUnitTest.java @@ -37,7 +37,6 @@ import htsjdk.variant.variantcontext.VariantContextTestProvider; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.vcf.VCFUtils; import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test;