From 2cc88ac64db1a11599ff1ae64c2f7f28dc8201c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20G=C3=B3mez-S=C3=A1nchez?= Date: Wed, 20 Apr 2016 18:35:11 +0200 Subject: [PATCH] Added makeWithShallowCopy to GenotypeBuilder --- .../variant/variantcontext/FastGenotype.java | 5 +- .../variantcontext/GenotypeBuilder.java | 35 ++++++++- .../variantcontext/GenotypeBuilderTest.java | 75 +++++++++++++++++++ 3 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java diff --git a/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java b/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java index 935d825d8d..665e672425 100644 --- a/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java +++ b/src/main/java/htsjdk/variant/variantcontext/FastGenotype.java @@ -29,7 +29,10 @@ import java.util.Map; /** - * This class encompasses all the basic information about a genotype. It is immutable. + * This class encompasses all the basic information about a genotype. + * + * For the sake of performance, it does not make a copy of the Collections/arrays it's constructed from, and so + * subsequent changes to those Collections/arrays will be reflected in the FastGenotype object * * A genotype has several key fields * diff --git a/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java b/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java index a0ec1c8ae8..483e1c617d 100644 --- a/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java +++ b/src/main/java/htsjdk/variant/variantcontext/GenotypeBuilder.java @@ -28,6 +28,7 @@ import htsjdk.tribble.util.ParsingUtils; import htsjdk.variant.vcf.VCFConstants; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; @@ -49,6 +50,11 @@ * or with intervening sets to conveniently make similar Genotypes with * slight modifications. * + * Re-using the same GenotypeBuilder to build multiple Genotype objects via calls + * to make() is dangerous, since reference types in the builder (eg., Collections/arrays) + * don't get copied when making each Genotype. To safely re-use the same builder object + * multiple times, use makeWithShallowCopy() instead of make(). + * * @author Mark DePristo * @since 06/12 */ @@ -185,13 +191,34 @@ public final void reset(final boolean keepSampleName) { * created, althrough the contents of array values like PL should never be modified * inline as they are not copied for efficiency reasons. * + * Note: if attributes are added via this builder after a call to make(), the new Genotype will + * be modified. Use {@link #makeWithShallowCopy} to safely re-use the same builder object + * multiple times. + * * @return a newly minted Genotype object with values provided from this builder */ public Genotype make() { - final Map ea = extendedAttributes == null ? NO_ATTRIBUTES : extendedAttributes; + final Map ea = (extendedAttributes == null) ? NO_ATTRIBUTES : extendedAttributes; return new FastGenotype(sampleName, alleles, isPhased, GQ, DP, AD, PL, filters, ea); } + /** + * Create a new Genotype object using the values set in this builder, and perform a + * shallow copy of reference types to allow safer re-use of this builder + * + * After creation the values in this builder can be modified and more Genotypes + * created. + * + * @return a newly minted Genotype object with values provided from this builder + */ + public Genotype makeWithShallowCopy() { + final Map ea = (extendedAttributes == null) ? NO_ATTRIBUTES : new HashMap<>(extendedAttributes); + final List al = new ArrayList<>(alleles); + final int[] copyAD = (AD == null) ? null : Arrays.copyOf(AD, AD.length); + final int[] copyPL = (PL == null) ? null : Arrays.copyOf(PL, PL.length); + return new FastGenotype(sampleName, al, isPhased, GQ, DP, copyAD, copyPL, filters, ea); + } + /** * Set this genotype's name * @param sampleName @@ -303,9 +330,9 @@ public GenotypeBuilder PL(final double[] GLs) { } /** - * This genotype has these attributes. + * This genotype has these attributes. Attributes are added to previous ones. * - * Cannot contain inline attributes (DP, AD, GQ, PL) + * Cannot contain inline attributes (DP, AD, GQ, PL). Note: this is not checked * @return */ public GenotypeBuilder attributes(final Map attributes) { @@ -327,7 +354,7 @@ public GenotypeBuilder noAttributes() { /** * This genotype has this attribute key / value pair. * - * Cannot contain inline attributes (DP, AD, GQ, PL) + * Cannot contain inline attributes (DP, AD, GQ, PL). Note: this is not checked * @return */ public GenotypeBuilder attribute(final String key, final Object value) { diff --git a/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java b/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java new file mode 100644 index 0000000000..5e3f0b9eb8 --- /dev/null +++ b/src/test/java/htsjdk/variant/variantcontext/GenotypeBuilderTest.java @@ -0,0 +1,75 @@ +/* +* Copyright (c) 2016 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package htsjdk.variant.variantcontext; + +import htsjdk.variant.VariantBaseTest; +import org.testng.Assert; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class GenotypeBuilderTest extends VariantBaseTest { + + @Test + public void testMakeWithShallowCopy() { + final GenotypeBuilder gb = new GenotypeBuilder("test"); + final List alleles = new ArrayList<>( + Arrays.asList(Allele.create("A", true), Allele.create("T"))); + final int[] ad = new int[]{1,5}; + final int[] pl = new int[]{1,6}; + final int[] first = new int[]{1, 2}; + final int[] second = new int[]{3, 4}; + final Genotype firstG = gb.alleles(alleles).attribute("first", first).makeWithShallowCopy(); + final Genotype secondG = gb.AD(ad).PL(pl).attribute("second", second).makeWithShallowCopy(); + // both genotypes have the first field + Assert.assertEquals(first, firstG.getExtendedAttribute("first")); + Assert.assertEquals(first, secondG.getExtendedAttribute("first")); + // both genotypes have the the alleles + Assert.assertEquals(alleles, firstG.getAlleles()); + Assert.assertEquals(alleles, secondG.getAlleles()); + // only the second genotype should have the AD field + Assert.assertNull(firstG.getAD()); + Assert.assertEquals(ad, secondG.getAD()); + // only the second genotype should have the PL field + Assert.assertNull(firstG.getPL()); + Assert.assertEquals(pl, secondG.getPL()); + // only the second genotype should have the second field + Assert.assertNull(firstG.getExtendedAttribute("second")); + Assert.assertEquals(second, secondG.getExtendedAttribute("second")); + // modification of alleles does not change the genotypes + alleles.add(Allele.create("C")); + Assert.assertNotEquals(alleles, firstG.getAlleles()); + Assert.assertNotEquals(alleles, secondG.getAlleles()); + // modification of ad or pl does not change the genotypes + ad[0] = 0; + pl[0] = 10; + Assert.assertNotEquals(ad, secondG.getAD()); + Assert.assertNotEquals(pl, secondG.getPL()); + } + +} \ No newline at end of file