diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/BaseFuncotatorArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/BaseFuncotatorArgumentCollection.java index ca1307a76d3..afe0b5d6676 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/BaseFuncotatorArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/BaseFuncotatorArgumentCollection.java @@ -123,4 +123,12 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable { doc = "TSV File containing custom Variant Classification severity map of the form: VARIANT_CLASSIFICATION\tSEV. VARIANT_CLASSIFICATION must match one of the VariantClassification names (" + GencodeFuncotation.VariantClassification.ALL_VC_NAMES + "). SEV is an unsigned integer, where lower is sorted first. When using this option it is HIGHLY recommended you also use the `BEST_EFFECT` transcript selection mode." ) public GATKPath customVariantClassificationOrderFile = null; + + @Argument( + fullName = FuncotatorArgumentDefinitions.SPLICE_SITE_WINDOW_SIZE, + optional = true, + minValue = 0, + doc = "Number of bases on either side of a splice site for a variant to be classified as a SPLICE_SITE variant (default: " + FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE + ")." 
+ ) + public int spliceSiteWindow = FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotateSegments.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotateSegments.java index 51fa2e031ed..15f4e1dc41b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotateSegments.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotateSegments.java @@ -146,7 +146,8 @@ public void onTraversalStart() { funcotatorArgs.lookaheadFeatureCachingInBp, new FlankSettings(0,0), true, - funcotatorArgs.minNumBasesForValidSegment + funcotatorArgs.minNumBasesForValidSegment, + funcotatorArgs.spliceSiteWindow ).stream() .filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation) .collect(Collectors.toList()); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java index 8f97e31347b..74aec83c89e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/Funcotator.java @@ -793,7 +793,8 @@ public void onTraversalStart() { funcotatorArgs.lookaheadFeatureCachingInBp, new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize), false, - funcotatorArgs.minNumBasesForValidSegment + funcotatorArgs.minNumBasesForValidSegment, + funcotatorArgs.spliceSiteWindow ); logger.info("Initializing Funcotator Engine..."); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java index a0502bf67ee..09ec1e3ada8 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorArgumentDefinitions.java @@ -69,6 +69,8 @@ public class FuncotatorArgumentDefinitions { public static final String CUSTOM_VARIANT_CLASS_ORDER_FILE = "custom-variant-classification-order"; + public static final String SPLICE_SITE_WINDOW_SIZE = "splice-site-window-size"; + // ------------------------------------------------------------ // Helper Types: diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java index 4592af66c87..c7ea74905ed 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorUtils.java @@ -59,6 +59,11 @@ private FuncotatorUtils() {} public static final int DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT = 150; + /** + * The default window on either side of splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. + */ + public static final int DEFAULT_SPLICE_SITE_WINDOW_SIZE = 2; + private static final Map tableByCodon; private static final Map tableByCode; private static final Map tableByLetter; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java index 4bac84acc05..48dd6560bee 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/DataSourceUtils.java @@ -304,6 +304,8 @@ private static boolean isValidDirectory(final Path p) { * be annotated with a gencode/transcript datasource. * Not all datasources support this flag and it is * ignored for those that don't. 
+ * @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid. + * @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant. * @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information. */ public static List createDataSourceFuncotationFactoriesForDataSources(final Map dataSourceMetaData, @@ -314,7 +316,8 @@ public static List createDataSourceFuncotationFact final int lookaheadFeatureCachingInBp, final FlankSettings flankSettings, final boolean doAttemptSegmentFuncotationForTranscriptDatasources, - final int minBasesForValidSegment) { + final int minBasesForValidSegment, + final int spliceSiteWindowSize) { Utils.nonNull(dataSourceMetaData); Utils.nonNull(annotationOverridesMap); Utils.nonNull(transcriptSelectionMode); @@ -353,7 +356,7 @@ public static List createDataSourceFuncotationFact case GENCODE: featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false); funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode, - userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment); + userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize); break; case VCF: featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false); @@ -557,6 +560,8 @@ private static CosmicFuncotationFactory createCosmicDataSource(final Path dataSo * @param isSegmentFuncotationEnabled Do we want to allow 
the output Gencode Funcotation Factory to do segment annotations? If false, * segments will be funcotated with variant classifications of * {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#COULD_NOT_DETERMINE} + * @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid. + * @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant. * @return A new {@link GencodeFuncotationFactory} based on the given data source file information, field overrides map, and transcript information. */ private static GencodeFuncotationFactory createGencodeDataSource(final Path dataSourceFile, @@ -567,7 +572,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data final FeatureInput featureInput, final FlankSettings flankSettings, final boolean isSegmentFuncotationEnabled, - final int minBasesForValidSegment) { + final int minBasesForValidSegment, + final int spliceSiteWindowSize) { Utils.nonNull(dataSourceFile); Utils.nonNull(dataSourceProperties); Utils.nonNull(annotationOverridesMap); @@ -596,7 +602,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data isB37, ncbiBuildVersion, isSegmentFuncotationEnabled, - minBasesForValidSegment + minBasesForValidSegment, + spliceSiteWindowSize ); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java index 281f0ca74a0..ec65f35c2b0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java +++ 
b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactory.java @@ -72,11 +72,6 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { private static final String LOCAL_GENCODE_TRANSCRIPT_TMP_DIR_PREFIX = "localGencodeTranscriptFastaFolder"; private static final String LOCAL_GENCODE_TRANSCRIPT_FILE_BASE_NAME = "gencodeTranscriptFastaFile"; - /** - * The window around splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. - */ - private static final int spliceSiteVariantWindowBases = 2; - /** * Number of bases to the left and right of a variant in which to calculate the GC content. */ @@ -222,6 +217,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory { */ private String ncbiBuildVersion = null; + /** + * The window on either side of splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. + */ + private int spliceSiteVariantWindowBases = FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE; + /** * Comparator to be used when sorting {@link Funcotation}s created by this {@link GencodeFuncotationFactory}. * Will be either {@link TranscriptSelectionMode.BestEffectGencodeFuncotationComparator} or {@link TranscriptSelectionMode.CanonicalGencodeFuncotationComparator}. 
@@ -352,6 +352,41 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, final boolean isSegmentFuncotationEnabled, final int minBasesForValidSegment) { + this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts, + annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion, + isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE); + } + + /** + * Create a {@link GencodeFuncotationFactory}. + * + * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source. + * @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made. + * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}. + * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants. + * @param userRequestedTranscripts A {@link Set} containing Gencode TranscriptIDs that the user requests to be annotated with priority over all other transcripts for overlapping variants. + * @param annotationOverrides A {@link LinkedHashMap} containing user-specified overrides for specific {@link Funcotation}s. + * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created. + * @param flankSettings Settings object containing our 5'/3' flank sizes + * @param isDataSourceB37 If {@code true}, indicates that the data source behind this {@link GencodeFuncotationFactory} contains B37 data. + * @param ncbiBuildVersion The NCBI build version for this {@link GencodeFuncotationFactory} (can be found in the datasource config file) + * @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid. 
+ * @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant. + */ + public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, + final String version, + final String name, + final TranscriptSelectionMode transcriptSelectionMode, + final Set userRequestedTranscripts, + final LinkedHashMap annotationOverrides, + final FeatureInput mainFeatureInput, + final FlankSettings flankSettings, + final boolean isDataSourceB37, + final String ncbiBuildVersion, + final boolean isSegmentFuncotationEnabled, + final int minBasesForValidSegment, + final int spliceSiteWindowSize) { + super(mainFeatureInput, minBasesForValidSegment); // Set up our local transcript fasta file. @@ -375,6 +410,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath, this.isSegmentFuncotationEnabled = isSegmentFuncotationEnabled; + this.spliceSiteVariantWindowBases = spliceSiteWindowSize; + // Go through each requested transcript and remove the version numbers from them if they exist: this.userRequestedTranscripts = new HashSet<>(); for ( final String transcript : userRequestedTranscripts ) { @@ -1270,7 +1307,9 @@ private GencodeFuncotation createCodingRegionFuncotationForProteinCodingFeature( .setProteinChange(proteinChange); // Set the Variant Classification: - final GencodeFuncotation.VariantClassification varClass = createVariantClassification(variant, altAllele, variantType, exon, transcript.getExons().size(), sequenceComparison); + final GencodeFuncotation.VariantClassification varClass = createVariantClassification( + variant, altAllele, variantType, exon, transcript.getExons().size(), sequenceComparison, spliceSiteVariantWindowBases + ); final GencodeFuncotation.VariantClassification secondaryVarClass; gencodeFuncotationBuilder.setVariantClassification(varClass); if ( varClass 
== GencodeFuncotation.VariantClassification.SPLICE_SITE ) { @@ -1354,6 +1393,7 @@ else if ( exon.getStopCodon() != null ) { * @param exon The {@link GencodeGtfExonFeature} in which the given {@code variant} occurs. * @param numberOfExonsInTranscript The number of exons in the transcript in which the given {@code variant} occurs. (Must be > 0). * @param sequenceComparison The {@link org.broadinstitute.hellbender.tools.funcotator.SequenceComparison} for the given {@code variant}. + * @param spliceSiteWindowBases The window on either side of splice sites to mark variants as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE}. * @return A {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification} based on the given {@code allele}, {@code variant}, {@code exon}, and {@code sequenceComparison}. */ @VisibleForTesting @@ -1362,7 +1402,8 @@ static GencodeFuncotation.VariantClassification createVariantClassification(fina final GencodeFuncotation.VariantType variantType, final GencodeGtfExonFeature exon, final int numberOfExonsInTranscript, - final SequenceComparison sequenceComparison ){ + final SequenceComparison sequenceComparison, + final int spliceSiteWindowBases){ Utils.nonNull(variant); Utils.nonNull(altAllele); Utils.nonNull(variantType); @@ -1422,12 +1463,17 @@ static GencodeFuncotation.VariantClassification createVariantClassification(fina final int adjustedExonStart = adjustLocusForInsertion(exon.getStart(), variant, altAllele, realVariationInterval); final int adjustedExonEnd = adjustLocusForInsertion(exon.getEnd(), variant, altAllele, realVariationInterval); + // With a window of 0, the variant must overlap the exon start itself rather than fall within a + // base of it. Subtracting one from the interval end in that case would create an invalid interval + // (its end would precede its start), so the end-coordinate adjustment is only applied for windows > 0. 
+ final int intervalEndCoordAdjuster = spliceSiteWindowBases > 0 ? 1 : 0; + if ( doLeftOverlapCheck ) { - final SimpleInterval leftSideInterval = new SimpleInterval(exon.getContig(), adjustedExonStart - spliceSiteVariantWindowBases, adjustedExonStart + (spliceSiteVariantWindowBases-1)); + final SimpleInterval leftSideInterval = new SimpleInterval(exon.getContig(), adjustedExonStart - spliceSiteWindowBases, adjustedExonStart + (spliceSiteWindowBases-intervalEndCoordAdjuster)); overlapsLeft = leftSideInterval.overlaps(realVariationInterval); } if ( doRightOverlapCheck ) { - final SimpleInterval rightSideInterval = new SimpleInterval(exon.getContig(), adjustedExonEnd - spliceSiteVariantWindowBases + 1, adjustedExonEnd + (spliceSiteVariantWindowBases-1) + 1); + final SimpleInterval rightSideInterval = new SimpleInterval(exon.getContig(), adjustedExonEnd - spliceSiteWindowBases + 1, adjustedExonEnd + (spliceSiteWindowBases-intervalEndCoordAdjuster) + 1); overlapsRight = rightSideInterval.overlaps(realVariationInterval); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngineUnitTest.java index bde803d482f..6b637072805 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorEngineUnitTest.java @@ -64,7 +64,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F FuncotatorArgumentDefinitions.LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE, new FlankSettings(0, 0), false, - FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT) + FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT, + FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE) ); for (int i = 0; i < entireVcf.getRight().size(); i++) { diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java index 32f7df59926..59b9d69299a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/gencode/GencodeFuncotationFactoryUnitTest.java @@ -411,6 +411,29 @@ Object[][] provideMuc16SnpDataForGetVariantClassification() { return l.toArray(new Object[][]{{}}); } + @DataProvider + Object[][] provideForTestSpliceSiteWindowSettings() { + return new Object[][] { + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 0, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 1, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 2, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 3, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 4, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 5, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 6, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 7, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + { 19, 8987107, 8987107, GencodeFuncotation.VariantType.SNP, "A", "G", 8, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 0, 
GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 1, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 2, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 3, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 4, GencodeFuncotation.VariantClassification.SILENT }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 5, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 6, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + { 19, 8987049, 8987049, GencodeFuncotation.VariantType.SNP, "G", "A", 7, GencodeFuncotation.VariantClassification.SPLICE_SITE }, + }; + } + @DataProvider Object[][] provideMuc16SnpDataForGetVariantClassificationWithOutOfCdsData() { final List l = new ArrayList<>(); @@ -1339,7 +1362,97 @@ void testGetVariantClassificationForCodingRegions(final int chromosomeNumber, variantType, exon, transcript.getExons().size(), - seqComp + seqComp, + FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE + ); + + Assert.assertEquals( varClass, expectedVariantClassification ); + } + + @Test (dataProvider = "provideForTestSpliceSiteWindowSettings") + void testSpliceSiteWindowSettings(final int chromosomeNumber, + final int start, + final int end, + final GencodeFuncotation.VariantType variantType, + final String ref, + final String alt, + final int spliceSiteWindow, + final GencodeFuncotation.VariantClassification expectedVariantClassification) { + + // This test can only deal with variants in coding regions. + // So we ignore any tests that are expected outside of coding regions. + // i.e. 
expectedVariantClassification is one of: + // { INTRON, FIVE_PRIME_UTR, THREE_PRIME_UTR, IGR, FIVE_PRIME_FLANK, DE_NOVO_START_IN_FRAME, DE_NOVO_START_OUT_FRAME, RNA, LINCRNA } + // We test these cases in another unit test. + if ((expectedVariantClassification == GencodeFuncotation.VariantClassification.INTRON) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.FIVE_PRIME_UTR) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.THREE_PRIME_UTR) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.IGR) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.FIVE_PRIME_FLANK) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.DE_NOVO_START_IN_FRAME) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.DE_NOVO_START_OUT_FRAME) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.RNA) || + (expectedVariantClassification == GencodeFuncotation.VariantClassification.LINCRNA) ) + { + return; + } + + final String contig = "chr" + Integer.toString(chromosomeNumber); + final SimpleInterval variantInterval = new SimpleInterval( contig, start, end ); + + final Allele refAllele = Allele.create(ref, true); + final Allele altAllele = Allele.create(alt); + + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder( + FuncotatorReferenceTestUtils.retrieveHg19Chr19Ref(), + contig, + start, + end, + Arrays.asList(refAllele, altAllele) + ); + final VariantContext variantContext = variantContextBuilder.make(); + + // Get our gene feature iterator: + final CloseableTribbleIterator gtfFeatureIterator; + try { + gtfFeatureIterator = gencodeHg19FeatureReader.query(contig, start, end); + } + catch (final IOException ex) { + throw new GATKException("Could not finish the test!", ex); + } + + // Get the gene. 
+ // We know the first gene is the right one - the gene in question is the MUC16 gene: + final GencodeGtfGeneFeature gene = (GencodeGtfGeneFeature) gtfFeatureIterator.next(); + final GencodeGtfTranscriptFeature transcript = getMuc16Transcript(gene); + final GencodeGtfExonFeature exon = getExonForVariant( gene, variantInterval ); + + final ReferenceContext referenceContext = new ReferenceContext( refDataSourceHg19Ch19, variantInterval ); + + final List exonPositionList = GencodeFuncotationFactory.getSortedCdsAndStartStopPositions(transcript); + + final ReferenceDataSource muc16TranscriptDataSource = ReferenceDataSource.of(new File(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19).toPath()); + final Map muc16TranscriptIdMap = GencodeFuncotationFactory. createTranscriptIdMap(muc16TranscriptDataSource); + + final SequenceComparison seqComp = + GencodeFuncotationFactory.createSequenceComparison( + variantContext, + altAllele, + referenceContext, + transcript, + exonPositionList, + muc16TranscriptIdMap, + muc16TranscriptDataSource, + true); + + final GencodeFuncotation.VariantClassification varClass = GencodeFuncotationFactory.createVariantClassification( + variantContext, + altAllele, + variantType, + exon, + transcript.getExons().size(), + seqComp, + spliceSiteWindow ); Assert.assertEquals( varClass, expectedVariantClassification ); diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java index d3d2e70e5ac..30140b6e337 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/mafOutput/MafOutputRendererUnitTest.java @@ -123,7 +123,8 @@ private MafOutputRenderer createMafOutputRenderer(final File outputFile, final S 
FuncotatorArgumentDefinitions.LOOKAHEAD_CACHE_IN_BP_DEFAULT_VALUE, new FlankSettings(0, 0), false, - FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT + FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT, + FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE ); // Sort the datasources to ensure the same order every time: