diff --git a/HISTORY.rst b/HISTORY.rst
index bd685daee..9d46d5c56 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -46,6 +46,7 @@ End-User Summary
 - Adjusting SV filtration presets (#616).
 - Fix bug with thousand genomes frequencies in SV filtration (#619).
 - Displaying disease gene icon also for SVs (#620).
+- Fix bug with gene constraint display for intergenic variants (#620).
 
 Full Change List
 ================
@@ -87,6 +88,7 @@ Full Change List
 - Adjusting SV filtration presets (#616).
 - Fix bug with thousand genomes frequencies in SV filtration (#619).
 - Displaying disease gene icon also for SVs (#620).
+- Fix bug with gene constraint display for intergenic variants (#620).
 
 ------
 v1.2.0
diff --git a/svs/templates/svs/_filter_form.html b/svs/templates/svs/_filter_form.html
index 0d5c76222..c7e8660e0 100644
--- a/svs/templates/svs/_filter_form.html
+++ b/svs/templates/svs/_filter_form.html
@@ -23,8 +23,8 @@
diff --git a/variants/queries.py b/variants/queries.py
index f77b0ee85..40487596b 100644
--- a/variants/queries.py
+++ b/variants/queries.py
@@ -28,6 +28,7 @@
     ExacConstraints,
     MgiMapping,
     RefseqToGeneSymbol,
+    RefseqToEnsembl,
     EnsemblToGeneSymbol,
     GeneIdInHpo,
 )
@@ -1350,25 +1351,66 @@ class ExtendQueryPartsGnomadConstraintsJoin(ExtendQueryPartsBase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.fields = ["pLI", "mis_z", "syn_z", "oe_lof", "oe_lof_upper", "oe_lof_lower"]
-        self.subquery = (
-            select(
-                [
-                    func.max(getattr(GnomadConstraints.sa, field)).label(field)
-                    for field in self.fields
-                ]
-                + [func.max(GnomadConstraints.sa.oe_lof_upper + 0.001).label("loeuf")]
-            )
-            .select_from(GnomadConstraints.sa)
-            .where(SmallVariant.sa.ensembl_gene_id == GnomadConstraints.sa.ensembl_gene_id)
-            .group_by(GnomadConstraints.sa.ensembl_gene_id)
-            .lateral("gnomad_constraints_subquery")
-        )
+        self.subquery = self._build_subquery()
         self.fields.append("loeuf")
 
+    def _build_subquery(self):
+        """Build sub query, depending on selected database (refseq/ensembl)."""
+        if self.kwargs["database_select"] == "ensembl":
+            return (
+                select(
+                    [
+                        func.max(getattr(GnomadConstraints.sa, field)).label(field)
+                        for field in self.fields
+                    ]
+                    + [func.max(GnomadConstraints.sa.oe_lof_upper + 0.001).label("loeuf")]
+                )
+                .select_from(GnomadConstraints.sa)
+                .where(SmallVariant.sa.ensembl_gene_id == GnomadConstraints.sa.ensembl_gene_id)
+                .group_by(GnomadConstraints.sa.ensembl_gene_id)
+                .lateral("gnomad_constraints_subquery")
+            )
+        else:
+            self.subquery_refseq_to_ensembl = (
+                select([func.max(RefseqToEnsembl.sa.ensembl_gene_id).label("ensembl_gene_id")])
+                .select_from(RefseqToEnsembl.sa)
+                .where(SmallVariant.sa.refseq_gene_id == RefseqToEnsembl.sa.entrez_id)
+                .group_by(RefseqToEnsembl.sa.entrez_id)
+                .lateral("refseqtoensembl_subquery_gnomad_constraints")
+            )
+            link = (
+                self.subquery_refseq_to_ensembl.c.ensembl_gene_id
+                == GnomadConstraints.sa.ensembl_gene_id
+            )
+            return (
+                select(
+                    [
+                        func.max(getattr(GnomadConstraints.sa, field)).label(field)
+                        for field in self.fields
+                    ]
+                    + [func.max(GnomadConstraints.sa.oe_lof_upper + 0.001).label("loeuf")]
+                )
+                .select_from(GnomadConstraints.sa)
+                .where(link)
+                .group_by(GnomadConstraints.sa.ensembl_gene_id)
+                .lateral("gnomad_constraints_subquery")
+            )
+
     def extend_fields(self, _query_parts):
-        return [getattr(self.subquery.c, field).label("gnomad_%s" % field) for field in self.fields]
+        """Return labeled gnomAD constraint columns, plus the mapped gene ID for RefSeq."""
+        fields = [
+            getattr(self.subquery.c, field).label("gnomad_%s" % field) for field in self.fields
+        ]
+        # The mapping sub query does not exist in ENSEMBL mode, so only reference it for RefSeq.
+        if self.kwargs["database_select"] == "refseq":
+            fields.append(
+                self.subquery_refseq_to_ensembl.c.ensembl_gene_id.label("ensembl_gene_idY")
+            )
+        return fields
 
     def extend_selectable(self, query_parts):
+        if self.kwargs["database_select"] == "refseq":
+            query_parts = query_parts.selectable.outerjoin(self.subquery_refseq_to_ensembl, true())
         return query_parts.selectable.outerjoin(self.subquery, true())
 
 
@@ -1376,23 +1418,74 @@ class ExtendQueryPartsExacConstraintsJoin(ExtendQueryPartsBase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.fields = ["pLI", "mis_z", "syn_z"]
-        self.subquery = (
-            select(
-                [func.max(getattr(ExacConstraints.sa, field)).label(field) for field in self.fields]
+        self.subquery = self._build_subquery()
+
+    def _build_subquery(self):
+        """Build sub query, depending on selected database (refseq/ensembl)."""
+        if self.kwargs["database_select"] == "ensembl":
+            return (
+                select(
+                    [
+                        func.max(getattr(ExacConstraints.sa, field)).label(field)
+                        for field in self.fields
+                    ]
+                )
+                .select_from(ExacConstraints.sa)
+                .where(
+                    func.split_part(SmallVariant.sa.ensembl_transcript_id, ".", 1)
+                    == ExacConstraints.sa.ensembl_transcript_id
+                )
+                .group_by(ExacConstraints.sa.ensembl_transcript_id)
+                .lateral("exac_constraints_subquery")
             )
-            .select_from(ExacConstraints.sa)
-            .where(
-                func.split_part(SmallVariant.sa.ensembl_transcript_id, ".", 1)
+        else:
+            self.subquery_refseq_to_ensembl = (
+                select(
+                    [
+                        func.max(RefseqToEnsembl.sa.ensembl_transcript_id).label(
+                            "ensembl_transcript_id"
+                        )
+                    ]
+                )
+                .select_from(RefseqToEnsembl.sa)
+                .where(SmallVariant.sa.refseq_gene_id == RefseqToEnsembl.sa.entrez_id)
+                .group_by(RefseqToEnsembl.sa.entrez_id)
+                .lateral("refseqtoensembl_subquery_exac_constraints")
+            )
+            link = (
+                self.subquery_refseq_to_ensembl.c.ensembl_transcript_id
                 == ExacConstraints.sa.ensembl_transcript_id
             )
-            .group_by(ExacConstraints.sa.ensembl_transcript_id)
-            .lateral("exac_constraints_subquery")
-        )
+            return (
+                select(
+                    [
+                        func.max(getattr(ExacConstraints.sa, field)).label(field)
+                        for field in self.fields
+                    ]
+                )
+                .select_from(ExacConstraints.sa)
+                .where(link)
+                .group_by(ExacConstraints.sa.ensembl_transcript_id)
+                .lateral("exac_constraints_subquery")
+            )
 
     def extend_fields(self, _query_parts):
-        return [getattr(self.subquery.c, field).label("exac_%s" % field) for field in self.fields]
+        """Return labeled ExAC constraint columns, plus the mapped transcript ID for RefSeq."""
+        fields = [
+            getattr(self.subquery.c, field).label("exac_%s" % field) for field in self.fields
+        ]
+        # The mapping sub query does not exist in ENSEMBL mode, so only reference it for RefSeq.
+        if self.kwargs["database_select"] == "refseq":
+            fields.append(
+                self.subquery_refseq_to_ensembl.c.ensembl_transcript_id.label(
+                    "ensembl_transcript_idY"
+                )
+            )
+        return fields
 
     def extend_selectable(self, query_parts):
+        if self.kwargs["database_select"] == "refseq":
+            query_parts = query_parts.selectable.outerjoin(self.subquery_refseq_to_ensembl, true())
         return query_parts.selectable.outerjoin(self.subquery, true())
 
 
diff --git a/variants/tests/test_queries.py b/variants/tests/test_queries.py
index adc30eb8e..b5b8977a0 100644
--- a/variants/tests/test_queries.py
+++ b/variants/tests/test_queries.py
@@ -35,6 +35,7 @@
     MgiMappingFactory,
     RefseqToGeneSymbolFactory,
     EnsemblToGeneSymbolFactory,
+    RefseqToEnsemblFactory,
 )
 from dbsnp.tests.factories import DbsnpFactory
 from .factories import (
@@ -150,6 +151,91 @@ def test_load_prefetched_project_cases_results(self):
         self.assertIsNone(results[1].mgi_id)
 
 
+class TestCaseRefSeqIntergenicPLI(SupportQueryTestBase):
+    """Test the case that the entrez ID does not correspond to the ensembl ID
+
+    cf. bug in https://github.com/bihealth/varfish-server/issues/622
+    """
+
+    def setUp(self):
+        super().setUp()
+        case, variant_set, _ = CaseWithVariantSetFactory.get("small")
+        self.acmg_0 = AcmgFactory(entrez_id="1000", ensembl_gene_id="ENSG1001")
+        self.acmg_1 = AcmgFactory(entrez_id="1000", ensembl_gene_id="ENSG1000")
+        small_vars = [
+            SmallVariantFactory(
+                chromosome=normalize_chrom("1", case.release),
+                ensembl_effect=["missense_variant"],
+                refseq_effect=["intergenic_variant"],
+                refseq_gene_id=self.acmg_1.entrez_id,
+                ensembl_gene_id=self.acmg_0.ensembl_gene_id,
+                variant_set=variant_set,
+            ),
+            SmallVariantFactory(
+                chromosome=normalize_chrom("1", case.release),
+                ensembl_effect=["missense_variant"],
+                refseq_effect=["missense_variant"],
+                refseq_gene_id=self.acmg_1.entrez_id,
+                ensembl_gene_id=self.acmg_1.ensembl_gene_id,
+                variant_set=variant_set,
+            ),
+        ]
+        # Prepare constraints
+        self.gnomad_constraints = [
+            GnomadConstraintsFactory(ensembl_gene_id=small_vars[0].ensembl_gene_id, pLI=0.8,),
+            GnomadConstraintsFactory(ensembl_gene_id=small_vars[1].ensembl_gene_id, pLI=0.4,),
+        ]
+        self.exac_constraints = [
+            ExacConstraintsFactory(
+                ensembl_transcript_id=small_vars[0].ensembl_transcript_id, pLI=1.0,
+            ),
+            ExacConstraintsFactory(
+                ensembl_transcript_id=small_vars[1].ensembl_transcript_id, pLI=0.5,
+            ),
+        ]
+        # Prepare smallvariant query results
+        self.smallvariantquery = SmallVariantQueryFactory(case=case)
+        self.smallvariantquery.query_results.add(small_vars[0].id, small_vars[1].id)
+        # Prepare projectcases smallvariant query results
+        self.projectcasessmallvariantquery = ProjectCasesSmallVariantQueryFactory(
+            project=case.project
+        )
+        self.projectcasessmallvariantquery.query_results.add(small_vars[0].id, small_vars[1].id)
+        # Create appropriate refseq to ensembl mappings
+        # entrez_id = factory.Sequence(lambda n: str(n))
+        # ensembl_gene_id = factory.Sequence(lambda n: "ENSG%d" % n)
+        # ensembl_transcript_id = factory.Sequence(lambda n: "ENST%d" % n)
+        RefseqToEnsemblFactory(
+            entrez_id="1000",
+            ensembl_gene_id="ENSG1000",
+            ensembl_transcript_id=small_vars[1].ensembl_transcript_id,
+        )
+
+    def test_run_query_refseq(self):
+        results = self.run_query(
+            ProjectLoadPrefetchedQuery,
+            {"filter_job_id": self.projectcasessmallvariantquery.id, "database_select": "refseq"},
+            2,
+            query_type="project",
+        )
+        self.assertEqual(results[0].exac_pLI, 0.5)
+        self.assertEqual(results[1].exac_pLI, 0.5)
+        self.assertEqual(results[0].gnomad_pLI, 0.4)
+        self.assertEqual(results[1].gnomad_pLI, 0.4)
+
+    def test_run_query_ensembl(self):
+        results = self.run_query(
+            ProjectLoadPrefetchedQuery,
+            {"filter_job_id": self.projectcasessmallvariantquery.id, "database_select": "ensembl"},
+            2,
+            query_type="project",
+        )
+        self.assertEqual(results[0].exac_pLI, 1.0)
+        self.assertEqual(results[1].exac_pLI, 0.5)
+        self.assertEqual(results[0].gnomad_pLI, 0.8)
+        self.assertEqual(results[1].gnomad_pLI, 0.4)
+
+
 class TestCaseLoadPrefetchedSorting(SupportQueryTestBase):
     def setUp(self):
         super().setUp()