Skip to content

Commit

Permalink
Fix gene constraints for RefSeq intergenic vars (#622).
Browse files Browse the repository at this point in the history
Related-Issue: #622
Closes: #622
Projected-Results-Impact: none
  • Loading branch information
holtgrewe committed Aug 29, 2022
1 parent 7056603 commit 205b744
Show file tree
Hide file tree
Showing 5 changed files with 194 additions and 25 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ End-User Summary
- Adjusting SV filtration presets (#616).
- Fix bug with thousand genomes frequencies in SV filtration (#619).
- Displaying disease gene icon also for SVs (#620).
- Fix bug with gene constraint display for intergenic variants (#622).

Full Change List
================
Expand Down Expand Up @@ -93,6 +94,7 @@ Full Change List
- Adjusting SV filtration presets (#616).
- Fix bug with thousand genomes frequencies in SV filtration (#619).
- Displaying disease gene icon also for SVs (#620).
- Fix bug with gene constraint display for intergenic variants (#622).

------
v1.2.0
Expand Down
4 changes: 2 additions & 2 deletions svs/templates/svs/_filter_form.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
</a>

<div class="dropdown-menu" aria-labelledby="dropdownFreqs">
<a class="dropdown-item" data-preset="freq-in_house_only">in-house only (default)</a>
<a class="dropdown-item" data-preset="freq-relaxed">relaxed</a>
<a class="dropdown-item" data-preset="freq-in_house_only">in-house only</a>
<a class="dropdown-item" data-preset="freq-relaxed">relaxed (default)</a>
<a class="dropdown-item" data-preset="freq-strict">strict</a>
</div>
</div>
Expand Down
118 changes: 96 additions & 22 deletions variants/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
ExacConstraints,
MgiMapping,
RefseqToGeneSymbol,
RefseqToEnsembl,
EnsemblToGeneSymbol,
GeneIdInHpo,
)
Expand Down Expand Up @@ -1360,49 +1361,122 @@ def __init__(self, *args, **kwargs):
"oe_lof_upper",
"oe_lof_lower",
]
self.subquery = (
select(
[
func.max(getattr(GnomadConstraints.sa, field)).label(field)
for field in self.fields
]
+ [func.max(GnomadConstraints.sa.oe_lof_upper + 0.001).label("loeuf")]
)
.select_from(GnomadConstraints.sa)
.where(SmallVariant.sa.ensembl_gene_id == GnomadConstraints.sa.ensembl_gene_id)
.group_by(GnomadConstraints.sa.ensembl_gene_id)
.lateral("gnomad_constraints_subquery")
)
self.subquery = self._build_subquery()
self.fields.append("loeuf")

def _build_subquery(self):
    """Build the lateral gnomAD constraints sub query for the selected database.

    With ``database_select == "ensembl"`` the constraints are matched directly
    on the variant's ``ensembl_gene_id``.  For any other value the variant's
    ``refseq_gene_id`` is first translated into an ENSEMBL gene ID through a
    second lateral sub query on ``RefseqToEnsembl``; that helper sub query is
    stored in ``self.subquery_refseq_to_ensembl`` as a side effect.

    ``self.fields`` is read at call time; the extra ``loeuf`` column
    (``oe_lof_upper + 0.001``) is always added to the selected columns.
    """
    constraints = GnomadConstraints.sa

    if self.kwargs["database_select"] == "ensembl":
        gene_link = SmallVariant.sa.ensembl_gene_id == constraints.ensembl_gene_id
    else:
        mapping = RefseqToEnsembl.sa
        # max() + GROUP BY collapses the mapping to a single ENSEMBL gene ID
        # per Entrez ID.
        self.subquery_refseq_to_ensembl = (
            select([func.max(mapping.ensembl_gene_id).label("ensembl_gene_id")])
            .select_from(mapping)
            .where(SmallVariant.sa.refseq_gene_id == mapping.entrez_id)
            .group_by(mapping.entrez_id)
            .lateral("refseqtoensembl_subquery_gnomad_constraints")
        )
        gene_link = (
            self.subquery_refseq_to_ensembl.c.ensembl_gene_id == constraints.ensembl_gene_id
        )

    columns = [func.max(getattr(constraints, name)).label(name) for name in self.fields]
    columns.append(func.max(constraints.oe_lof_upper + 0.001).label("loeuf"))

    return (
        select(columns)
        .select_from(constraints)
        .where(gene_link)
        .group_by(constraints.ensembl_gene_id)
        .lateral("gnomad_constraints_subquery")
    )

def extend_fields(self, _query_parts):
    """Return the sub query columns for ``self.fields``, labeled ``gnomad_<field>``.

    The ``_query_parts`` argument is accepted but not used.
    """
    columns = self.subquery.c
    labeled = []
    for name in self.fields:
        labeled.append(getattr(columns, name).label("gnomad_%s" % name))
    return labeled

def extend_selectable(self, query_parts):
    """Outer-join the gnomAD constraints sub query onto ``query_parts.selectable``.

    For ``database_select == "refseq"`` the RefSeq-to-ENSEMBL mapping sub query
    is joined first, because the constraints sub query references its
    ``ensembl_gene_id`` column.  Both joins use an always-true ON clause.
    """
    joined = query_parts.selectable
    if self.kwargs["database_select"] == "refseq":
        joined = joined.outerjoin(self.subquery_refseq_to_ensembl, true())
    return joined.outerjoin(self.subquery, true())


class ExtendQueryPartsExacConstraintsJoin(ExtendQueryPartsBase):
    """Query extension joining the ExAC constraint columns ``pLI``, ``mis_z`` and ``syn_z``.

    The constraints are looked up via the ENSEMBL transcript ID.  When the
    query does not run on ENSEMBL, the variant's RefSeq (Entrez) gene ID is
    first mapped to an ENSEMBL transcript ID through ``RefseqToEnsembl``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # NOTE(review): ``self.kwargs`` is presumably set by the base class
        # constructor -- ``_build_subquery()`` relies on it.
        self.fields = ["pLI", "mis_z", "syn_z"]
        self.subquery = self._build_subquery()

    def _build_subquery(self):
        """Build sub query, depending on selected database (refseq/ensembl).

        For any ``database_select`` value other than ``"ensembl"`` the helper
        sub query mapping Entrez IDs to ENSEMBL transcript IDs is stored in
        ``self.subquery_refseq_to_ensembl`` as a side effect.
        """
        if self.kwargs["database_select"] == "ensembl":
            # Strip the version suffix from the variant's transcript ID before matching.
            link = (
                func.split_part(SmallVariant.sa.ensembl_transcript_id, ".", 1)
                == ExacConstraints.sa.ensembl_transcript_id
            )
        else:
            # max() + GROUP BY collapses the mapping to a single transcript ID
            # per Entrez ID.
            self.subquery_refseq_to_ensembl = (
                select(
                    [
                        func.max(RefseqToEnsembl.sa.ensembl_transcript_id).label(
                            "ensembl_transcript_id"
                        )
                    ]
                )
                .select_from(RefseqToEnsembl.sa)
                .where(SmallVariant.sa.refseq_gene_id == RefseqToEnsembl.sa.entrez_id)
                .group_by(RefseqToEnsembl.sa.entrez_id)
                .lateral("refseqtoensembl_subquery_exac_constraints")
            )
            # NOTE(review): unlike the ensembl branch, no version suffix is
            # stripped here -- confirm RefseqToEnsembl stores unversioned
            # transcript IDs.
            link = (
                self.subquery_refseq_to_ensembl.c.ensembl_transcript_id
                == ExacConstraints.sa.ensembl_transcript_id
            )
        return (
            select(
                [func.max(getattr(ExacConstraints.sa, field)).label(field) for field in self.fields]
            )
            .select_from(ExacConstraints.sa)
            .where(link)
            .group_by(ExacConstraints.sa.ensembl_transcript_id)
            .lateral("exac_constraints_subquery")
        )

    def extend_fields(self, _query_parts):
        """Return the sub query columns for ``self.fields``, labeled ``exac_<field>``."""
        return [getattr(self.subquery.c, field).label("exac_%s" % field) for field in self.fields]

    def extend_selectable(self, query_parts):
        """Outer-join the ExAC constraints sub query onto ``query_parts.selectable``.

        For ``database_select == "refseq"`` the RefSeq-to-ENSEMBL mapping sub
        query is joined first, because the constraints sub query references
        its ``ensembl_transcript_id`` column.
        """
        selectable = query_parts.selectable
        if self.kwargs["database_select"] == "refseq":
            selectable = selectable.outerjoin(self.subquery_refseq_to_ensembl, true())
        return selectable.outerjoin(self.subquery, true())


Expand Down
7 changes: 6 additions & 1 deletion variants/tests/test_file_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from clinvar.tests.factories import ClinvarFactory
from cohorts.tests.factories import TestCohortBase
from extra_annos.tests.factories import ExtraAnnoFieldFactory, ExtraAnnoFactory
from geneinfo.tests.factories import GnomadConstraintsFactory
from geneinfo.tests.factories import GnomadConstraintsFactory, RefseqToEnsemblFactory
from variants.tests.factories import (
SmallVariantFactory,
ResubmitFormDataFactory,
Expand Down Expand Up @@ -93,6 +93,11 @@ def setUp(self):
pathogenicity_summary="uncertain significance",
)
GnomadConstraintsFactory(ensembl_gene_id=small_var.ensembl_gene_id)
RefseqToEnsemblFactory(
entrez_id=small_var.refseq_gene_id,
ensembl_gene_id=small_var.ensembl_gene_id,
ensembl_transcript_id=small_var.ensembl_transcript_id,
)

def _set_janno_mocker(self, database, mock_):
if database == "refseq":
Expand Down
88 changes: 88 additions & 0 deletions variants/tests/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
MgiMappingFactory,
RefseqToGeneSymbolFactory,
EnsemblToGeneSymbolFactory,
RefseqToEnsemblFactory,
)
from dbsnp.tests.factories import DbsnpFactory
from .factories import (
Expand Down Expand Up @@ -112,6 +113,11 @@ def setUp(self):
project=case.project
)
self.projectcasessmallvariantquery.query_results.add(small_vars[0].id, small_vars[2].id)
RefseqToEnsemblFactory(
entrez_id=small_vars[0].refseq_gene_id,
ensembl_gene_id=small_vars[0].ensembl_gene_id,
ensembl_transcript_id=small_vars[0].ensembl_transcript_id,
)

def test_load_prefetched_case_results(self):
results = self.run_query(
Expand Down Expand Up @@ -150,6 +156,88 @@ def test_load_prefetched_project_cases_results(self):
self.assertIsNone(results[1].mgi_id)


class TestCaseRefSeqIntergenicPLI(SupportQueryTestBase):
    """Test the case that the entrez ID does not correspond to the ensembl ID.

    cf. bug in https://github.com/bihealth/varfish-server/issues/622
    """

    def setUp(self):
        super().setUp()
        case, variant_set, _ = CaseWithVariantSetFactory.get("small")
        # Same Entrez ID for both records, but different ENSEMBL gene IDs.
        self.acmg_0 = AcmgFactory(entrez_id="1000", ensembl_gene_id="ENSG1001")
        self.acmg_1 = AcmgFactory(entrez_id="1000", ensembl_gene_id="ENSG1000")
        small_vars = [
            # Intergenic for RefSeq; the ENSEMBL gene ID (ENSG1001) differs from
            # the one the Entrez ID is mapped to below (ENSG1000).
            SmallVariantFactory(
                chromosome=normalize_chrom("1", case.release),
                ensembl_effect=["missense_variant"],
                refseq_effect=["intergenic_variant"],
                refseq_gene_id=self.acmg_1.entrez_id,
                ensembl_gene_id=self.acmg_0.ensembl_gene_id,
                variant_set=variant_set,
            ),
            # RefSeq and ENSEMBL gene annotation agree with the mapping.
            SmallVariantFactory(
                chromosome=normalize_chrom("1", case.release),
                ensembl_effect=["missense_variant"],
                refseq_effect=["missense_variant"],
                refseq_gene_id=self.acmg_1.entrez_id,
                ensembl_gene_id=self.acmg_1.ensembl_gene_id,
                variant_set=variant_set,
            ),
        ]
        # Prepare constraints
        self.gnomad_constraints = [
            GnomadConstraintsFactory(ensembl_gene_id=small_vars[0].ensembl_gene_id, pLI=0.8),
            GnomadConstraintsFactory(ensembl_gene_id=small_vars[1].ensembl_gene_id, pLI=0.4),
        ]
        self.exac_constraints = [
            ExacConstraintsFactory(
                ensembl_transcript_id=small_vars[0].ensembl_transcript_id, pLI=1.0
            ),
            ExacConstraintsFactory(
                ensembl_transcript_id=small_vars[1].ensembl_transcript_id, pLI=0.5
            ),
        ]
        # Prepare smallvariant query results
        self.smallvariantquery = SmallVariantQueryFactory(case=case)
        self.smallvariantquery.query_results.add(small_vars[0].id, small_vars[1].id)
        # Prepare projectcases smallvariant query results
        self.projectcasessmallvariantquery = ProjectCasesSmallVariantQueryFactory(
            project=case.project
        )
        self.projectcasessmallvariantquery.query_results.add(small_vars[0].id, small_vars[1].id)
        # Create appropriate refseq to ensembl mappings
        RefseqToEnsemblFactory(
            entrez_id="1000",
            ensembl_gene_id="ENSG1000",
            ensembl_transcript_id=small_vars[1].ensembl_transcript_id,
        )

    def test_run_query_refseq(self):
        results = self.run_query(
            ProjectLoadPrefetchedQuery,
            {"filter_job_id": self.projectcasessmallvariantquery.id, "database_select": "refseq"},
            2,
            query_type="project",
        )
        # Both variants share Entrez ID 1000, so both must receive the
        # constraints reached through the RefSeq-to-ENSEMBL mapping.
        self.assertEqual(results[0].exac_pLI, 0.5)
        self.assertEqual(results[1].exac_pLI, 0.5)
        self.assertEqual(results[0].gnomad_pLI, 0.4)
        self.assertEqual(results[1].gnomad_pLI, 0.4)

    def test_run_query_ensembl(self):
        results = self.run_query(
            ProjectLoadPrefetchedQuery,
            {"filter_job_id": self.projectcasessmallvariantquery.id, "database_select": "ensembl"},
            2,
            query_type="project",
        )
        # Each variant receives the constraints of its own ENSEMBL annotation.
        self.assertEqual(results[0].exac_pLI, 1.0)
        self.assertEqual(results[1].exac_pLI, 0.5)
        self.assertEqual(results[0].gnomad_pLI, 0.8)
        self.assertEqual(results[1].gnomad_pLI, 0.4)


class TestCaseLoadPrefetchedSorting(SupportQueryTestBase):
def setUp(self):
super().setUp()
Expand Down

0 comments on commit 205b744

Please sign in to comment.