varfish-org · stolpeo · Sep 27, 2024 · Apr 3, 2023 · Oct 12, 2023 · Nov 10, 2023
diff --git a/backend/cases/views.py b/backend/cases/views.py
@@ -59,6 +59,7 @@ def get_context_data(self, *args, **kwargs):
                 ),
                 "exomiser_enabled": settings.VARFISH_ENABLE_EXOMISER_PRIORITISER,
                 "cadd_enabled": settings.VARFISH_ENABLE_CADD,
+                "cada_enabled": settings.VARFISH_ENABLE_CADA,
                 "extra_anno_fields": extra_anno_fields,
                 "url_prefixes": {
                     "annonars": settings.VARFISH_BACKEND_URL_PREFIX_ANNONARS,

diff --git a/backend/config/settings/base.py b/backend/config/settings/base.py
@@ -132,6 +132,7 @@
     "varannos.apps.VarannosConfig",
     # Legacy apps - not used anymore!
     "hgmd.apps.HgmdConfig",
+    "ext_gestaltmatcher.apps.ExtGestaltmatcherConfig",
 ]
 
 # See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps
@@ -537,6 +538,15 @@
     "VARFISH_CADA_REST_API_URL", "https://cada.gene-talk.de/api/process"
 )
 
+# Enable PEDIA prioritization (disabled unless ``VARFISH_ENABLE_PEDIA`` is set).
+VARFISH_ENABLE_PEDIA = env.bool("VARFISH_ENABLE_PEDIA", default=False)
+# Configure URL to PEDIA REST API; the default points at a service on localhost.
+VARFISH_PEDIA_REST_API_URL = env.str("VARFISH_PEDIA_REST_API_URL", "http://127.0.0.1:9000/pedia")
+
+# Enable Gestalt-based prioritization (disabled unless
+# ``VARFISH_ENABLE_GESTALT_MATCHER`` is set).
+VARFISH_ENABLE_GESTALT_MATCHER = env.bool("VARFISH_ENABLE_GESTALT_MATCHER", default=False)
+# Configure URL to GestaltMatcher REST API; the default points at a service on localhost.
+VARFISH_GM_SENDER_URL = env.str("VARFISH_GM_SENDER_URL", "http://127.0.0.1:7000/")
+
 # Enable submission of variants to CADD server.
 VARFISH_ENABLE_CADD_SUBMISSION = env.bool("VARFISH_ENABLE_CADD_SUBMISSION", default=False)
 # CADD version to use for for submission
@@ -780,21 +790,16 @@ def set_logging(level):
     AUTH_LDAP_SERVER_URI = env.str("AUTH_LDAP_SERVER_URI", None)
     AUTH_LDAP_BIND_DN = env.str("AUTH_LDAP_BIND_DN", None)
     AUTH_LDAP_BIND_PASSWORD = env.str("AUTH_LDAP_BIND_PASSWORD", None)
-    AUTH_LDAP_START_TLS = env.str("AUTH_LDAP_START_TLS", False)
-    AUTH_LDAP_CA_CERT_FILE = env.str("AUTH_LDAP_CA_CERT_FILE", None)
-    AUTH_LDAP_CONNECTION_OPTIONS = {**LDAP_DEFAULT_CONN_OPTIONS}
-    if AUTH_LDAP_CA_CERT_FILE:
-        AUTH_LDAP_CONNECTION_OPTIONS[ldap.OPT_X_TLS_CACERTFILE] = AUTH_LDAP_CA_CERT_FILE
-        AUTH_LDAP_CONNECTION_OPTIONS[ldap.OPT_X_TLS_NEWCTX] = 0
-    AUTH_LDAP_USER_FILTER = env.str("AUTH_LDAP_USER_FILTER", "(sAMAccountName=%(user)s)")
-
-    AUTH_LDAP_USER_SEARCH_BASE = env.str("AUTH_LDAP_USER_SEARCH_BASE", None)
+    AUTH_LDAP_CONNECTION_OPTIONS = LDAP_DEFAULT_CONN_OPTIONS
+
     AUTH_LDAP_USER_SEARCH = LDAPSearch(
-        AUTH_LDAP_USER_SEARCH_BASE, ldap.SCOPE_SUBTREE, LDAP_DEFAULT_FILTERSTR
+        env.str("AUTH_LDAP_USER_SEARCH_BASE", None),
+        ldap.SCOPE_SUBTREE,
+        LDAP_DEFAULT_FILTERSTR,
     )
     AUTH_LDAP_USER_ATTR_MAP = LDAP_DEFAULT_ATTR_MAP
     AUTH_LDAP_USERNAME_DOMAIN = env.str("AUTH_LDAP_USERNAME_DOMAIN", None)
-    AUTH_LDAP_DOMAIN_PRINTABLE = env.str("AUTH_LDAP_DOMAIN_PRINTABLE", AUTH_LDAP_USERNAME_DOMAIN)
+    AUTH_LDAP_DOMAIN_PRINTABLE = env.str("AUTH_LDAP_DOMAIN_PRINTABLE", None)
 
     AUTHENTICATION_BACKENDS = tuple(
         itertools.chain(("projectroles.auth_backends.PrimaryLDAPBackend",), AUTHENTICATION_BACKENDS)

diff --git a/backend/ext_gestaltmatcher/__init__.py b/backend/ext_gestaltmatcher/__init__.py
diff --git a/backend/ext_gestaltmatcher/admin.py b/backend/ext_gestaltmatcher/admin.py
@@ -0,0 +1,7 @@
+from django.contrib import admin
+
+from .models import SmallVariantQueryGestaltMatcherScores, SmallVariantQueryPediaScores
+
+# Expose both score models in the Django admin with the default ``ModelAdmin``.
+# ``register`` accepts an iterable of models, so a single call covers both.
+admin.site.register((SmallVariantQueryGestaltMatcherScores, SmallVariantQueryPediaScores))
diff --git a/backend/ext_gestaltmatcher/apps.py b/backend/ext_gestaltmatcher/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class ExtGestaltmatcherConfig(AppConfig):
+    """Django application configuration for the ``ext_gestaltmatcher`` app."""
+
+    # The migrations of this app create the implicit ``id`` primary keys as
+    # ``models.AutoField``.  Use the same type here so that the model state and
+    # the migrations agree and ``makemigrations`` does not report a pending
+    # ``AlterField`` on ``id``.
+    default_auto_field = "django.db.models.AutoField"
+    name = "ext_gestaltmatcher"
diff --git a/backend/ext_gestaltmatcher/migrations/0001_initial.py b/backend/ext_gestaltmatcher/migrations/0001_initial.py
@@ -0,0 +1,34 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.20 on 2023-10-20 07:18
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Create the ``SmallVariantQueryGestaltMatcherScores`` model."""
+
+    # NOTE(review): the ``query`` field below is a foreign key to
+    # ``variants.SmallVariantQuery`` but no migration of the ``variants`` app is
+    # listed as a dependency -- confirm that migration ordering on a fresh
+    # database does not rely on it.
+    dependencies = []
+
+    operations = [
+        migrations.CreateModel(
+            name="SmallVariantQueryGestaltMatcherScores",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)),
+                ("gene_symbol", models.CharField(help_text="The gene symbol", max_length=128)),
+                ("priority_type", models.CharField(help_text="The priority type", max_length=64)),
+                ("score", models.FloatField(help_text="The gene score")),
+                (
+                    # Score rows are deleted together with the query they belong to.
+                    "query",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery"
+                    ),
+                ),
+            ],
+        )
+    ]
diff --git a/backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py b/backend/ext_gestaltmatcher/migrations/0002_smallvariantquerypediascores.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.20 on 2023-11-14 07:18
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+    """Create the ``SmallVariantQueryPediaScores`` model."""
+
+    # Applied after this app's initial migration.
+    # NOTE(review): the ``query`` field below is a foreign key to
+    # ``variants.SmallVariantQuery`` but no migration of the ``variants`` app is
+    # listed as a dependency -- confirm that migration ordering on a fresh
+    # database does not rely on it.
+    dependencies = [("ext_gestaltmatcher", "0001_initial")]
+
+    operations = [
+        migrations.CreateModel(
+            name="SmallVariantQueryPediaScores",
+            fields=[
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True, primary_key=True, serialize=False, verbose_name="ID"
+                    ),
+                ),
+                ("gene_id", models.CharField(help_text="Entrez gene ID", max_length=64)),
+                ("gene_symbol", models.CharField(help_text="The gene symbol", max_length=128)),
+                ("score", models.FloatField(help_text="The gene score")),
+                (
+                    # Score rows are deleted together with the query they belong to.
+                    "query",
+                    models.ForeignKey(
+                        on_delete=django.db.models.deletion.CASCADE, to="variants.SmallVariantQuery"
+                    ),
+                ),
+            ],
+        )
+    ]
diff --git a/backend/ext_gestaltmatcher/migrations/__init__.py b/backend/ext_gestaltmatcher/migrations/__init__.py
diff --git a/backend/ext_gestaltmatcher/models.py b/backend/ext_gestaltmatcher/models.py
@@ -0,0 +1,43 @@
+from django.db import models
+
+
+# Create your models here.
+class SmallVariantQueryGestaltMatcherScores(models.Model):
+    """Per-gene Gestalt Matcher score attached to a ``SmallVariantQuery``.
+
+    All fields are required (Django's default ``null=False``/``blank=False``).
+    Rows are deleted together with the query they reference.
+    """
+
+    #: Query that this score row belongs to.
+    query = models.ForeignKey(to="variants.SmallVariantQuery", on_delete=models.CASCADE)
+
+    #: Entrez ID of the scored gene.
+    gene_id = models.CharField(help_text="Entrez gene ID", max_length=64)
+
+    #: Symbol of the scored gene.
+    gene_symbol = models.CharField(help_text="The gene symbol", max_length=128)
+
+    #: Priority type of the score.
+    priority_type = models.CharField(help_text="The priority type", max_length=64)
+
+    #: Score assigned to the gene.
+    score = models.FloatField(help_text="The gene score")
+
+
+class SmallVariantQueryPediaScores(models.Model):
+    """Per-gene PEDIA score attached to a ``SmallVariantQuery``.
+
+    All fields are required (Django's default ``null=False``/``blank=False``).
+    Rows are deleted together with the query they reference.
+    """
+
+    #: Query that this score row belongs to.
+    query = models.ForeignKey(to="variants.SmallVariantQuery", on_delete=models.CASCADE)
+
+    #: Entrez ID of the scored gene.
+    gene_id = models.CharField(help_text="Entrez gene ID", max_length=64)
+
+    #: Symbol of the scored gene.
+    gene_symbol = models.CharField(help_text="The gene symbol", max_length=128)
+
+    #: Score assigned to the gene.
+    score = models.FloatField(help_text="The gene score")
diff --git a/backend/variants/file_export.py b/backend/variants/file_export.py
@@ -24,11 +24,15 @@
     ExportProjectCasesFileBgJobResult,
     SmallVariantComment,
     VariantScoresFactory,
+    annotate_with_gm_scores,
     annotate_with_joint_scores,
     annotate_with_pathogenicity_scores,
+    annotate_with_pedia_scores,
     annotate_with_phenotype_scores,
     annotate_with_transcripts,
+    get_pedia_scores,
     prioritize_genes,
+    prioritize_genes_gm,
     unroll_extra_annos_result,
 )
 from .queries import (
@@ -122,6 +126,16 @@ def to_str(val):
     ("phenotype_rank", "Phenotype Rank", int),
 )
 
+#: Names of the Gestalt Matcher scoring header columns; appended to the header
+#: in ``_yield_columns`` when ``_is_gm_enabled()`` is truthy.
+HEADERS_GM_SCORES = (
+    ("gm_score", "Gestalt Score", float),
+    ("gm_rank", "Gestalt Rank", int),
+)
+
+#: Names of the PEDIA scoring header columns; appended to the header in
+#: ``_yield_columns`` when ``_is_pedia_enabled()`` is truthy.
+HEADERS_PEDIA_SCORES = (
+    ("pedia_score", "PEDIA Score", float),
+    ("pedia_rank", "PEDIA Rank", int),
+)
+
 #: Names of the pathogenicity scoring header columns.
 HEADERS_PATHO_SCORES = (
     ("pathogenicity_score", "Pathogenicity Score", float),
@@ -318,6 +332,14 @@ def _is_prioritization_enabled(self):
             )
         )
 
+    def _is_gm_enabled(self):
+        """Tell whether results of this query get Gestalt Matcher scores.
+
+        Requires both the site-wide ``VARFISH_ENABLE_GESTALT_MATCHER`` setting and
+        the query's ``gm_enabled`` argument.  The value is not coerced to ``bool``
+        (it may be ``None`` when the argument is absent), so callers should only
+        test its truthiness.
+        """
+        site_switch = settings.VARFISH_ENABLE_GESTALT_MATCHER
+        if not site_switch:
+            return site_switch
+        return self.query_args.get("gm_enabled")
+
+    def _is_pedia_enabled(self):
+        """Tell whether results of this query get PEDIA scores.
+
+        Requires both the site-wide ``VARFISH_ENABLE_PEDIA`` setting and the
+        query's ``pedia_enabled`` argument.  The value is not coerced to ``bool``
+        (it may be ``None`` when the argument is absent), so callers should only
+        test its truthiness.
+        """
+        site_switch = settings.VARFISH_ENABLE_PEDIA
+        if not site_switch:
+            return site_switch
+        return self.query_args.get("pedia_enabled")
+
     def _is_pathogenicity_enabled(self):
         """Return whether pathogenicity scoring is enabled in this query."""
         return settings.VARFISH_ENABLE_CADD and all(
@@ -352,6 +374,10 @@ def _yield_columns(self, members):
             header += HEADERS_TRANSCRIPTS
         if self._is_prioritization_enabled() and self._is_pathogenicity_enabled():
             header += HEADERS_JOINT_SCORES
+        if self._is_gm_enabled():
+            header += HEADERS_GM_SCORES
+        if self._is_pedia_enabled():
+            header += HEADERS_PEDIA_SCORES
         header += HEADER_FLAGS
         header += HEADER_COMMENTS
         header += self.get_extra_annos_headers()
@@ -391,13 +417,25 @@ def _yield_smallvars(self):
                 _result = annotate_with_pathogenicity_scores(_result, variant_scores)
             if self._is_prioritization_enabled() and self._is_pathogenicity_enabled():
                 _result = annotate_with_joint_scores(_result)
+            if self._is_gm_enabled():
+                gene_scores = self._fetch_gm_scores([entry.entrez_id for entry in _result])
+                _result = annotate_with_gm_scores(_result, gene_scores)
+            if self._is_pedia_enabled():
+                pedia_scores = self._fetch_pedia_scores(_result)
+                if pedia_scores:
+                    _result = annotate_with_pedia_scores(_result, pedia_scores)
             fields = {x[1].label: x[0] for x in enumerate(list(ExtraAnnoField.objects.all()))}
             _result = unroll_extra_annos_result(_result, fields)
             self.job.add_log_entry("Writing output file...")
             total = len(_result)
             steps = math.ceil(total / 10)
             for i, small_var in enumerate(_result):
-                if self._is_prioritization_enabled() or self._is_pathogenicity_enabled():
+                if (
+                    self._is_prioritization_enabled()
+                    or self._is_pathogenicity_enabled()
+                    # Must be *called*: the bare bound method is always truthy,
+                    # which would make the ``else`` branch unreachable.
+                    or self._is_gm_enabled()
+                    or self._is_pedia_enabled()
+                ):
                     if i % steps == 0:
                         self.job.add_log_entry("{}%".format(int(100 * i / total)))
                 else:
@@ -421,7 +459,7 @@ def _fetch_gene_scores(self, entrez_ids):
         if self._is_prioritization_enabled():
             try:
                 prio_algorithm = self.query_args.get("prio_algorithm")
-                hpo_terms = tuple(sorted(self.query_args.get("prio_hpo_terms_curated", [])))
+                hpo_terms = tuple(sorted(self.query_args.get("prio_hpo_terms", [])))
                 return {
                     str(gene_id): score
                     for gene_id, _, score, _ in prioritize_genes(
@@ -433,6 +471,63 @@ def _fetch_gene_scores(self, entrez_ids):
         else:
             return {}
 
+    def _fetch_gm_scores(self, entrez_ids):
+        """Return Gestalt Matcher scores as a dict mapping ``str(gene_id)`` to score.
+
+        ``entrez_ids`` is accepted but not used; the scores are obtained from
+        ``prioritize_genes_gm`` using the ``prio_gm`` query argument only.
+
+        An empty dict is returned when Gestalt Matcher is not enabled for this
+        query, when ``prio_gm`` is missing or empty, or when
+        ``prioritize_genes_gm`` raises ``ConnectionError`` (the error is written
+        to the job log in that case).
+        """
+        prio_gm = self.query_args.get("prio_gm")
+        if not (self._is_gm_enabled() and prio_gm):
+            return {}
+        try:
+            return {
+                str(gene_id): score
+                for gene_id, _, score, _ in prioritize_genes_gm(
+                    prio_gm, logging=self.job.add_log_entry
+                )
+            }
+        except ConnectionError as e:
+            self.job.add_log_entry(e)
+            # Always hand a dict back: the caller passes the result straight to
+            # ``annotate_with_gm_scores`` without checking for ``None``.
+            return {}
+
+    def _fetch_pedia_scores(self, result):
+        """Return PEDIA scores as a dict mapping ``str(gene_id)`` to score.
+
+        Builds one payload entry per row of ``result`` and passes the entries,
+        together with the case name, to ``get_pedia_scores``.
+
+        An empty dict is returned when PEDIA is not enabled for this query or when
+        a ``ConnectionError`` is raised (the error is written to the job log in
+        that case).
+        """
+        if not self._is_pedia_enabled():
+            return {}
+        try:
+            payloads = []
+            for line in result:
+                payload = {}
+                # Gene fields are only filled in for rows that have an Entrez ID
+                # and carry all three input scores.
+                if all(
+                    (
+                        line.entrez_id,
+                        hasattr(line, "phenotype_score"),
+                        hasattr(line, "pathogenicity_score"),
+                        hasattr(line, "gm_score"),
+                    )
+                ):
+                    payload["gene_name"] = line.symbol
+                    payload["gene_id"] = line.entrez_id
+
+                    payload["cada_score"] = line.phenotype_score
+                    payload["cadd_score"] = line.pathogenicity_score
+                    # An infinite Gestalt score is sent as 0.
+                    payload["gestalt_score"] = (
+                        0 if line.gm_score == float("inf") else line.gm_score
+                    )
+
+                # NOTE(review): rows that fail the check above are still sent,
+                # carrying only ``label`` -- confirm the PEDIA service expects that.
+                payload["label"] = False
+                payloads.append(payload)
+
+            case_name = self.job.case.name
+            # Drop a leading "F_" from the case name before sending it.
+            name = case_name[2:] if case_name.startswith("F_") else case_name
+            scores = {"case_name": name, "genes": payloads}
+
+            return {str(gene_id): score for gene_id, _, score in get_pedia_scores(scores)}
+        except ConnectionError as e:
+            self.job.add_log_entry(e)
+            # Return a dict on failure as well instead of an implicit ``None``.
+            return {}
+
     def _fetch_variant_scores(self, variants):
         if self._is_pathogenicity_enabled():
             try: