Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate and curate a new set of MESH-CHEBI mappings #89

Merged
merged 18 commits into from
Apr 23, 2022
Merged
60 changes: 60 additions & 0 deletions scripts/generate_chebi_mesh_mappings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"""Generate mappings using Gilda from ChEBI to MeSH."""

from collections import Counter

from gilda.api import grounder
from indra.ontology.bio import bio_ontology

from biomappings.resources import PredictionTuple, append_prediction_tuples


if __name__ == "__main__":
    # Script outline:
    #   1. Collect Gilda entry lists that refer to more than one distinct
    #      (db, id) and consist of exactly one MESH and one CHEBI term.
    #   2. Drop every pair in which either term also occurs in another pair.
    #   3. Emit one lexical ``skos:exactMatch`` prediction per remaining pair
    #      and append them to the biomappings predictions.

    # Throwaway grounding call. Presumably this forces Gilda to lazily load
    # its grounder so that ``grounder.grounder.entries`` is available below
    # -- TODO confirm against gilda.api.
    grounder.ground("x")

    # Entry lists whose terms point at more than one distinct (db, id).
    ambigs = {
        text: terms
        for text, terms in grounder.grounder.entries.items()
        if len({(term.db, term.id) for term in terms}) > 1
    }

    # Of those, keep the lists made of exactly two terms, one from MESH and
    # one from CHEBI.
    mesh_chebi = [
        terms
        for terms in ambigs.values()
        if len(terms) == 2 and {term.db for term in terms} == {"MESH", "CHEBI"}
    ]

    # Count how often each (db, id) appears across all candidate pairs so
    # that pairs sharing a term with another pair can be discarded.
    cnt = Counter((term.db, term.id) for pair in mesh_chebi for term in pair)
    mesh_chebi_simple = [
        pair
        for pair in mesh_chebi
        if not any(cnt[(term.db, term.id)] > 1 for term in pair)
    ]

    print("Found %d CHEBI-MESH mappings." % len(mesh_chebi_simple))

    predictions = []
    n_redundant = 0
    for pair in mesh_chebi_simple:
        # Each pair holds exactly one term per database (enforced above).
        chebi_term = next(term for term in pair if term.db == "CHEBI")
        mesh_term = next(term for term in pair if term.db == "MESH")

        # Count mappings the INDRA ontology already reports for this MeSH
        # term. NOTE(review): such pairs are only counted, not skipped, so
        # they are still emitted as predictions -- confirm this is intended.
        mappings = bio_ontology.get_mappings("MESH", mesh_term.id)
        if ("CHEBI", chebi_term.id) in mappings:
            n_redundant += 1

        predictions.append(
            PredictionTuple(
                source_prefix="chebi",
                source_id=chebi_term.id,
                source_name=chebi_term.entry_name,
                relation="skos:exactMatch",
                target_prefix="mesh",
                target_identifier=mesh_term.id,
                target_name=mesh_term.entry_name,
                type="lexical",
                confidence=0.95,
                source="generate_chebi_mesh_mappings.py",
            )
        )

    # ``n_redundant`` is an int counter, so it is formatted directly; the
    # two literals are joined with an explicit space.
    print(
        "A total of %d mappings could be indirectly inferred from "
        "INDRA ontology xrefs" % n_redundant
    )
    append_prediction_tuples(predictions, deduplicate=True, sort=True)
4 changes: 4 additions & 0 deletions src/biomappings/resources/incorrect.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ ccle PC3_PROSTATE PC-3 skos:exactMatch cellosaurus CVCL_S982 PC-3 [Human lung ca
ccle PL21_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE PL-21 skos:exactMatch cellosaurus CVCL_E295 TS 0129 manually_reviewed orcid:0000-0001-9439-5346
ccle TC71_BONE TC-71 skos:exactMatch cellosaurus CVCL_S882 TC71 manually_reviewed orcid:0000-0003-4423-4370
ccle WM239A WM239A skos:exactMatch cellosaurus CVCL_RN56 WM239B manually_reviewed orcid:0000-0003-4423-4370
chebi CHEBI:135025 rimazolium skos:exactMatch mesh C100296 probon manually_reviewed orcid:0000-0001-9439-5346
chebi CHEBI:135083 cibenzoline skos:exactMatch mesh C032151 cifenline manually_reviewed orcid:0000-0001-9439-5346
chebi CHEBI:17544 hydrogencarbonate skos:exactMatch mesh D001639 Bicarbonates manually_reviewed orcid:0000-0003-4423-4370
chebi CHEBI:29806 fumarate(2-) skos:exactMatch mesh D005650 Fumarates manually_reviewed orcid:0000-0003-4423-4370
chebi CHEBI:33224 chromane skos:exactMatch mesh D002839 Chromans manually_reviewed orcid:0000-0003-4423-4370
Expand Down Expand Up @@ -363,6 +365,7 @@ mesh D007161 Immunoproliferative Small Intestinal Disease skos:exactMatch doid D
mesh D007319 Sleep Initiation and Maintenance Disorders skos:exactMatch doid DOID:0111141 delayed sleep phase syndrome manually_reviewed orcid:0000-0003-1307-2508
mesh D007588 Job Satisfaction skos:exactMatch efo 0009723 job satisfaction measurement manually_reviewed orcid:0000-0003-4423-4370
mesh D007589 Job Syndrome skos:exactMatch doid DOID:0080545 hyper IgE syndrome manually_reviewed orcid:0000-0003-1307-2508
mesh D007605 Juvenile Hormones skos:exactMatch chebi CHEBI:24851 insect growth regulator manually_reviewed orcid:0000-0001-9439-5346
mesh D007680 Kidney Neoplasms skos:exactMatch doid DOID:4451 renal carcinoma manually_reviewed orcid:0000-0003-1307-2508
mesh D007713 Klinefelter Syndrome skos:exactMatch doid DOID:0090070 hypogonadotropic hypogonadism manually_reviewed orcid:0000-0003-1307-2508
mesh D007762 Labyrinthitis skos:exactMatch doid DOID:3930 otitis interna manually_reviewed orcid:0000-0003-1307-2508
Expand Down Expand Up @@ -476,6 +479,7 @@ mesh D051057 Proto-Oncogene Proteins c-akt skos:exactMatch hgnc 391 AKT1 manual
mesh D051236 Receptors, Pituitary Adenylate Cyclase-Activating Polypeptide skos:exactMatch go GO:0001634 pituitary adenylate cyclase-activating polypeptide receptor activity manually_reviewed orcid:0000-0003-4423-4370
mesh D052003 NF-kappa B p52 Subunit skos:exactMatch hgnc 7795 NFKB2 manually_reviewed orcid:0000-0003-4423-4370
mesh D053483 Eye Movement Measurements skos:exactMatch efo 0007699 eye movement measurement manually_reviewed orcid:0000-0003-4423-4370
mesh D053496 Inositol 1,4,5-Trisphosphate Receptors skos:exactMatch chebi CHEBI:131186 IP3 receptor antagonist manually_reviewed orcid:0000-0001-9439-5346
mesh D053840 Brugada Syndrome skos:exactMatch doid DOID:0110218 Brugada syndrome 1 manually_reviewed orcid:0000-0003-1307-2508
mesh D054000 Nevus, Sebaceous of Jadassohn skos:exactMatch doid DOID:7039 Borst-Jadassohn intraepidermal carcinoma manually_reviewed orcid:0000-0003-1307-2508
mesh D054467 Phospholipases A2 skos:exactMatch hgnc 9030 PLA2G1B manually_reviewed orcid:0000-0001-9439-5346
Expand Down
Loading