Skip to content

Commit

Permalink
CU-8694u3yd2 cleanup name removal (#450)
Browse files Browse the repository at this point in the history
* CU-8694u3yd2: Add logged warning for when using full-unlink

* CU-8694u3yd2: Make CDB.remove_names simply expect an iterable of names

* CU-8694u3yd2: Improve CDB.remove_names doc string

* CU-8694u3yd2: Explicitly pass the keys to CDB.remove_names in CAT.unlink_concept_name

* CU-8694u3yd2: Add note regarding state (and order) dependent tests to some CDB maker tests

* CU-8694u3yd2: Rename/make protected CDB.remove_names method

* CU-8694u3yd2: Create deprecated CDB.remove_names method
  • Loading branch information
mart-r authored Jun 19, 2024
1 parent df9f225 commit 1c3628d
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 8 deletions.
7 changes: 5 additions & 2 deletions medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,13 +689,16 @@ def unlink_concept_name(self, cui: str, name: str, preprocessed_name: bool = Fal
names = prepare_name(name, self.pipe.spacy_nlp, {}, self.config)

# If full unlink find all CUIs
if self.config.general.get('full_unlink', False):
if self.config.general.full_unlink:
logger.warning("In the config `full_unlink` is set to `True`. "
"Thus removing all CUIs linked to the specified name"
" (%s)", name)
for n in names:
cuis.extend(self.cdb.name2cuis.get(n, []))

# Remove name from all CUIs
for c in cuis:
self.cdb.remove_names(cui=c, names=names)
self.cdb._remove_names(cui=c, names=names.keys())

def add_and_train_concept(self,
cui: str,
Expand Down
15 changes: 10 additions & 5 deletions medcat/cdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import aiofiles
import numpy as np
from typing import Dict, Set, Optional, List, Union, cast
from typing import Dict, Set, Optional, List, Union, cast, Iterable
import os

from medcat import __version__
Expand Down Expand Up @@ -148,7 +148,12 @@ def update_cui2average_confidence(self, cui: str, new_sim: float) -> None:
(self.cui2count_train.get(cui, 0) + 1)
self.is_dirty = True

def remove_names(self, cui: str, names: Dict[str, Dict]) -> None:
@deprecated("Deprecated. For internal use only. Use CAT.unlink_concept_name instead",
depr_version=(1, 12, 0), removal_version=(1, 13, 0))
def remove_names(self, cui: str, names: Iterable[str]) -> None:
self._remove_names(cui, names)

def _remove_names(self, cui: str, names: Iterable[str]) -> None:
"""Remove names from an existing concept - effect is this name will never again be used to link to this concept.
This will only remove the name from the linker (namely name2cuis and name2cuis2status), the name will still be present everywhere else.
Why? Because it is bothersome to remove it from everywhere, but
Expand All @@ -157,10 +162,10 @@ def remove_names(self, cui: str, names: Dict[str, Dict]) -> None:
Args:
cui (str):
Concept ID or unique identifer in this database.
names (Dict[str, Dict]):
Names to be removed, should look like: `{'name': {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}`
names (Iterable[str]):
Names to be removed (e.g list, set, or even a dict (in which case keys will be used)).
"""
for name in names.keys():
for name in names:
if name in self.name2cuis:
if cui in self.name2cuis[name]:
self.name2cuis[name].remove(cui)
Expand Down
20 changes: 19 additions & 1 deletion tests/test_cdb_maker.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,24 @@ def setUpClass(cls):
def tearDownClass(cls) -> None:
cls.maker.destroy_pipe()

# NOTE: The following tests are state-dependent. That is to say,
# if the order in which they're executed changes, they may fail.
# They currently rely on the fact that test methods are executed
# in anlphabetic order. But this is overall not good test design
# since failure of one unit could lead to the failure of another
# in unsexpected ways (since there's an expectation on the state).
#
# e.g, if I run:
# python -m unittest\
# tests.test_cdb_maker.B_CDBMakerEditTests.test_bd_addition_of_context_vector_positive\
# tests.test_cdb_maker.B_CDBMakerEditTests.test_bc_filter_by_cui\
# tests.test_cdb_maker.B_CDBMakerEditTests.test_bb_removal_of_name\
# tests.test_cdb_maker.B_CDBMakerEditTests.test_ba_addition_of_new_name
# Then there will be failures in `test_ba_addition_of_new_name` and `test_bb_removal_of_name`
# due to the changes in state.
#
# Though to make it clear, in the standard configuration the tests run in the
# "correct" order and are successful.
def test_ba_addition_of_new_name(self):
self.cdb.add_names(cui='C0000239', names=prepare_name('MY: new,-_! Name.', self.maker.pipe.spacy_nlp, {}, self.config), name_status='P', full_build=True)
self.assertEqual(len(self.cdb.name2cuis), 6, "Should equal 6")
Expand All @@ -142,7 +160,7 @@ def test_ba_addition_of_new_name(self):
self.assertIn('my~:~new~name~.', self.cdb.name2cuis2status)

def test_bb_removal_of_name(self):
self.cdb.remove_names(cui='C0000239', names=prepare_name('MY: new,-_! Name.', self.maker.pipe.spacy_nlp, {}, self.config))
self.cdb._remove_names(cui='C0000239', names=prepare_name('MY: new,-_! Name.', self.maker.pipe.spacy_nlp, {}, self.config))
self.assertEqual(len(self.cdb.name2cuis), 5, "Should equal 5")
self.assertNotIn('my:newname.', self.cdb.name2cuis2status)

Expand Down

0 comments on commit 1c3628d

Please sign in to comment.