diff --git a/medcat/cat.py b/medcat/cat.py index 9cca44b0f..ef6099566 100644 --- a/medcat/cat.py +++ b/medcat/cat.py @@ -842,7 +842,7 @@ def add_and_train_concept(self, names = prepare_name(name, self.pipe.spacy_nlp, {}, self.config) # Only if not negative, otherwise do not add the new name if in fact it should not be detected if do_add_concept and not negative: - self.cdb.add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, description=description, + self.cdb._add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, description=description, full_build=full_build) if spacy_entity is not None and spacy_doc is not None: diff --git a/medcat/cdb.py b/medcat/cdb.py index 5a648f4af..1110a3b84 100644 --- a/medcat/cdb.py +++ b/medcat/cdb.py @@ -13,6 +13,7 @@ from medcat.utils.hasher import Hasher from medcat.utils.matutils import unitvec from medcat.utils.ml_utils import get_lr_linking +from medcat.utils.decorators import deprecated from medcat.config import Config, weighted_average, workers from medcat.utils.saving.serializer import CDBSerializer @@ -222,8 +223,9 @@ def add_names(self, cui: str, names: Dict, name_status: str = 'A', full_build: b # Name status must be one of the three name_status = 'A' - self.add_concept(cui=cui, names=names, ontologies=set(), name_status=name_status, type_ids=set(), description='', full_build=full_build) + self._add_concept(cui=cui, names=names, ontologies=set(), name_status=name_status, type_ids=set(), description='', full_build=full_build) + @deprecated("Use `cdb._add_concept` as this will be removed in a future release.") def add_concept(self, cui: str, names: Dict, @@ -232,6 +234,43 @@ def add_concept(self, type_ids: Set[str], description: str, full_build: bool = False) -> None: + """ + Deprecated: Use `cdb._add_concept` as this will be removed in a future release. + + Add a concept to internal Concept Database (CDB). Depending on what you are providing + this will add a large number of properties for each concept. + + Args: + cui (str): + Concept ID or unique identifier in this database, all concepts that have + the same CUI will be merged internally. + names (Dict[str, Dict]): + Names for this concept, or the value that if found in free text can be linked to this concept. + Names is a dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}` + Names should be generated by helper function 'medcat.preprocessing.cleaners.prepare_name' + ontologies (Set[str]): + ontologies in which the concept exists (e.g. SNOMEDCT, HPO) + name_status (str): + One of `P`, `N`, `A` + type_ids (Set[str]): + Semantic type identifier (have a look at TUIs in UMLS or SNOMED-CT) + description (str): + Description of this concept. + full_build (bool): + If True the dictionary self.addl_info will also be populated, contains a lot of extra information + about concepts, but can be very memory consuming. This is not necessary + for normal functioning of MedCAT (Default Value `False`). + """ + self._add_concept(cui, names, ontologies, name_status, type_ids, description, full_build) + + def _add_concept(self, + cui: str, + names: Dict, + ontologies: set, + name_status: str, + type_ids: Set[str], + description: str, + full_build: bool = False) -> None: """Add a concept to internal Concept Database (CDB). Depending on what you are providing this will add a large number of properties for each concept. @@ -241,7 +280,8 @@ def add_concept(self, the same CUI will be merged internally. names (Dict[str, Dict]): Names for this concept, or the value that if found in free text can be linked to this concept. - Names is an dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}` + Names is a dict like: `{name: {'tokens': tokens, 'snames': snames, 'raw_name': raw_name}, ...}` + Names should be generated by helper function 'medcat.preprocessing.cleaners.prepare_name' ontologies (Set[str]): ontologies in which the concept exists (e.g. SNOMEDCT, HPO) name_status (str): diff --git a/medcat/cdb_maker.py b/medcat/cdb_maker.py index e9c72d12e..ca98f821e 100644 --- a/medcat/cdb_maker.py +++ b/medcat/cdb_maker.py @@ -173,7 +173,7 @@ def prepare_csvs(self, if len(raw_name) >= self.config.cdb_maker['remove_parenthesis']: prepare_name(raw_name, self.pipe.spacy_nlp, names, self.config) - self.cdb.add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, + self.cdb._add_concept(cui=cui, names=names, ontologies=ontologies, name_status=name_status, type_ids=type_ids, description=description, full_build=full_build) # DEBUG logger.debug("\n\n**** Added\n CUI: %s\n Names: %s\n Ontologies: %s\n Name status: %s\n Type IDs: %s\n Description: %s\n Is full build: %s", diff --git a/tests/archive_tests/test_cdb_maker_archive.py b/tests/archive_tests/test_cdb_maker_archive.py index 329408999..9e2fc2d72 100644 --- a/tests/archive_tests/test_cdb_maker_archive.py +++ b/tests/archive_tests/test_cdb_maker_archive.py @@ -108,7 +108,7 @@ def test_concept_similarity(self): for i in range(500): cui = "C" + str(i) type_ids = {'T-' + str(i%10)} - cdb.add_concept(cui=cui, names=prepare_name('Name: ' + str(i), self.maker.pipe.get_spacy_nlp(), {}, self.config), ontologies=set(), + cdb._add_concept(cui=cui, names=prepare_name('Name: ' + str(i), self.maker.pipe.get_spacy_nlp(), {}, self.config), ontologies=set(), name_status='P', type_ids=type_ids, description='', full_build=True) vectors = {} diff --git a/tests/utils/test_hashing.py b/tests/utils/test_hashing.py index b6681461f..0fd6b5891 100644 --- a/tests/utils/test_hashing.py +++ b/tests/utils/test_hashing.py @@ -135,7 +135,7 @@ class CATHashingTestsWithChange(CATHashingTestsWithFakeHash): def test_when_changes_do_calc(self): with unittest.mock.patch.object(CDB, 'calculate_hash', return_value='abcd1234') as patch_method: - self.undertest.cdb.add_concept(**self.concept_kwargs) + self.undertest.cdb._add_concept(**self.concept_kwargs) hash = self.undertest.get_hash() self.assertIsInstance(hash, str) patch_method.assert_called() @@ -151,10 +151,10 @@ def test_default_cdb_not_dirty(self): self.assertFalse(self.undertest.cdb.is_dirty) def test_after_add_concept_is_dirty(self): - self.undertest.cdb.add_concept(**self.concept_kwargs) + self.undertest.cdb._add_concept(**self.concept_kwargs) self.assertTrue(self.undertest.cdb.is_dirty) def test_after_recalc_not_dirty(self): - self.undertest.cdb.add_concept(**self.concept_kwargs) + self.undertest.cdb._add_concept(**self.concept_kwargs) self.undertest.get_hash() self.assertFalse(self.undertest.cdb.is_dirty)