From 7950c57d89ef54a006b126767b82ad03ea5473dc Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 22 Dec 2022 10:19:12 +0000 Subject: [PATCH] CU-862hyd5wx Unify rosalind/vocab downloading in tests, identify and fail meaningfully in case of 503 --- tests/archive_tests/test_ner_archive.py | 11 +++++------ tests/medmentions/make_cdb.py | 12 ++++++------ tests/test_ner.py | 10 +++++----- tests/test_pipe.py | 10 +++++----- 4 files changed, 21 insertions(+), 22 deletions(-) diff --git a/tests/archive_tests/test_ner_archive.py b/tests/archive_tests/test_ner_archive.py index 439413073..4e44cb368 100644 --- a/tests/archive_tests/test_ner_archive.py +++ b/tests/archive_tests/test_ner_archive.py @@ -15,6 +15,8 @@ from medcat.linking.context_based_linker import Linker from medcat.config import Config +from ..helper import VocabDownloader + class NerArchiveTests(unittest.TestCase): @@ -35,12 +37,9 @@ def setUp(self) -> None: # Check #assert cdb.cui2names == {'S-229004': {'movar', 'movarvirus', 'movarviruses'}, 'S-229005': {'cdb'}} - self.vocab_path = "./tmp_vocab.dat" - if not os.path.exists(self.vocab_path): - import requests - tmp = requests.get("https://medcat.rosalind.kcl.ac.uk/media/vocab.dat") - with open(self.vocab_path, 'wb') as f: - f.write(tmp.content) + downloader = VocabDownloader() + self.vocab_path = downloader.vocab_path + downloader.check_or_download() vocab = Vocab.load(self.vocab_path) # Make the pipeline diff --git a/tests/medmentions/make_cdb.py b/tests/medmentions/make_cdb.py index 52929b31f..b99c78fbc 100644 --- a/tests/medmentions/make_cdb.py +++ b/tests/medmentions/make_cdb.py @@ -5,6 +5,9 @@ import logging import os +from ..helper import VocabDownloader + + config = Config() config.general['log_level'] = logging.INFO config.general['spacy_model'] = 'en_core_sci_lg' @@ -21,12 +24,9 @@ from medcat.cdb import CDB from medcat.cat import CAT -vocab_path = "./tmp_vocab.dat" -if not os.path.exists(vocab_path): - import requests - tmp = requests.get("https://s3-eu-west-1.amazonaws.com/zkcl/vocab.dat") - with open(vocab_path, 'wb') as f: - f.write(tmp.content) +downloader = VocabDownloader() +vocab_path = downloader.vocab_path +downloader.check_or_download() config = Config() cdb = CDB.load("./tmp_cdb.dat", config=config) diff --git a/tests/test_ner.py b/tests/test_ner.py index 1ae6e375d..6f4d34e76 100644 --- a/tests/test_ner.py +++ b/tests/test_ner.py @@ -14,6 +14,8 @@ from medcat.config import Config from medcat.cdb import CDB +from .helper import VocabDownloader + class A_NERTests(unittest.TestCase): @classmethod @@ -25,11 +27,9 @@ def setUpClass(cls): cls.cdb = CDB(config=cls.config) print("Set up Vocab") - vocab_path = "./tmp_vocab.dat" - if not os.path.exists(vocab_path): - tmp = requests.get("https://medcat.rosalind.kcl.ac.uk/media/vocab.dat") - with open(vocab_path, 'wb') as f: - f.write(tmp.content) + downloader = VocabDownloader() + vocab_path = downloader.vocab_path + downloader.check_or_download() cls.vocab = Vocab.load(vocab_path) diff --git a/tests/test_pipe.py b/tests/test_pipe.py index 7f5bd2ece..583a6ecbf 100644 --- a/tests/test_pipe.py +++ b/tests/test_pipe.py @@ -17,6 +17,8 @@ from transformers import AutoTokenizer +from .helper import VocabDownloader + class PipeTests(unittest.TestCase): @@ -30,11 +32,9 @@ def setUpClass(cls) -> None: cls.config.linking['disamb_length_limit'] = 2 cls.cdb = CDB(config=cls.config) - vocab_path = "./tmp_vocab.dat" - if not os.path.exists(vocab_path): - tmp = requests.get("https://medcat.rosalind.kcl.ac.uk/media/vocab.dat") - with open(vocab_path, 'wb') as f: - f.write(tmp.content) + downloader = VocabDownloader() + vocab_path = downloader.vocab_path + downloader.check_or_download() cls.vocab = Vocab.load(vocab_path) cls.spell_checker = BasicSpellChecker(cdb_vocab=cls.cdb.vocab, config=cls.config, data_vocab=cls.vocab)