Skip to content

Commit

Permalink
Analyser exception handling (#69)
Browse files Browse the repository at this point in the history
* start of analyser tests

* analyser error handling and testing
  • Loading branch information
breezykermo authored and samludford committed Jun 18, 2019
1 parent e366b90 commit 76cf367
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 29 deletions.
1 change: 1 addition & 0 deletions run.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,7 @@ def __run_lib_tests():
"python",
"-m",
"pytest",
"test",
]
)
if returncode is 1:
Expand Down
47 changes: 40 additions & 7 deletions src/lib/common/analyser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
from abc import ABC, abstractmethod
from pathlib import Path
from lib.common.util import save_logs
from lib.common.exceptions import ElementShouldSkipError, ElementShouldRetryError
from lib.common.exceptions import (
ElementShouldSkipError,
ElementShouldRetryError,
InvalidAnalyserConfigError,
MTriageStorageCorruptedError,
)
from lib.common.mtmodule import MTModule


Expand Down Expand Up @@ -54,7 +59,28 @@ class Analyser(MTModule):
DERIVED_EXT = "derived"

def __init__(self, config, module, dir):
    """Initialise the underlying module and validate the analyser's inputs.

    Args:
        config: mapping of analyser options; must contain an 'elements_in'
            key whose value is a non-empty list.
        module: non-empty string naming the analyser.
        dir: string path forwarded to the parent initialiser.

    Raises:
        InvalidAnalyserConfigError: if the parent initialiser raises
            PermissionError, or if any of the arguments fails the checks
            described above.
    """
    try:
        super().__init__(module, dir)
    except PermissionError as e:
        # keep the original error attached as __cause__ for debugging
        raise InvalidAnalyserConfigError(
            "You must provide a valid directory path"
        ) from e

    if "elements_in" not in config:
        raise InvalidAnalyserConfigError(
            "The config must contain an 'elements_in' whitelist indicating the analyser's input."
        )

    elements_in = config["elements_in"]
    # compare by value: `len(x) is 0` relies on int caching and warns on 3.8+
    if not isinstance(elements_in, list) or len(elements_in) == 0:
        raise InvalidAnalyserConfigError(
            "The 'elements_in' whitelist must be a list containing at least one string"
        )

    if not isinstance(module, str) or module == "":
        raise InvalidAnalyserConfigError(
            "You must provide a name for your analyser"
        )

    if not isinstance(dir, str):
        raise InvalidAnalyserConfigError("You must provide a valid directory path")

    self.CONFIG = config

@abstractmethod
Expand All @@ -69,10 +95,15 @@ def analyse_element(self, element, config):
return NotImplemented

def start_analysing(self):
    """Run the pre-analyse, analyse and post-analyse phases, then flush logs.

    Raises:
        MTriageStorageCorruptedError: if any phase (or the log flush) raises
            an Exception. The original exception is chained as the cause.
            KeyboardInterrupt and SystemExit are not intercepted.
    """
    # NOTE: this generic wrapper can make errors undescriptive during
    # development; it should probably be toggled off while developing.
    try:
        self.__pre_analyse()
        derived_dirs = self.__analyse()
        self.__post_analyse(derived_dirs)
        self.save_and_clear_logs()
    except Exception as e:
        # The exception's initialiser takes an `fname` argument; pass the
        # method name so construction cannot itself fail with a TypeError.
        raise MTriageStorageCorruptedError("start_analysing") from e

def pre_analyse(self, config):
"""option to set up class variables"""
Expand Down Expand Up @@ -237,13 +268,15 @@ def __attempt_analyse(self, attempts, element, config):
try:
self.analyse_element(element, config)
except ElementShouldSkipError as e:
os.rmdir(element["dest"])
self.error_logger(str(e), element)
return
except ElementShouldRetryError as e:
self.error_logger(str(e), element)
if attempts > 1:
return self.attempt_analyse(attempts - 1, element, config)
return self.__attempt_analyse(attempts - 1, element, config)
else:
os.rmdir(element["dest"])
self.error_logger(
"failed after maximum retries - skipping element", element
)
Expand Down
7 changes: 7 additions & 0 deletions src/lib/common/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,10 @@ def __init__(self, fname):
f"""The method '{fname}' does not belong to a class that inherits from MTModule. The
logged_phase decorator can only be applied to methods on such a class."""
)


class MTriageStorageCorruptedError(Exception):
    """Raised when MTriage finds an unexpected file structure on disk.

    Args:
        fname: optional and unused in the message; kept so existing callers
            that pass a positional argument continue to work, while callers
            that pass nothing no longer hit a TypeError.
    """

    def __init__(self, fname=None):
        super().__init__(
            "MTriage encountered an unexpected file structure in selectors or analysers. Ensure you specified the correct working directory."
        )
2 changes: 1 addition & 1 deletion src/lib/common/mtmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,6 @@ def error_logger(self, msg, element=None):
def __get_context(self, element):
    """Build the prefix used for log lines.

    Args:
        element: None, or a mapping with an 'id' key identifying the element
            the log line refers to.

    Returns:
        "<NAME>: <PHASE_KEY>: " followed by "<id>: " when an element is given.
    """
    context = f"{self.NAME}: {self.PHASE_KEY}: "
    # identity check: `!= None` would dispatch to the element's own __ne__
    if element is not None:
        el_id = element["id"]
        context = context + f"{el_id}: "
    return context
6 changes: 3 additions & 3 deletions src/lib/common/selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def index(self, config):
Should populate a dataframe with the results, keep logs, and then call:
self.index_complete(df, logs)
REQUIRED: each result in the dataframe must contain an 'element_id' field containing
REQUIRED: each result in the dataframe must contain an 'id' field containing
a unique identifier for the element.
NOTE: should be a relatively light pass that designates the space to be retrieved.
Expand Down Expand Up @@ -79,8 +79,8 @@ def __pre_retrieve(self):
def __retrieve(self, df):
    """Attempt retrieval for every row of the element dataframe.

    Each row is converted to a dict, given a 'dest' entry of
    "<ELEMENT_DIR>/<id>", and handed to the retrying retrieval helper with
    an attempt budget of 5.

    Args:
        df: dataframe whose rows each contain an 'id' field.
    """
    for _, row in df.iterrows():
        element = row.to_dict()
        # local name avoids shadowing the `id` builtin
        element_id = row["id"]
        element["dest"] = f"{self.ELEMENT_DIR}/{element_id}"
        self.__attempt_retrieve(5, element)

@MTModule.logged_phase("post-retrieve")
Expand Down
2 changes: 1 addition & 1 deletion src/lib/selectors/local/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _run(self, config, output_path):
"name": f[0],
"extension": f[1],
"path": os.path.join(root, file),
"element_id": f"{f[0]}{f[1]}",
"id": f"{f[0]}{f[1]}",
}
)
self.logger("indexed file: " + os.path.join(root, file))
Expand Down
4 changes: 2 additions & 2 deletions src/lib/selectors/youtube/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,14 @@ def _add_to_csv_obj(self, csv_obj, s_res):
desc = search_result["snippet"]["description"]
publishedAt = search_result["snippet"]["publishedAt"]
url = f"https://www.youtube.com/watch?v={videoId}"
element_id = self._id_from_url(url)
id = self._id_from_url(url)
csv_obj.append(
{
"url": url,
"title": title.replace(",", ";"),
"desc": desc.replace(",", ";"),
"published": publishedAt[0:10],
"element_id": element_id,
"id": id,
}
)
return csv_obj
Expand Down
102 changes: 102 additions & 0 deletions src/test/test_analyser_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from lib.common.analyser import Analyser
import os
import unittest
from lib.common.exceptions import (
ElementShouldRetryError,
ElementShouldSkipError,
InvalidAnalyserConfigError,
MTriageStorageCorruptedError,
)
from test.utils import (
TEMP_ELEMENT_DIR,
scaffold_empty,
scaffold_elementmap,
cleanup,
get_element_path,
)
import pandas


class ErrorThrowingAnalyser(Analyser):
    """Analyser stub whose outcome is chosen by the element's 'id'.

    - "skip": always raises ElementShouldSkipError.
    - "retry3": raises ElementShouldRetryError until retryCount reaches 3.
    - "retryN": always raises ElementShouldRetryError.
    - anything else: completes without error.
    """

    def __init__(self, *args):
        super().__init__(*args)
        # how many retry errors have been raised for the "retry3" element
        self.retryCount = 0

    def analyse_element(self, element, config):
        """Raise the error associated with this element's id, if any."""
        element_id = element["id"]

        if element_id == "skip":
            raise ElementShouldSkipError("test")

        if element_id == "retryN":
            raise ElementShouldRetryError("test")

        if element_id == "retry3" and self.retryCount < 3:
            self.retryCount += 1
            raise ElementShouldRetryError("test")


class TestAnalyserErrors(unittest.TestCase):
    """Checks Analyser config validation and its skip/retry error handling."""

    @classmethod
    def setUpClass(cls):
        # Scaffold a stub selector with one element per scenario, then build
        # the analyser under test against that selector.
        cls.selname = "stub_sel"
        element_ids = ["skip", "retry3", "retryN", "pass"]
        scaffold_empty(cls.selname, elements=element_ids)

        valid_config = {"elements_in": [cls.selname]}
        cls.an = ErrorThrowingAnalyser(
            valid_config, "analyserErrorSelector", TEMP_ELEMENT_DIR
        )

    @classmethod
    def tearDownClass(cls):
        cleanup()

    def test_analyse_skip_error(self):
        skip_element = {"id": "skip"}
        with self.assertRaisesRegex(ElementShouldSkipError, "test - skipping element"):
            self.an.analyse_element(skip_element, {})

    def test_analyse_retry_error(self):
        retry_element = {"id": "retryN"}
        with self.assertRaisesRegex(ElementShouldRetryError, "test - attempt retry"):
            self.an.analyse_element(retry_element, {})

    def test_bad_init_error(self):
        whitelist_shape_msg = (
            "The 'elements_in' whitelist must be a list containing at least one string"
        )

        # config without the whitelist key
        with self.assertRaisesRegex(
            InvalidAnalyserConfigError, "must contain an 'elements_in' whitelist"
        ):
            ErrorThrowingAnalyser({}, "stub", TEMP_ELEMENT_DIR)

        # whitelist present but empty, then present but not a list
        for malformed in ({"elements_in": []}, {"elements_in": None}):
            with self.assertRaisesRegex(
                InvalidAnalyserConfigError, whitelist_shape_msg
            ):
                ErrorThrowingAnalyser(malformed, "stub", TEMP_ELEMENT_DIR)

        # valid config but an empty analyser name
        with self.assertRaisesRegex(
            InvalidAnalyserConfigError, "You must provide a name for your analyser"
        ):
            ErrorThrowingAnalyser({"elements_in": ["selname"]}, "", TEMP_ELEMENT_DIR)

    def test_integration(self):
        def derived_path(element_id):
            # path of this analyser's output for the given element
            return get_element_path(self.selname, element_id, analyser=self.an.NAME)

        self.assertEqual(self.an.retryCount, 0)
        self.an.start_analysing()

        # skipped and permanently failing elements are expected to leave no output
        self.assertFalse(os.path.exists(derived_path("skip")))
        self.assertFalse(os.path.exists(derived_path("retryN")))

        # "retry3" fails three times and is then expected to have output
        retry3_path = derived_path("retry3")
        self.assertEqual(self.an.retryCount, 3)
        self.assertTrue(os.path.exists(retry3_path))

        self.assertTrue(os.path.exists(derived_path("pass")))
5 changes: 3 additions & 2 deletions src/test/test_mtmodule.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from abc import ABC
from test.utils import TEMP_ELEMENT_DIR, cleanup
from lib.common.exceptions import ImproperLoggedPhaseError
from lib.common.mtmodule import MTModule
import os
Expand All @@ -13,12 +14,12 @@ class EmptyMTModule(MTModule):
class TestEmptyMTModule(unittest.TestCase):
@classmethod
def setUpClass(self):
self.BASE_DIR = "../tempdir"
self.BASE_DIR = TEMP_ELEMENT_DIR
self.mod = EmptyMTModule("empty", self.BASE_DIR)

@classmethod
def tearDownClass(self):
shutil.rmtree(self.BASE_DIR)
cleanup()

def test_class_variables(self):
self.assertEqual(self.mod.NAME, "empty")
Expand Down
2 changes: 1 addition & 1 deletion src/test/test_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
class EmptySelector(Selector):
def index(self, config):
if not os.path.exists(self.ELEMENT_MAP):
df = pd.DataFrame([{"element_id": "test"}])
df = pd.DataFrame([{"id": "test"}])
return df
else:
return None
Expand Down
10 changes: 5 additions & 5 deletions src/test/test_selector_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ def index(self, config):
return scaffold_elementmap(elements)

def retrieve_element(self, element, config):
    """Raise the error associated with this element's 'id', if any.

    - "skip": always raises ElementShouldSkipError.
    - "retry3": raises ElementShouldRetryError while retryCount is below 3,
      incrementing retryCount each time.
    - "retryN": always raises ElementShouldRetryError.
    - anything else: returns without error.
    """
    element_id = element["id"]
    if element_id == "skip":
        raise ElementShouldSkipError("test")
    elif element_id == "retry3" and self.retryCount < 3:
        self.retryCount += 1
        raise ElementShouldRetryError("test")
    elif element_id == "retryN":
        raise ElementShouldRetryError("test")
Expand Down Expand Up @@ -75,11 +75,11 @@ def test_index_error(self):

def test_retrieve_skip_error(self):
    """The 'skip' element makes retrieve_element raise ElementShouldSkipError."""
    with self.assertRaisesRegex(ElementShouldSkipError, "test - skipping element"):
        self.retrieveErrorSelector.retrieve_element({"id": "skip"}, {})

def test_retrieve_retry_error(self):
    """The 'retryN' element makes retrieve_element raise ElementShouldRetryError."""
    with self.assertRaisesRegex(ElementShouldRetryError, "test - attempt retry"):
        self.retrieveErrorSelector.retrieve_element({"id": "retryN"}, {})

def test_integration(self):
self.assertEqual(self.retrieveErrorSelector.retryCount, 0)
Expand Down
8 changes: 1 addition & 7 deletions src/test/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@


def scaffold_empty(selname, elements=[], analysers=[]):
print("scaffold run")
if not os.path.exists(TEMP_ELEMENT_DIR):
raise Exception(
"temp element dir doesn't exist - you need to instantiate it in a selector or analyser before using test methods."
)

os.makedirs(f"{TEMP_ELEMENT_DIR}/{selname}/{Analyser.DERIVED_EXT}")

for element in elements:
Expand All @@ -32,7 +26,7 @@ def get_element_path(selname, elementId, analyser=None):


def scaffold_elementmap(elements=None):
    """Build an element-map dataframe with one row per element id.

    Args:
        elements: iterable of element ids; None (the default) is treated as
            an empty collection.

    Returns:
        A pandas DataFrame whose rows each hold the id under the 'id' column.
    """
    # None sentinel instead of a shared mutable default list
    element_ids = [] if elements is None else elements
    rows = [{"id": elid} for elid in element_ids]
    return pd.DataFrame(rows)


Expand Down

0 comments on commit 76cf367

Please sign in to comment.