Use BaseStatMonitor to implement built-in downloader exception monitor #334

Merged
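Reviewer note on what the refactor buys us: ``BaseStatMonitor`` turns a monitor into a declaration, made of a stat name, the setting that holds the threshold, a comparison operator, and a flag for what to do when the stat is missing. The snippet below is only a rough sketch of that idea, not spidermon's actual base class; the method name, operator mapping, and error handling are illustrative assumptions.

import operator

from spidermon.exceptions import NotConfigured


class StatThresholdSketch:
    """Illustrative stand-in for the declarative interface used by BaseStatMonitor."""

    stat_name = None             # e.g. "downloader/exception_count"
    threshold_setting = None     # e.g. "SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS"
    assert_type = "<="           # how the stat value is compared to the threshold
    fail_if_stat_missing = True  # whether a missing stat counts as a failure

    _COMPARISONS = {"<": operator.lt, "<=": operator.le,
                    ">": operator.gt, ">=": operator.ge, "==": operator.eq}

    def check(self, stats, settings):
        # No default threshold: refuse to run rather than silently pass.
        if self.threshold_setting not in settings:
            raise NotConfigured(f"You must define the {self.threshold_setting} setting.")
        if self.stat_name not in stats:
            # DownloaderExceptionMonitor sets fail_if_stat_missing = False,
            # so a job that raised no exceptions at all simply passes.
            return not self.fail_if_stat_missing
        compare = self._COMPARISONS[self.assert_type]
        return compare(stats[self.stat_name], settings[self.threshold_setting])

With that shape available, the monitor in the diff below collapses to four class attributes, and ``fail_if_stat_missing = False`` encodes the "no exception raised, the monitor passes" behaviour spelled out in the new docstring.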
spidermon/contrib/scrapy/monitors.py (35 changes: 18 additions & 17 deletions)

@@ -12,7 +12,6 @@
 SPIDERMON_MAX_ITEM_VALIDATION_ERRORS = "SPIDERMON_MAX_ITEM_VALIDATION_ERRORS"
 SPIDERMON_MAX_EXECUTION_TIME = "SPIDERMON_MAX_EXECUTION_TIME"
 SPIDERMON_MAX_RETRIES = "SPIDERMON_MAX_RETRIES"
-SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS = "SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS"
 SPIDERMON_MIN_SUCCESSFUL_REQUESTS = "SPIDERMON_MIN_SUCCESSFUL_REQUESTS"
 SPIDERMON_MAX_REQUESTS_ALLOWED = "SPIDERMON_MAX_REQUESTS_ALLOWED"

@@ -302,24 +301,26 @@ def test_check_unwanted_http_codes(self):


 @monitors.name("Downloader Exceptions monitor")
-class DownloaderExceptionMonitor(BaseScrapyMonitor):
-    """Check the amount of downloader exceptions (timeouts, rejected
-    connections, etc.).
-
-    You can configure it using the ``SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS`` setting.
-    The default is ``-1`` which disables the monitor.
+class DownloaderExceptionMonitor(BaseStatMonitor):
+    """This monitor checks if the amount of downloader
+    exceptions (timeouts, rejected connections, etc.) is
+    lesser or equal to a specified threshold.
+
+    This amount is provided by ``downloader/exception_count``
+    value of your job statistics. If the value is not available
+    in the statistics (i.e., no exception was raised), the monitor
+    will pass.
+
+    Configure the threshold using the ``SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS``
+    setting. There's **NO** default value for this setting.
+    If you try to use this monitor without a value specified, a
+    ``NotConfigured`` exception will be raised.
     """

-    @monitors.name("Should not hit the limit of downloader exceptions")
-    def test_maximum_downloader_exceptions(self):
-        exception_count = self.stats.get("downloader/exception_count", 0)
-        threshold = self.crawler.settings.getint(
-            SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS, -1
-        )
-        if threshold < 0:
-            return
-        msg = "Too many downloader exceptions ({})".format(exception_count)
-        self.assertLessEqual(exception_count, threshold, msg=msg)
+    stat_name = "downloader/exception_count"
+    threshold_setting = "SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS"
+    assert_type = "<="
+    fail_if_stat_missing = False


 @monitors.name("Retry Count monitor")
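For anyone reviewing how this lands in a project, the fragment below shows how the refactored monitor could be enabled from a Scrapy project's settings. ``SPIDERMON_ENABLED``, ``SPIDERMON_SPIDER_CLOSE_MONITORS``, and the extension path follow spidermon's standard configuration; ``myproject.monitors.MyMonitorSuite`` is a hypothetical suite assumed to include ``DownloaderExceptionMonitor``.

# settings.py (illustrative fragment, adjust module paths to your project)
EXTENSIONS = {
    "spidermon.contrib.scrapy.extensions.Spidermon": 500,
}

SPIDERMON_ENABLED = True

# Hypothetical suite containing DownloaderExceptionMonitor.
SPIDERMON_SPIDER_CLOSE_MONITORS = ("myproject.monitors.MyMonitorSuite",)

# Now mandatory for this monitor: with BaseStatMonitor there is no default,
# so leaving it out raises NotConfigured instead of silently skipping the check.
SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS = 50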
tests/contrib/scrapy/monitors/test_downloader_exception_monitor.py (75 changes: 34 additions & 41 deletions)

@@ -1,55 +1,48 @@
+import pytest
+
 from spidermon.contrib.scrapy.monitors import (
     DownloaderExceptionMonitor,
-    SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS,
 )
 from spidermon import MonitorSuite
+from spidermon.exceptions import NotConfigured
+from spidermon import settings


-def new_suite():
+@pytest.fixture
+def downloader_exception_suite():
     return MonitorSuite(monitors=[DownloaderExceptionMonitor])


-def test_downloader_exception_monitor_should_fail(make_data):
-    """Downloader Exceptions should fail if the downloader exceptions count is higher than expected"""
-
-    data = make_data({SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS: 10})
-    runner = data.pop("runner")
-    suite = new_suite()
-    data["stats"]["downloader/exception_count"] = 12
-    runner.run(suite, **data)
-    assert (
-        "Too many downloader exceptions (12)" in runner.result.monitor_results[0].error
-    )
-
-
-def test_downloader_exception_monitor_should_pass_disabled(make_data):
-    """Downloader Exceptions should pass if the limit is negative"""
-
-    data = make_data({SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS: -1})
-    runner = data.pop("runner")
-    suite = new_suite()
-    data["stats"]["downloader/exception_count"] = 99999
-    runner.run(suite, **data)
-    assert runner.result.monitor_results[0].error is None
-
-
-def test_downloader_exception_monitor_should_pass_default(make_data):
-    """Downloader Exceptions should pass if the limit is not set"""
-
+def test_needs_to_configure_downloader_exception_monitor(
+    make_data, downloader_exception_suite
+):
     data = make_data()
     runner = data.pop("runner")
-    suite = new_suite()
-    data["stats"]["downloader/exception_count"] = 99999
-    runner.run(suite, **data)
-    assert runner.result.monitor_results[0].error is None
+    data["crawler"].stats.set_value(DownloaderExceptionMonitor.stat_name, 10)
+    with pytest.raises(NotConfigured):
+        runner.run(downloader_exception_suite, **data)


-def test_downloader_exception_monitor_should_pass_under_limit(make_data):
-    """Downloader Exceptions should pass if the downloader exceptions count is not higher than expected"""
+@pytest.mark.parametrize(
+    "value,threshold,expected_status",
+    [
+        (0, 100, settings.MONITOR.STATUS.SUCCESS),
+        (50, 100, settings.MONITOR.STATUS.SUCCESS),
+        (99, 100, settings.MONITOR.STATUS.SUCCESS),
+        (100, 100, settings.MONITOR.STATUS.SUCCESS),
+        (101, 100, settings.MONITOR.STATUS.FAILURE),
+        (1000, 1, settings.MONITOR.STATUS.FAILURE),
+    ],
+)
+def test_downloader_exception_monitor_validation(
+    make_data, downloader_exception_suite, value, threshold, expected_status
+):
+    data = make_data({DownloaderExceptionMonitor.threshold_setting: threshold})
+    runner = data.pop("runner")
+
+    data["stats"][DownloaderExceptionMonitor.stat_name] = value
+
+    runner.run(downloader_exception_suite, **data)

-    data = make_data({SPIDERMON_MAX_DOWNLOADER_EXCEPTIONS: 10})
-    runner = data.pop("runner")
-    suite = new_suite()
-    data["stats"]["downloader/exception_count"] = 3
-    runner.run(suite, **data)
-    assert runner.result.monitor_results[0].error is None
+    assert len(runner.result.monitor_results) == 1
+    assert runner.result.monitor_results[0].status == expected_status
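The tests above lean on a ``make_data`` fixture that lives elsewhere in the test suite and is not part of this diff. Below is a minimal sketch of what such a fixture might look like, inferred only from how the tests use the returned dict (a "stats" mapping, a "crawler" with a working stats collector, and a "runner"); the ``SpiderMonitorRunner`` import path and constructor, and the extra keys passed through to ``runner.run()``, are assumptions rather than the project's actual conftest.

import pytest
from scrapy import Spider
from scrapy.statscollectors import MemoryStatsCollector
from scrapy.utils.test import get_crawler

# Assumed import path; check the real conftest for the runner actually used.
from spidermon.contrib.scrapy.runners import SpiderMonitorRunner


@pytest.fixture
def make_data():
    def _make_data(settings=None):
        crawler = get_crawler(Spider, settings_dict=settings)
        if getattr(crawler, "stats", None) is None:
            # Some Scrapy versions create the stats collector lazily.
            crawler.stats = MemoryStatsCollector(crawler)
        spider = Spider(name="dummy")
        return {
            "stats": crawler.stats.get_stats(),  # mutated directly by the tests
            "crawler": crawler,
            "spider": spider,
            "runner": SpiderMonitorRunner(spider=spider),
            "job": None,  # assumed placeholder when not running on Scrapy Cloud
        }

    return _make_data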