Issue #51: Limit of processed pages (tests)
Nekmo committed Oct 22, 2018
1 parent f4b97f2 commit a2c92a9
Showing 3 changed files with 36 additions and 2 deletions.
4 changes: 2 additions & 2 deletions dirhunt/tests/base.py
@@ -5,8 +5,8 @@
 class CrawlerTestBase(object):
     url = 'http://domain.com/path/'
 
-    def get_crawler(self):
-        return Crawler(interesting_extensions=['php'], interesting_files=['error_log'])
+    def get_crawler(self, **kwargs):
+        return Crawler(interesting_extensions=['php'], interesting_files=['error_log'], **kwargs)
 
     def get_crawler_url(self):
         crawler = self.get_crawler()
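Threading **kwargs through the shared fixture lets individual tests pass extra Crawler constructor options without repeating the interesting-extensions setup. For example, the new limit test below builds its crawler as:

    # Forwarded through the fixture to Crawler(..., limit=1).
    crawler = self.get_crawler(limit=1)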
8 changes: 8 additions & 0 deletions dirhunt/tests/test_crawler.py
@@ -15,6 +15,14 @@ def test_print_results(self):
         crawler.results.put(GenericProcessor(None, crawler_url))
         crawler.print_results()
 
+    def test_print_results_limit(self):
+        crawler = self.get_crawler(limit=1)
+        crawler.current_processed_count = 1
+        crawler_url = CrawlerUrl(crawler, self.url)
+        crawler.results.put(GenericProcessor(None, crawler_url))
+        crawler.print_results()
+        self.assertTrue(crawler.closing)
+
     def test_add_url(self):
         crawler = self.get_crawler()
         crawler.domains.add('domain.com')
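The new test fakes a crawler that has already processed one page under limit=1 and asserts that print_results() flips the closing flag. A minimal sketch of the behaviour being pinned down, assuming only the attribute names visible in the test (limit, current_processed_count, closing); the actual logic lives in dirhunt/crawler.py, outside this commit:

    # Hypothetical sketch only, not the dirhunt implementation.
    def print_results(self):
        while not self.results.empty():
            print(self.results.get())
            # Issue #51: once the processed-page limit is reached,
            # mark the crawler as closing so no further pages are queued.
            if self.limit and self.current_processed_count >= self.limit:
                self.closing = True
                return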
26 changes: 26 additions & 0 deletions dirhunt/tests/test_crawler_url.py
@@ -1,10 +1,13 @@
 import unittest
 
+import requests
+import requests_mock
 
 from dirhunt.crawler_url import CrawlerUrl
 from dirhunt.tests.base import CrawlerTestBase
 
+from dirhunt.tests._compat import patch, Mock
 
 
 class TestCrawlerUrl(CrawlerTestBase, unittest.TestCase):
     def test_start(self):
@@ -17,3 +20,26 @@ def test_start(self):
         crawler_url.start()
         self.assertIn(self.url, crawler.processed)
         self.assertNotIn(self.url, crawler.processing)
+        self.assertEqual(crawler.current_processed_count, 1)
+
+    @requests_mock.mock()
+    def test_session_exception(self, req_mock):
+        req_mock.get(self.url, exc=requests.exceptions.ConnectTimeout)
+        crawler = self.get_crawler()
+        with patch('dirhunt.crawler_url.CrawlerUrl.close') as m:
+            crawler_url = CrawlerUrl(crawler, self.url)
+            self.assertEqual(crawler_url.start(), crawler_url)
+            self.assertEqual(crawler.current_processed_count, 1)
+            m.assert_called_once()
+
+    def test_session_read_exception(self):
+        crawler = self.get_crawler()
+        crawler.sessions = Mock()
+        crawler.sessions.get_session.return_value.get.return_value.status_code = 200
+        crawler.sessions.get_session.return_value.get.return_value.raw.read.side_effect = \
+            requests.exceptions.ConnectTimeout()
+        with patch('dirhunt.crawler_url.CrawlerUrl.close') as m:
+            crawler_url = CrawlerUrl(crawler, self.url)
+            self.assertEqual(crawler_url.start(), crawler_url)
+            self.assertEqual(crawler.current_processed_count, 1)
+            m.assert_called_once()
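Both session tests pin down the same error-handling contract: whether the request fails on get() itself (simulated with requests_mock) or while reading the response body (simulated with a Mock whose raw.read raises), start() must still return the CrawlerUrl, the processed count must still reach 1, and close() must be called exactly once. A sketch of that shape, assuming only the session API visible in the tests; the real method in dirhunt/crawler_url.py does considerably more:

    import requests

    # Hypothetical sketch of CrawlerUrl.start()'s failure path.
    def start(self):
        session = self.crawler.sessions.get_session()
        try:
            response = session.get(self.url, stream=True)
            response.raw.read(decode_content=True)
        except requests.exceptions.RequestException:
            self.crawler.current_processed_count += 1
            self.close()   # patched out in the tests; called exactly once
            return self
        # ... normal processing of the response would continue here ...
        return self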
