Skip to content

Commit

Permalink
Issue #26: Extract dates and match different date files (TestCommonDirectoryList.test_is_applicable)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nekmo committed Sep 25, 2018
1 parent 804edb7 commit 01be2ec
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions dirhunt/tests/test_directory_lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@


class DirectoryListsTestBase(CrawlerTestBase):
def get_beautiful_soup(self, html=None):
html = html or self.html
return BeautifulSoup(html, 'html.parser')

def get_processor(self):
return ProcessIndexOfRequest(None, self.get_crawler_url())

Expand Down Expand Up @@ -35,9 +39,6 @@ class TestApacheDirectoryLists(DirectoryListsTestBase, unittest.TestCase):
</body></html>
"""

def get_beautiful_soup(self, html=None):
html = html or self.html
return BeautifulSoup(html, 'html.parser')

def test_is_applicable(self):
beautiful_soup = self.get_beautiful_soup()
Expand Down Expand Up @@ -80,6 +81,10 @@ class TestCommonDirectoryList(DirectoryListsTestBase, unittest.TestCase):

def test_process(self):
directory_list = CommonDirectoryList(self.get_processor())
links = directory_list.get_links(self.html, BeautifulSoup(self.html, 'html.parser'))
links = directory_list.get_links(self.html, self.get_beautiful_soup())
urls = [link.url for link in links]
self.assertEqual(urls, self.urls)

def test_is_applicable(self):
beautiful_soup = self.get_beautiful_soup()
self.assertTrue(CommonDirectoryList.is_applicable(None, self.html, self.get_crawler_url(), beautiful_soup))

0 comments on commit 01be2ec

Please sign in to comment.