Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files (browse the repository at this point in the history)
  • Loading branch information
erlichsefi committed Dec 3, 2023
2 parents 73d42af + e8d1879 commit e680078
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 9 deletions.
21 changes: 14 additions & 7 deletions il_supermarket_scarper/engines/multipage_web.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from urllib.parse import urlsplit
import re
import ntpath
import lxml.html
import lxml
import requests

from il_supermarket_scarper.utils.connection import download_connection_retry
from il_supermarket_scarper.utils.connection import url_connection_retry


from il_supermarket_scarper.utils import (
Expand All @@ -24,7 +25,7 @@ def __init__(
self,
chain,
chain_id,
url="http://prices.shufersal.co.il/",
url,
folder_name=None,
total_page_xpath="""//*[@id="gridContainer"]/table/
tfoot/tr/td/a[6]/@href""",
Expand All @@ -34,13 +35,19 @@ def __init__(
self.total_page_xpath = total_page_xpath
self.total_pages_pattern = total_pages_pattern

@download_connection_retry()
def get_number_of_pages(self, url):
@url_connection_retry()
def get_number_of_pages(self, url, timeout=15):
"""get the number of pages to scrape"""

html = lxml.html.parse(url)
response = requests.get(url, timeout=timeout)
if response.status_code != 200:
raise ValueError(
f"Fetching resources failed from {url}, status code: {response.status_code}"
)

html_body = lxml.html.fromstring(response.content)

total_pages = self.get_total_pages(html)
total_pages = self.get_total_pages(html_body)
Logger.info(f"Found {total_pages} pages")

return total_pages
Expand Down
5 changes: 4 additions & 1 deletion il_supermarket_scarper/scrappers/shufersal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,8 @@ class Shufersal(MultiPageWeb):

def __init__(self, folder_name=None):
super().__init__(
chain="Shufersal", chain_id="7290027600007", folder_name=folder_name
url="https://prices.shufersal.co.il/",
chain="Shufersal",
chain_id="7290027600007",
folder_name=folder_name,
)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
tests_require=["pytest==7.1", "black==23.9.1", "pylint==3.0.1"],
extras_require={"test": ["pytest"]},
# *strongly* suggested for sharing
version="0.3.3",
version="0.3.4",
# The license can be anything you like
license="MIT",
description="python package that implement a scraping for israeli supermarket data",
Expand Down

0 comments on commit e680078

Please sign in to comment.