Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
erlichsefi committed Nov 21, 2024
1 parent c58519e commit 70852e6
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 13 deletions.
70 changes: 66 additions & 4 deletions il_supermarket_scarper/scrappers/hazihinam.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,76 @@
from il_supermarket_scarper.engines import Cerberus
from il_supermarket_scarper.utils import DumpFolderNames
import urllib.parse
import datetime
from il_supermarket_scarper.engines import MultiPageWeb
from il_supermarket_scarper.utils import DumpFolderNames, FileTypesFilters

# class HaziHinam(Cerberus):
# """scrper fro hazi hinam"""

class HaziHinam(Cerberus):
# def __init__(self, folder_name=None):
# super().__init__(
# chain=DumpFolderNames.HAZI_HINAM,
# chain_id="7290700100008",
# folder_name=folder_name,
# ftp_username="HaziHinam",
# )


class HaziHinam(MultiPageWeb):
    """Scraper for the Hazi Hinam chain, served from a paginated web listing.

    The chain's price files are listed at ``https://shop.hazi-hinam.co.il/Prices``;
    this class tells the ``MultiPageWeb`` engine how to find the page count,
    how to read one listing page, and how to build the filter query string.
    """

    def __init__(self, folder_name=None):
        """Configure the engine for the Hazi Hinam listing.

        Args:
            folder_name: forwarded unchanged to the ``MultiPageWeb`` constructor.
        """
        super().__init__(
            chain=DumpFolderNames.HAZI_HINAM,
            chain_id="7290700100008",
            url="https://shop.hazi-hinam.co.il/Prices",
            folder_name=folder_name,
            # href of the last pagination item; the page count is extracted
            # from it with ``total_pages_pattern``.
            total_page_xpath="(//li[contains(concat(' ', normalize-space(@class), ' '),"
            + "' pagination-item ')])[last()]/a/@href",
            total_pages_pattern=r"\d+",
            page_argument="&p",
        )

    def collect_files_details_from_page(self, html):
        """Collect the download links and file names listed on one page.

        Args:
            html: parsed page exposing ``.xpath()`` (presumably an lxml
                element - confirm against the engine).

        Returns:
            A ``(links, filenames)`` tuple of two parallel lists: the href
            found in the fifth cell of every table body row, and the text of
            the second cell with ``".xml.gz"`` appended.

        Raises:
            IndexError: if a row lacks the expected link or name cell.
        """
        links = []
        filenames = []
        for row in html.xpath("//table/tbody/tr"):
            links.append(row.xpath("td[5]/a/@href")[0])
            filenames.append(row.xpath("td[2]")[0].text.strip() + ".xml.gz")
        return links, filenames

    def get_file_types_id(self, files_types=None):
        """Translate file-type names into the site's ``t``/``f`` query values.

        Args:
            files_types: iterable of ``FileTypesFilters`` member names, or
                ``None`` for no filtering.

        Returns:
            A list of dicts with keys ``"t"`` and ``"f"``. When no filter is
            requested (``None`` or exactly ``FileTypesFilters.all_types()``)
            a single ``{"t": "null", "f": "null"}`` entry is returned.
            Otherwise one entry per distinct ``t`` value, in the order the
            types were first requested; unrecognised names are ignored.
        """
        if files_types is None or files_types == FileTypesFilters.all_types():
            return [{"t": "null", "f": "null"}]

        # The full and the partial variant of a type share one ``t`` value.
        type_codes = {
            FileTypesFilters.STORE_FILE.name: "3",
            FileTypesFilters.PRICE_FILE.name: "1",
            FileTypesFilters.PROMO_FILE.name: "2",
            FileTypesFilters.PRICE_FULL_FILE.name: "1",
            FileTypesFilters.PROMO_FULL_FILE.name: "2",
        }

        types = []
        seen_codes = set()
        for ftype in files_types:
            code = type_codes.get(ftype)
            # Skip unknown names, and codes already emitted: asking for both
            # e.g. PRICE_FILE and PRICE_FULL_FILE would otherwise produce the
            # very same query twice.
            if code is None or code in seen_codes:
                continue
            seen_codes.add(code)
            types.append({"t": code, "f": "null"})
        return types

    def build_params(self, files_types=None, store_id=None, when_date=None):
        """Build the query strings to request, one per file-type filter.

        Args:
            files_types: file-type names, passed to ``get_file_types_id``.
            store_id: when truthy, the ``s`` argument is set to ``"null"``.
            when_date: a ``datetime.datetime``; when given, the ``d`` argument
                is set to its ``YYYY-MM-DD`` form. Other values are ignored.

        Returns:
            A list of URL-encoded query strings, each starting with ``"?"``.
        """
        all_params = []
        for type_params in self.get_file_types_id(files_types):
            params = {"d": "", "s": ""}

            if store_id:
                # NOTE(review): the store id itself is never sent, only the
                # literal "null" - confirm this is what the site expects.
                params["s"] = "null"
            if when_date and isinstance(when_date, datetime.datetime):
                params["d"] = when_date.strftime("%Y-%m-%d")
            if files_types:
                # The type filter is only added when a filter was requested.
                params = {**params, **type_params}
            all_params.append(params)

        return ["?" + urllib.parse.urlencode(params) for params in all_params]
2 changes: 1 addition & 1 deletion il_supermarket_scarper/scrappers/tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class GoodPharmTestCase(make_test_case(ScraperFactory.GOOD_PHARM, 952)):
"""Test case for ScraperFactory.GOOD_PHARM."""


# The second argument to make_test_case was changed from 2 to 206 by this
# commit (presumably the store id exercised by the tests - confirm against
# make_test_case).
class HaziHinamTestCase(make_test_case(ScraperFactory.HAZI_HINAM, 206)):
    """Test case for ScraperFactory.HAZI_HINAM."""


Expand Down
14 changes: 7 additions & 7 deletions il_supermarket_scarper/utils/tests/cpfta_prices_regulations

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ def test_scrapers_are_updated():
def test_update_date():
    """Check that the date returned by ``get_status_date`` is the pinned one.

    The expected date is hard-coded; a failure carries the message
    "gov il site changed" and means the pinned date no longer matches.
    """
    date = get_status_date()
    # Only the date part is compared; the time of day is ignored.
    assert date.date() == datetime.datetime(2024, 11, 19).date(), "gov il site changed"

0 comments on commit 70852e6

Please sign in to comment.