-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
c58519e
commit 70852e6
Showing
4 changed files
with
75 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,14 +1,76 @@ | ||
from il_supermarket_scarper.engines import Cerberus | ||
from il_supermarket_scarper.utils import DumpFolderNames | ||
import urllib.parse | ||
import datetime | ||
from il_supermarket_scarper.engines import MultiPageWeb | ||
from il_supermarket_scarper.utils import DumpFolderNames, FileTypesFilters | ||
|
||
# class HaziHinam(Cerberus): | ||
# """scrper fro hazi hinam""" | ||
|
||
class HaziHinam(Cerberus): | ||
# def __init__(self, folder_name=None): | ||
# super().__init__( | ||
# chain=DumpFolderNames.HAZI_HINAM, | ||
# chain_id="7290700100008", | ||
# folder_name=folder_name, | ||
# ftp_username="HaziHinam", | ||
# ) | ||
|
||
|
||
class HaziHinam(MultiPageWeb):
    """Scraper for the Hazi Hinam chain, built on the multi-page web engine."""

    def __init__(self, folder_name=None):
        """Configure the scraper for the Hazi Hinam price listing.

        Args:
            folder_name: forwarded unchanged to the base-class constructor.
        """
        super().__init__(
            chain=DumpFolderNames.HAZI_HINAM,
            chain_id="7290700100008",
            url="https://shop.hazi-hinam.co.il/Prices",
            folder_name=folder_name,
            # NOTE(review): this argument looks like a leftover of the former
            # FTP-based (Cerberus) scraper -- confirm that MultiPageWeb
            # accepts `ftp_username` before relying on it.
            ftp_username="HaziHinam",
            # Selects the href of the last <li> whose class list contains
            # "pagination-item"; presumably the base class applies
            # `total_pages_pattern` to it to get the page count -- TODO confirm.
            total_page_xpath="(//li[contains(concat(' ', normalize-space(@class), ' '),"
            + "' pagination-item ')])[last()]/a/@href",
            total_pages_pattern=r"\d+",
            page_argument="&p",
        )

    def collect_files_details_from_page(self, html):
        """Collect the download links and file names listed on one page.

        Args:
            html: parsed page exposing ``xpath`` (presumably an lxml element
                -- verify against the caller).

        Returns:
            A ``(links, filenames)`` tuple of two parallel lists: the href of
            the anchor in the fifth cell of every table row, and the stripped
            text of the second cell with ``".xml.gz"`` appended.

        Raises:
            IndexError: if a row has no second cell or no link in its fifth
                cell.
        """
        links = []
        filenames = []
        for row in html.xpath("//table/tbody/tr"):
            links.append(row.xpath("td[5]/a/@href")[0])
            filenames.append(row.xpath("td[2]")[0].text.strip() + ".xml.gz")
        return links, filenames

    def get_file_types_id(self, files_types=None):
        """Translate file-type filter names into the site's query filters.

        Args:
            files_types: iterable of ``FileTypesFilters`` member names, or
                ``None`` for no filtering.

        Returns:
            A list of ``{"t": <code>, "f": "null"}`` dicts in the order the
            types were requested. ``None`` or the complete set of types yields
            a single ``"null"`` filter. Unknown names are ignored.
        """
        if files_types is None or files_types == FileTypesFilters.all_types():
            return [{"t": "null", "f": "null"}]

        # The regular and the "full" variant of a type share one site code.
        type_codes = {
            FileTypesFilters.STORE_FILE.name: "3",
            FileTypesFilters.PRICE_FILE.name: "1",
            FileTypesFilters.PROMO_FILE.name: "2",
            FileTypesFilters.PRICE_FULL_FILE.name: "1",
            FileTypesFilters.PROMO_FULL_FILE.name: "2",
        }

        types = []
        seen_codes = set()
        for ftype in files_types:
            code = type_codes.get(ftype)
            # Skip unknown names, and codes already emitted: asking for both
            # PRICE_FILE and PRICE_FULL_FILE would otherwise produce the same
            # filter twice and make the caller fetch the same listing twice.
            if code is None or code in seen_codes:
                continue
            seen_codes.add(code)
            types.append({"t": code, "f": "null"})
        return types

    def build_params(self, files_types=None, store_id=None, when_date=None):
        """Build the URL query strings for the requested filters.

        Args:
            files_types: iterable of ``FileTypesFilters`` member names, or
                ``None``.
            store_id: when truthy, the ``s`` parameter is set to ``"null"``.
            when_date: ``datetime.date`` or ``datetime.datetime``; formatted as
                ``YYYY-MM-DD`` into the ``d`` parameter. Any other value leaves
                ``d`` empty.

        Returns:
            A list of query strings, each starting with ``"?"``, one per
            filter returned by ``get_file_types_id``.
        """
        all_params = []
        for type_params in self.get_file_types_id(files_types):
            params = {"d": "", "s": ""}

            if store_id:
                # NOTE(review): the store id value itself is never sent, only
                # the literal "null" -- confirm this is what the site expects.
                params["s"] = "null"
            # datetime.datetime is a subclass of datetime.date, so both work.
            if isinstance(when_date, datetime.date):
                params["d"] = when_date.strftime("%Y-%m-%d")
            if files_types:
                # The type filter is only sent when types were requested.
                params.update(type_params)
            all_params.append(params)

        return ["?" + urllib.parse.urlencode(params) for params in all_params]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 7 additions & 7 deletions
14
il_supermarket_scarper/utils/tests/cpfta_prices_regulations
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters