From 3af2e421d0c2326191675c140386e793226ee99b Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Wed, 9 Oct 2024 19:20:32 +0000 Subject: [PATCH 1/9] stress test --- stress_test.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/stress_test.py b/stress_test.py index c4017e3..10803e7 100644 --- a/stress_test.py +++ b/stress_test.py @@ -4,10 +4,39 @@ import tempfile import pstats import cProfile -from io import StringIO +import io from il_supermarket_scarper.scrappers_factory import ScraperFactory +def format_stats_as_json(pr, project_name): + stream = io.StringIO() + ps = pstats.Stats(pr, stream=stream) + ps.sort_stats(pstats.SortKey.CUMULATIVE) # Sort by cumulative time + ps.print_stats() + + # Convert the printed stats to a list of lines + stats_output = stream.getvalue().splitlines() + + # Filter the lines to include only functions within the project + project_stats = [] + for line in stats_output: + if project_name in line: # Filter for project-specific lines + # Extract relevant fields from the profiling output + # The typical format is (Function location, Number of calls, Total time, Cumulative time, etc.) + parts = line.split() + if len(parts) >= 5: # Basic sanity check for the parts + function_data = { + "function": parts[-1], # Function path + "ncalls": parts[0], # Number of calls + "tottime": parts[1], + "tottime_per_call": parts[2],# Time spent in function + "cumtime": parts[3], # Cumulative time including subcalls + "cumtime_per_call": parts[4] # + } + project_stats.append(function_data) + + return project_stats + if __name__ == "__main__": result = {} @@ -28,14 +57,9 @@ def full_execution(scraper): pr.disable() - stream = StringIO() - ps = pstats.Stats(pr, stream=stream) - ps.print_stats() - stream.seek(0) - end_time = time.time() result[scraper_name] = { - "status": stream.read(), + "status": format_stats_as_json(pr, "israeli-supermarket-scarpers"), "execution_time": execution_time, "start_time": start_time, "end_time": end_time, From 1185f32a629174d77c8611384ef8735c7d0ab872 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 05:12:15 +0000 Subject: [PATCH 2/9] . --- il_supermarket_scarper/engines/cerberus.py | 1 + 1 file changed, 1 insertion(+) diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index 302de4b..febdca6 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -73,6 +73,7 @@ def scrape( return results except Exception as e: # pylint: disable=broad-except self.on_download_fail(e, files=files) + Logger.error_execption(e) return [] def collect_files_details_from_site( From 12cd26085660b93c7200fb099cdf333258600e30 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 07:42:59 +0000 Subject: [PATCH 3/9] . 
--- il_supermarket_scarper/engines/cerberus.py | 10 ++--- il_supermarket_scarper/engines/engine.py | 45 +++++++++++++------ .../engines/multipage_web.py | 7 ++- .../engines/publishprice.py | 4 +- il_supermarket_scarper/engines/web.py | 13 +++--- il_supermarket_scarper/main.py | 6 +-- il_supermarket_scarper/scrapper_runner.py | 8 ++-- il_supermarket_scarper/scrappers/bareket.py | 2 +- .../scrappers/meshnat_yosef.py | 2 +- .../scrappers/nativ_hashed.py | 2 +- il_supermarket_scarper/scrappers/polizer.py | 2 +- .../scrappers/tests/test_cases.py | 18 ++++---- il_supermarket_scarper/scrappers/tivtaam.py | 2 +- il_supermarket_scarper/utils/gzip_utils.py | 2 +- stress_test.py | 21 ++++----- 15 files changed, 80 insertions(+), 64 deletions(-) diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index febdca6..9f447d1 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -41,7 +41,7 @@ def scrape( limit=None, files_types=None, store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, filter_null=False, filter_zero=False, @@ -52,7 +52,7 @@ def scrape( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) files = self.collect_files_details_from_site( limit=limit, @@ -60,7 +60,7 @@ def scrape( filter_null=filter_null, filter_zero=filter_zero, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) self.on_collected_details(files) @@ -83,7 +83,7 @@ def collect_files_details_from_site( filter_null=False, filter_zero=False, store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, ): """collect all files to download from the site""" @@ -118,7 +118,7 @@ def collect_files_details_from_site( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) Logger.info(f"After applying limit: Found {len(files)} files") diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index b36ecf8..a1ba629 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -1,7 +1,7 @@ from abc import ABC import os import re - +import datetime from il_supermarket_scarper.utils import ( get_output_folder, @@ -38,16 +38,16 @@ def get_storage_path(self): return self.storage_path def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): Logger.info( f"check if fail is allowd with, limit={limit}," - f"files_types={files_types},store_id={store_id},only_latest={only_latest}" + f"files_types={files_types},store_id={store_id},when_date={when_date}" ) return False def is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): """return true if its ok the scarper reuturn no enrty""" @@ -58,7 +58,7 @@ def is_validate_scraper_found_no_files( for file_type in files_types: if file_type in FileTypesFilters.all_full_files(): request_only_update_file = False - Logger.info(f"the value of {only_latest} should not affect.") + Logger.info(f"the value of {when_date} should not affect.") return ( limit == 0 or files_types == [] @@ -68,7 +68,7 @@ def 
is_validate_scraper_found_no_files( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) ) @@ -83,13 +83,13 @@ def apply_limit( files_types=None, by_function=lambda x: x, store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, ): """filter the list according to condition""" assert ( - not only_latest or limit is None - ), "only_latest flag can't be applied with limit." + when_date is not None or limit is None + ), "when_date flag can't be applied with limit." # filter files already downloaded intreable_ = self.filter_already_downloaded( @@ -124,8 +124,12 @@ def apply_limit( ) Logger.info(f"Number of entry after filter file type id is {len(intreable_)}") - if only_latest: + if isinstance(when_date, datetime.datetime): + intreable_ = self.get_by_date(when_date, by_function, intreable_) + elif isinstance(when_date, str) and when_date == "latest": intreable_ = self.get_only_latest(by_function, intreable_) + else: + raise ValueError(f"when_date should be datetime or bool, got {when_date}") Logger.info(f"Number of entry after filter keeping latast is {len(intreable_)}") @@ -144,12 +148,12 @@ def apply_limit( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) ): raise ValueError( f"No files to download for file files_types={files_types}," - f"limit={limit},store_id={store_id},only_latest={only_latest}" + f"limit={limit},store_id={store_id},when_date={when_date}" ) return intreable_ @@ -182,6 +186,19 @@ def get_only_latest(self, by_function, intreable_): groups_value[store_info] = file return list(groups_value.values()) + def get_by_date(self, requested_date, by_function, intreable_): + """get by date""" + + groups_value = [] + for file in intreable_: + name_split = by_function(file).split("-") + date_info = "-".join(name_split[2:]).rsplit(".", maxsplit=1)[-1] + + if date_info == requested_date: + groups_value.append(file) + + return groups_value + @classmethod def unique(cls, iterable, by_function=lambda x: x): """Returns the type of the file.""" @@ -210,7 +227,7 @@ def scrape( limit=None, files_types=None, store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, filter_null=False, filter_zero=False, @@ -222,7 +239,7 @@ def scrape( files_types=files_types, store_id=store_id, files_names_to_scrape=files_names_to_scrape, - only_latest=only_latest, + when_date=when_date, filter_null=filter_null, filter_zero=filter_zero, ) diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index 9b6742b..8145b81 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -77,7 +77,6 @@ def collect_files_details_from_site( files_types=None, store_id=None, when_date=None, - only_latest=False, files_names_to_scrape=None, ): self.post_scraping() @@ -109,7 +108,7 @@ def collect_files_details_from_site( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) @@ -125,7 +124,7 @@ def collect_files_details_from_page(self, html): return links, filenames def process_links_before_download( - self, page, limit=None, files_types=None, store_id=None, only_latest=None + self, page, limit=None, files_types=None, store_id=None, when_date=None ): """additional processing to the links before download""" response = self.session_with_cookies_by_chain(page) @@ 
-141,7 +140,7 @@ def process_links_before_download( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) Logger.info( diff --git a/il_supermarket_scarper/engines/publishprice.py b/il_supermarket_scarper/engines/publishprice.py index c8acd80..d87e7ab 100644 --- a/il_supermarket_scarper/engines/publishprice.py +++ b/il_supermarket_scarper/engines/publishprice.py @@ -88,14 +88,14 @@ def get_name_from_herf(x): return download_urls, file_names def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): return ( super()._is_validate_scraper_found_no_files( # what fails the rest limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) or ( # if we are looking for one store file in a weekend or holiday store_id and (_is_weekend_in_israel() or _is_holiday_in_israel()) diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index c232b56..5043ca5 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -45,7 +45,7 @@ def apply_limit_zip( files_types=None, by_function=lambda x: x[0], store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, ): """apply limit to zip""" @@ -55,7 +55,7 @@ def apply_limit_zip( files_types=files_types, by_function=by_function, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) if len(ziped) == 0: @@ -69,7 +69,6 @@ def collect_files_details_from_site( files_types=None, store_id=None, when_date=None, - only_latest=False, files_names_to_scrape=None, ): """collect all enteris to download from site""" @@ -95,7 +94,7 @@ def collect_files_details_from_site( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) @@ -109,7 +108,7 @@ def scrape( limit=None, files_types=None, store_id=None, - only_latest=False, + when_date=None, files_names_to_scrape=None, filter_null=False, filter_zero=False, @@ -121,7 +120,7 @@ def scrape( limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, filter_null=filter_null, filter_zero=filter_zero, ) @@ -130,7 +129,7 @@ def scrape( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=files_names_to_scrape, ) diff --git a/il_supermarket_scarper/main.py b/il_supermarket_scarper/main.py index b688941..3136359 100644 --- a/il_supermarket_scarper/main.py +++ b/il_supermarket_scarper/main.py @@ -10,7 +10,7 @@ def __init__( size_estimation_mode=False, enabled_scrapers=None, limit=None, - only_latest=False, + when_date=None, files_types=FileTypesFilters.all_types(), dump_folder_name=None, lookup_in_db=True, @@ -27,7 +27,7 @@ def __init__( self.dump_folder_name = dump_folder_name self.limit = limit self.files_types = files_types - self.only_latest = only_latest + self.when_date = when_date def get_dump_folder_name(self): """get the dump folder name""" @@ -36,5 +36,5 @@ def get_dump_folder_name(self): def start(self): """run the scraping""" return self.runner.run( - limit=self.limit, files_types=self.files_types, only_latest=self.only_latest + limit=self.limit, files_types=self.files_types, when_date=self.when_date ) diff --git 
a/il_supermarket_scarper/scrapper_runner.py b/il_supermarket_scarper/scrapper_runner.py index 3e6978b..141759c 100644 --- a/il_supermarket_scarper/scrapper_runner.py +++ b/il_supermarket_scarper/scrapper_runner.py @@ -38,7 +38,7 @@ def __init__( self.multiprocessing = multiprocessing self.lookup_in_db = lookup_in_db - def run(self, limit=None, files_types=None, only_latest=False): + def run(self, limit=None, files_types=None, when_date=False): """run the scraper""" Logger.info(f"Limit is {limit}") Logger.info(f"files_types is {files_types}") @@ -54,7 +54,7 @@ def run(self, limit=None, files_types=None, only_latest=False): { "limit": limit, "files_types": files_types, - "only_latest": only_latest, + "when_date": when_date, }, ), self.enabled_scrapers, @@ -77,7 +77,7 @@ def scrape_one( limit=None, files_types=None, store_id=None, - only_latest=False, + when_date=None, ): """scrape one""" chain_scrapper_constractor = ScraperFactory.get(chain_scrapper_class) @@ -94,7 +94,7 @@ def scrape_one( limit=limit, files_types=files_types, store_id=store_id, - only_latest=only_latest, + when_date=when_date, files_names_to_scrape=None, filter_null=False, filter_zero=False, diff --git a/il_supermarket_scarper/scrappers/bareket.py b/il_supermarket_scarper/scrappers/bareket.py index 2f2bcd9..88a7456 100644 --- a/il_supermarket_scarper/scrappers/bareket.py +++ b/il_supermarket_scarper/scrappers/bareket.py @@ -16,7 +16,7 @@ def __init__(self, folder_name=None): ) def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): # no data on shabat if you test a single store file. return _is_saturday_in_israel() or _is_holiday_in_israel() and store_id diff --git a/il_supermarket_scarper/scrappers/meshnat_yosef.py b/il_supermarket_scarper/scrappers/meshnat_yosef.py index 91a1c9a..b82d942 100644 --- a/il_supermarket_scarper/scrappers/meshnat_yosef.py +++ b/il_supermarket_scarper/scrappers/meshnat_yosef.py @@ -34,7 +34,7 @@ def extract_task_from_entry(self, all_trs): return download_urls, file_names def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): # no data on shabat return _is_saturday_in_israel() or _is_holiday_in_israel() diff --git a/il_supermarket_scarper/scrappers/nativ_hashed.py b/il_supermarket_scarper/scrappers/nativ_hashed.py index 81c9773..5a7f6fa 100644 --- a/il_supermarket_scarper/scrappers/nativ_hashed.py +++ b/il_supermarket_scarper/scrappers/nativ_hashed.py @@ -19,7 +19,7 @@ def __init__(self, folder_name=None): ) def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): # no data on shabat return _is_saturday_in_israel() or _is_holiday_in_israel() diff --git a/il_supermarket_scarper/scrappers/polizer.py b/il_supermarket_scarper/scrappers/polizer.py index f20d171..6f8819c 100644 --- a/il_supermarket_scarper/scrappers/polizer.py +++ b/il_supermarket_scarper/scrappers/polizer.py @@ -14,7 +14,7 @@ def __init__(self, folder_name=None): ) def _is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): # no data on shabat return ( diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py 
b/il_supermarket_scarper/scrappers/tests/test_cases.py index 79064ce..77d0e35 100644 --- a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -42,7 +42,7 @@ def _make_sure_filter_work( file_type=None, limit=None, store_id=None, - only_latest=False, + when_date=None, ): """make sure the file type filter works""" if file_type: @@ -55,7 +55,7 @@ def _make_sure_filter_work( for file in files_found: store_mark.append(int(file.split("-")[1])) assert len(set(store_mark)) == 1 and len(store_mark) == len(files_found) - if only_latest: + if when_date: files_sources = [] for file in files_found: source = file.split("-")[:2] @@ -107,7 +107,7 @@ def _clean_scarpe_delete( store_id=None, limit=None, file_type=None, - only_latest=False, + when_date=None, ): with tempfile.TemporaryDirectory() as tmpdirname: self.__clean_scarpe_delete( @@ -116,7 +116,7 @@ def _clean_scarpe_delete( store_id=store_id, limit=limit, file_type=file_type, - only_latest=only_latest, + when_date=when_date, ) def __clean_scarpe_delete( @@ -126,7 +126,7 @@ def __clean_scarpe_delete( store_id=None, limit=None, file_type=None, - only_latest=False, + when_date=None, ): self._delete_download_folder(dump_path) os.makedirs(dump_path) @@ -142,7 +142,7 @@ def __clean_scarpe_delete( "limit": limit, "files_types": file_type, "store_id": store_id, - "only_latest": only_latest, + "when_date": when_date, "filter_null": True, "filter_zero": True, } @@ -164,14 +164,14 @@ def __clean_scarpe_delete( limit=limit, files_types=file_type, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) and not hasattr(scraper, "_is_flaky"): self._make_sure_filter_work( files_found, file_type=file_type, limit=limit, store_id=store_id, - only_latest=only_latest, + when_date=when_date, ) for file in files_found: @@ -255,7 +255,7 @@ def test_scrape_file_from_single_store_last(self): self._clean_scarpe_delete( scraper_enum, store_id=store_id, - only_latest=True, + when_date="lastast", ) return TestScapers diff --git a/il_supermarket_scarper/scrappers/tivtaam.py b/il_supermarket_scarper/scrappers/tivtaam.py index 3fbf6ba..f6b174f 100644 --- a/il_supermarket_scarper/scrappers/tivtaam.py +++ b/il_supermarket_scarper/scrappers/tivtaam.py @@ -19,7 +19,7 @@ def __init__(self, folder_name=None): ) def is_validate_scraper_found_no_files( - self, limit=None, files_types=None, store_id=None, only_latest=False + self, limit=None, files_types=None, store_id=None, when_date=None ): return ( _is_saturday_in_israel() diff --git a/il_supermarket_scarper/utils/gzip_utils.py b/il_supermarket_scarper/utils/gzip_utils.py index 2f8ad52..20aeffe 100644 --- a/il_supermarket_scarper/utils/gzip_utils.py +++ b/il_supermarket_scarper/utils/gzip_utils.py @@ -22,7 +22,7 @@ def extract_xml_file_from_gz_file(file_save_path): with open(target_file_name, "wb") as f_out: f_out.write(the_file.read()) - except ( # pylint: disable=broad-except,redefined-outer-name + except ( # pylint: disable=broad-except,redefined-outer-name Exception ) as exception: report_failed_zip(exception, file_save_path, target_file_name) diff --git a/stress_test.py b/stress_test.py index 10803e7..fdc9b52 100644 --- a/stress_test.py +++ b/stress_test.py @@ -8,9 +8,10 @@ from il_supermarket_scarper.scrappers_factory import ScraperFactory -def format_stats_as_json(pr, project_name): +def format_stats_as_json(profile, project_name): + """get the stats from the profiler and format them as json""" stream = io.StringIO() - ps = pstats.Stats(pr, 
stream=stream) + ps = pstats.Stats(profile, stream=stream) ps.sort_stats(pstats.SortKey.CUMULATIVE) # Sort by cumulative time ps.print_stats() @@ -21,22 +22,22 @@ def format_stats_as_json(pr, project_name): project_stats = [] for line in stats_output: if project_name in line: # Filter for project-specific lines - # Extract relevant fields from the profiling output - # The typical format is (Function location, Number of calls, Total time, Cumulative time, etc.) + parts = line.split() if len(parts) >= 5: # Basic sanity check for the parts function_data = { - "function": parts[-1], # Function path - "ncalls": parts[0], # Number of calls - "tottime": parts[1], - "tottime_per_call": parts[2],# Time spent in function - "cumtime": parts[3], # Cumulative time including subcalls - "cumtime_per_call": parts[4] # + "function": parts[-1], # Function path + "ncalls": parts[0], # Number of calls + "tottime": parts[1], + "tottime_per_call": parts[2], # Time spent in function + "cumtime": parts[3], # Cumulative time including subcalls + "cumtime_per_call": parts[4], # } project_stats.append(function_data) return project_stats + if __name__ == "__main__": result = {} From 5dfaa479719108f4a55d1012625230a1122a7a41 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 07:55:25 +0000 Subject: [PATCH 4/9] . --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 6de0864..081b398 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ tests_require=dev_required, extras_require={"test": ["pytest", "pytest-xdist"]}, # *strongly* suggested for sharing - version="0.4.8", + version="0.4.9", # The license can be anything you like license="MIT", description="python package that implement a scraping for israeli supermarket data", From a65f6fe82c9fd53cb2b231abc14a37890aee061d Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 15:21:59 +0000 Subject: [PATCH 5/9] . 
--- il_supermarket_scarper/engines/apsx.py | 55 ++++++++++--------- il_supermarket_scarper/engines/engine.py | 2 +- il_supermarket_scarper/engines/web.py | 1 + .../scrappers/tests/test_cases.py | 4 +- 4 files changed, 33 insertions(+), 29 deletions(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index 53a6eca..12e6978 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -44,7 +44,7 @@ def _build_query_url(self, query_params): res.append(base + self.aspx_page + query_params) return res - def _get_all_possible_query_string_params( + def _get_all_possible_query_string_params( #pylint: disable=unused-argument self, files_types=None, store_id=None, when_date=None ): """get the arguments need to add to the url""" @@ -52,35 +52,38 @@ def _get_all_possible_query_string_params( res = [] for c_id in self.chain_id: res.append(f"?code={c_id}") - return res + chains_urls = res + chains_urls = [f"?code={self.chain_id}"] - # add file types to url - if files_types: - chains_urls_with_types = [] - for files_type in files_types: - file_type_id = self.file_type_id(files_type) - chains_urls_with_types.extend( - [ - f"{chain_url}&WFileType={file_type_id}" - for chain_url in chains_urls - ] - ) - chains_urls = chains_urls_with_types - - # add store id - if store_id: - for chain_url in chains_urls: - chain_url += f"&WStore={store_id}" - - # posting date - if when_date: - for chain_url in chains_urls: - chain_url += ( - f"&WDate={when_date.strftime('%d/%m/%Y').reaplce('/','%2F')}" - ) return chains_urls + # # add file types to url + # if files_types: + # chains_urls_with_types = [] + # for files_type in files_types: + # file_type_id = self.file_type_id(files_type) + # chains_urls_with_types.extend( + # [ + # f"{chain_url}&WFileType={file_type_id}" + # for chain_url in chains_urls + # ] + # ) + # chains_urls = chains_urls_with_types + + # # add store id + # if store_id: + # for chain_url in chains_urls: + # chain_url += f"&WStore={store_id}" + + # # posting date + # if when_date: + # for chain_url in chains_urls: + # chain_url += ( + # f"&WDate={when_date.strftime('%d/%m/%Y').reaplce('/','%2F')}" + # ) + # return chains_urls + def get_request_url(self, files_types=None, store_id=None, when_date=None): result = [] for query_params in self._get_all_possible_query_string_params( diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index a1ba629..c958fea 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -194,7 +194,7 @@ def get_by_date(self, requested_date, by_function, intreable_): name_split = by_function(file).split("-") date_info = "-".join(name_split[2:]).rsplit(".", maxsplit=1)[-1] - if date_info == requested_date: + if date_info.startswith(requested_date.strftime("%Y%d%m")): groups_value.append(file) return groups_value diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index 5043ca5..476ed89 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -152,4 +152,5 @@ def scrape( return results except Exception as e: # pylint: disable=broad-except self.on_download_fail(e, download_urls=download_urls, file_names=file_names) + Logger.error_execption(e) return [] diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py b/il_supermarket_scarper/scrappers/tests/test_cases.py index 77d0e35..03c38f2 100644 --- 
a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -4,7 +4,7 @@ import os import uuid import xml.etree.ElementTree as ET -from il_supermarket_scarper.utils import FileTypesFilters, Logger, DumpFolderNames +from il_supermarket_scarper.utils import FileTypesFilters, Logger, DumpFolderNames, _now from il_supermarket_scarper.scrappers_factory import ScraperFactory @@ -255,7 +255,7 @@ def test_scrape_file_from_single_store_last(self): self._clean_scarpe_delete( scraper_enum, store_id=store_id, - when_date="lastast", + when_date=_now(), ) return TestScapers From 187b80e35a6ce26c9172b4bf1a23569569468a39 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 15:23:50 +0000 Subject: [PATCH 6/9] reduce time --- il_supermarket_scarper/engines/apsx.py | 2 +- il_supermarket_scarper/engines/engine.py | 6 +++--- il_supermarket_scarper/scrappers/tests/test_cases.py | 4 +--- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index 12e6978..34970df 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -44,7 +44,7 @@ def _build_query_url(self, query_params): res.append(base + self.aspx_page + query_params) return res - def _get_all_possible_query_string_params( #pylint: disable=unused-argument + def _get_all_possible_query_string_params( # pylint: disable=unused-argument self, files_types=None, store_id=None, when_date=None ): """get the arguments need to add to the url""" diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index c958fea..85cf55b 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -87,9 +87,9 @@ def apply_limit( files_names_to_scrape=None, ): """filter the list according to condition""" - assert ( - when_date is not None or limit is None - ), "when_date flag can't be applied with limit." + # assert ( + # when_date is not None or limit is None + # ), "when_date flag can't be applied with limit." # filter files already downloaded intreable_ = self.filter_already_downloaded( diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py b/il_supermarket_scarper/scrappers/tests/test_cases.py index 03c38f2..8e8d0c2 100644 --- a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -253,9 +253,7 @@ def test_scrape_file_from_single_store(self): def test_scrape_file_from_single_store_last(self): """test fetching latest file only""" self._clean_scarpe_delete( - scraper_enum, - store_id=store_id, - when_date=_now(), + scraper_enum, store_id=store_id, when_date=_now(), limit=1 ) return TestScapers From fb980994624f7beca9324399f3ce38583defab8d Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 15:51:39 +0000 Subject: [PATCH 7/9] . 
--- il_supermarket_scarper/engines/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index 85cf55b..29c95f7 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -128,7 +128,7 @@ def apply_limit( intreable_ = self.get_by_date(when_date, by_function, intreable_) elif isinstance(when_date, str) and when_date == "latest": intreable_ = self.get_only_latest(by_function, intreable_) - else: + elif when_date is not None: raise ValueError(f"when_date should be datetime or bool, got {when_date}") Logger.info(f"Number of entry after filter keeping latast is {len(intreable_)}") From 0089414ba1bf6b46b4f7021a74dbb42ab96efbf1 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Thu, 10 Oct 2024 17:20:48 +0000 Subject: [PATCH 8/9] runtime --- il_supermarket_scarper/engines/engine.py | 4 ++-- il_supermarket_scarper/scrappers/tests/test_cases.py | 7 +++---- il_supermarket_scarper/utils/connection.py | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index 29c95f7..2832c40 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -134,7 +134,7 @@ def apply_limit( Logger.info(f"Number of entry after filter keeping latast is {len(intreable_)}") # filter by limit if the 'files_types' filter is not on. - if limit and files_types is None: + if limit: assert limit > 0, "Limit must be greater than 0" Logger.info(f"Limit: {limit}") intreable_ = intreable_[: min(limit, len(list(intreable_)))] @@ -192,7 +192,7 @@ def get_by_date(self, requested_date, by_function, intreable_): groups_value = [] for file in intreable_: name_split = by_function(file).split("-") - date_info = "-".join(name_split[2:]).rsplit(".", maxsplit=1)[-1] + date_info = name_split[-1].rsplit(".", maxsplit=1)[0] if date_info.startswith(requested_date.strftime("%Y%d%m")): groups_value.append(file) diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py b/il_supermarket_scarper/scrappers/tests/test_cases.py index 8e8d0c2..9bd0dd7 100644 --- a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -63,9 +63,8 @@ def _make_sure_filter_work( store_mark.append(source) assert ( - not limit or len(files_found) == limit - ), f""" Found {files_found} - f"files but should be {limit}""" + limit is None or len(files_found) == limit + ), f""" Found {files_found} f"files but should be {limit}""" def _make_sure_file_contain_chain_ids(self, chain_ids, file): """make sure the scraper download only the chain id""" @@ -201,7 +200,7 @@ def test_scrape_one(self): def test_scrape_ten(self): """scrape ten file and make sure they exists""" - self._clean_scarpe_delete(scraper_enum, limit=None) + self._clean_scarpe_delete(scraper_enum, limit=10) def test_scrape_promo(self): """scrape one promo file and make sure it exists""" diff --git a/il_supermarket_scarper/utils/connection.py b/il_supermarket_scarper/utils/connection.py index 5e17bcd..785e842 100644 --- a/il_supermarket_scarper/utils/connection.py +++ b/il_supermarket_scarper/utils/connection.py @@ -292,12 +292,12 @@ def collect_from_ftp(ftp_host, ftp_username, ftp_password, ftp_path, timeout=60 @download_connection_retry() def fetch_temporary_gz_file_from_ftp( - ftp_host, ftp_username, ftp_password, ftp_path, temporary_gz_file_path + ftp_host, 
ftp_username, ftp_password, ftp_path, temporary_gz_file_path, timeout=15 ): """download a file from a cerberus base site.""" with open(temporary_gz_file_path, "wb") as file_ftp: file_name = ntpath.basename(temporary_gz_file_path) - ftp = FTP_TLS(ftp_host, ftp_username, ftp_password) + ftp = FTP_TLS(ftp_host, ftp_username, ftp_password, timeout=timeout) ftp.trust_server_pasv_ipv4_address = True ftp.cwd(ftp_path) ftp.retrbinary("RETR " + file_name, file_ftp.write) From 966f57baee929cf7add32ed89f4cbb7216fbf623 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Fri, 11 Oct 2024 09:03:00 +0000 Subject: [PATCH 9/9] . --- il_supermarket_scarper/engines/engine.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index 2832c40..a55741f 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -188,13 +188,15 @@ def get_only_latest(self, by_function, intreable_): def get_by_date(self, requested_date, by_function, intreable_): """get by date""" - + # + date_format = requested_date.strftime("%Y%m%d") + # groups_value = [] for file in intreable_: - name_split = by_function(file).split("-") + name_split = by_function(file).split("-", maxsplit=2) date_info = name_split[-1].rsplit(".", maxsplit=1)[0] - if date_info.startswith(requested_date.strftime("%Y%d%m")): + if date_info.startswith(date_format): groups_value.append(file) return groups_value
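A minimal sketch of the when_date file-name matching that get_by_date() in engines/engine.py settles on by PATCH 9/9: the date token is the last dash-separated field of the file name, compared against the requested day formatted as %Y%m%d. The sample file name below is assumed for illustration only and is not taken from the patches.

    import datetime

    def matches_requested_date(file_name, requested_date):
        # Mirrors get_by_date() after PATCH 9/9: split on "-" at most twice,
        # take the last piece, drop the extension, and prefix-match YYYYMMDD.
        date_format = requested_date.strftime("%Y%m%d")
        name_split = file_name.split("-", maxsplit=2)
        date_info = name_split[-1].rsplit(".", maxsplit=1)[0]
        return date_info.startswith(date_format)

    # Hypothetical chain/store/timestamp file name, assumed for the example:
    print(matches_requested_date("PriceFull7290027600007-001-202410100300.gz",
                                 datetime.datetime(2024, 10, 10)))  # True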
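A minimal usage sketch for the format_stats_as_json() helper added in PATCH 1/9 and tidied in PATCH 3/9, assuming stress_test.py is importable from the repository root. The workload below is a stand-in, not one of the real scrapers; in the stress test itself the profiler wraps each scraper run and the filter string passed is "israeli-supermarket-scarpers", so with an unrelated workload the filtered list may simply come back empty.

    import cProfile
    import json

    from stress_test import format_stats_as_json

    def workload():
        # Stand-in for scraper.scrape(...) in the real stress test.
        return sum(i * i for i in range(100_000))

    pr = cProfile.Profile()
    pr.enable()
    workload()
    pr.disable()

    # The filter string is a path fragment of the frames you want to keep.
    stats = format_stats_as_json(pr, "israeli-supermarket-scarpers")
    print(json.dumps(stats, indent=2))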