From 6666b3df56bd2ffd4e215dbddbdbe4d42bbb7ed8 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 05:43:17 +0000 Subject: [PATCH 01/20] remove asyncio --- il_supermarket_scarper/engines/cerberus.py | 4 +- .../engines/multipage_web.py | 4 +- il_supermarket_scarper/engines/web.py | 4 +- il_supermarket_scarper/utils/__init__.py | 2 +- il_supermarket_scarper/utils/loop.py | 62 +++++-------------- 5 files changed, 24 insertions(+), 52 deletions(-) diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index ca91676..5d47b1b 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -4,7 +4,7 @@ from il_supermarket_scarper.utils import ( extract_xml_file_from_gz_file, Logger, - execute_in_event_loop, + execute_in_parallels, collect_from_ftp, fetch_temporary_gz_file_from_ftp, retry_files, @@ -62,7 +62,7 @@ def scrape( ) self.on_collected_details(files) - results = execute_in_event_loop( + results = execute_in_parallels( self.persist_from_ftp, files, max_workers=self.max_workers ) self.on_download_completed(results=results) diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index ccd3b5c..f7982bc 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -10,7 +10,7 @@ from il_supermarket_scarper.utils import ( Logger, - execute_in_event_loop, + execute_in_parallels, multiple_page_aggregtion, ) from .web import WebBase @@ -87,7 +87,7 @@ def collect_files_details_from_site( ) ) - download_urls, file_names = execute_in_event_loop( + download_urls, file_names = execute_in_parallels( self.process_links_before_download, pages_to_scrape, aggregtion_function=multiple_page_aggregtion, diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index 474a019..20742e5 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -1,7 +1,7 @@ from bs4 import BeautifulSoup from il_supermarket_scarper.utils import ( Logger, - execute_in_event_loop, + execute_in_parallels, session_and_check_status, retry_files, ) @@ -129,7 +129,7 @@ def scrape( Logger.info(f"collected {len(download_urls)} to download.") if len(download_urls) > 0: - results = execute_in_event_loop( + results = execute_in_parallels( self.save_and_extract, zip(download_urls, file_names), max_workers=self.max_workers, diff --git a/il_supermarket_scarper/utils/__init__.py b/il_supermarket_scarper/utils/__init__.py index 97acc5c..b2ccb25 100644 --- a/il_supermarket_scarper/utils/__init__.py +++ b/il_supermarket_scarper/utils/__init__.py @@ -23,7 +23,7 @@ fetch_temporary_gz_file_from_ftp, wget_file, ) -from .loop import execute_in_event_loop, multiple_page_aggregtion +from .loop import execute_in_parallels, multiple_page_aggregtion from .exceptions import RestartSessionError from .retry import retry_files from .marking import FlakyScraper diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index a8a5f92..02091ed 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -1,16 +1,4 @@ -import asyncio import concurrent.futures -from .logger import Logger - - -def get_event_loop(): - """get the current running event loop""" - try: - return asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - return loop def 
defualt_aggregtion_function(all_done): @@ -35,7 +23,7 @@ def multiple_page_aggregtion(pages_to_scrape): return download_urls, file_names -def execute_in_event_loop( +def execute_in_parallels( function_to_execute, iterable, max_workers=None, @@ -43,43 +31,27 @@ def execute_in_event_loop( ): """execute a job in the event loop""" - loop = get_event_loop() - return loop.run_until_complete( - run_task_async( - function_to_execute, - iterable, - max_workers=max_workers, - aggregtion_function=aggregtion_function, - ) + results = run_tasks( + function_to_execute, + iterable, + max_workers=max_workers, ) + + all_done = aggregtion_function(results) + print(f"Done with {len(all_done)} tasks") + return all_done - -async def run_task_async( +def run_tasks( function_to_execute, iterable, - max_workers=None, - aggregtion_function=defualt_aggregtion_function, + max_workers: int = None, ): - """run task in multi-thread""" - loop = get_event_loop() - + """Run tasks in multi-thread or sequentially""" if max_workers: - # use multi-thread - futures = [] + # Use multi-thread with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - for arg in iterable: - futures.append(loop.run_in_executor(executor, function_to_execute, arg)) - - if len(futures) == 0: - return [] - all_done, not_done = await asyncio.wait(futures) - assert len(not_done) == 0, "Not all tasks are done, should be blocking." + futures = [executor.submit(function_to_execute, arg) for arg in iterable] + return [future.result() for future in concurrent.futures.as_completed(futures)] else: - # or just itreate over all - all_done = [] - for arg in iterable: - all_done.append(function_to_execute(arg)) - all_done = aggregtion_function(list(all_done)) - - Logger.info(f"Done with {len(all_done)} files") - return all_done + # Or just iterate over all + return [function_to_execute(arg) for arg in iterable] \ No newline at end of file From 05e1ddaf1bd8e46ab18a53d9fa132c7733f897b3 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 05:51:26 +0000 Subject: [PATCH 02/20] . --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 489255c..76b5823 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ tests_require=dev_required, extras_require={"test": ["pytest"]}, # *strongly* suggested for sharing - version="0.4.5", + version="0.4.6", # The license can be anything you like license="MIT", description="python package that implement a scraping for israeli supermarket data", From 9ba86d03999e9b3057daa40ab3e4329951154f82 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 06:00:40 +0000 Subject: [PATCH 03/20] increase thread --- il_supermarket_scarper/engines/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index cd8016b..e1c2148 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -29,7 +29,7 @@ def __init__(self, chain, chain_id, folder_name=None): super().__init__(chain.value, "status", folder_name=folder_name) self.chain = chain self.chain_id = chain_id - self.max_workers = 5 + self.max_workers = 10 self.storage_path = get_output_folder(self.chain.value, folder_name=folder_name) Logger.info(f"Storage path: {self.storage_path}") From b8904ad2953a96ae1b5c5044f8e0edbafa94cfd9 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 10:54:36 +0000 Subject: [PATCH 04/20] . 
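Note on this patch: the one-character subject hides a fair amount — judging from the diff below it renames execute_in_parallels to execute_in_parallel, adds the thread name (and a "PullingThread" prefix) to the log output, stops filtering null/zero-size Cerberus files by default, and lifts the test limit. For reference, the execution pattern patches 01-04 converge on — a ThreadPoolExecutor with named workers, collected via as_completed — looks roughly like the minimal sketch below. It is illustrative only, not the package's exact code; pull_one and the parameter names are stand-ins.

    import concurrent.futures
    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)-8s [%(threadName)s] %(message)s",
    )

    def pull_one(task):
        # stand-in for persist_from_ftp / save_and_extract
        logging.info("downloading %s", task)
        return task

    def run_in_pool(tasks, max_threads=10):
        if not max_threads:
            # sequential fallback, mirroring the no-worker branch in loop.py
            return [pull_one(t) for t in tasks]
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=max_threads, thread_name_prefix="PullingThread"
        ) as executor:
            futures = [executor.submit(pull_one, t) for t in tasks]
            # as_completed yields futures in completion order, not submission order
            return [f.result() for f in concurrent.futures.as_completed(futures)]

    print(run_in_pool(["a", "b", "c"]))

In this pattern future.result() re-raises any exception thrown inside the worker, so a single failed task aborts the collection loop unless the caller catches it.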
--- il_supermarket_scarper/engines/cerberus.py | 8 ++++---- il_supermarket_scarper/engines/multipage_web.py | 4 ++-- il_supermarket_scarper/engines/web.py | 4 ++-- il_supermarket_scarper/scrappers/tests/test_cases.py | 2 +- il_supermarket_scarper/utils/__init__.py | 2 +- il_supermarket_scarper/utils/logger.py | 10 +++++++++- il_supermarket_scarper/utils/loop.py | 9 +++++---- 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index 5d47b1b..27a8ff8 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -4,7 +4,7 @@ from il_supermarket_scarper.utils import ( extract_xml_file_from_gz_file, Logger, - execute_in_parallels, + execute_in_parallel, collect_from_ftp, fetch_temporary_gz_file_from_ftp, retry_files, @@ -54,15 +54,15 @@ def scrape( files = self.collect_files_details_from_site( limit=limit, files_types=files_types, - filter_null=True, - filter_zero=True, + filter_null=False, + filter_zero=False, store_id=store_id, only_latest=only_latest, files_names_to_scrape=files_names_to_scrape, ) self.on_collected_details(files) - results = execute_in_parallels( + results = execute_in_parallel( self.persist_from_ftp, files, max_workers=self.max_workers ) self.on_download_completed(results=results) diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index f7982bc..ad3fcb3 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -10,7 +10,7 @@ from il_supermarket_scarper.utils import ( Logger, - execute_in_parallels, + execute_in_parallel, multiple_page_aggregtion, ) from .web import WebBase @@ -87,7 +87,7 @@ def collect_files_details_from_site( ) ) - download_urls, file_names = execute_in_parallels( + download_urls, file_names = execute_in_parallel( self.process_links_before_download, pages_to_scrape, aggregtion_function=multiple_page_aggregtion, diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index 20742e5..ca272fc 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -1,7 +1,7 @@ from bs4 import BeautifulSoup from il_supermarket_scarper.utils import ( Logger, - execute_in_parallels, + execute_in_parallel, session_and_check_status, retry_files, ) @@ -129,7 +129,7 @@ def scrape( Logger.info(f"collected {len(download_urls)} to download.") if len(download_urls) > 0: - results = execute_in_parallels( + results = execute_in_parallel( self.save_and_extract, zip(download_urls, file_names), max_workers=self.max_workers, diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py b/il_supermarket_scarper/scrappers/tests/test_cases.py index 3ad23d0..37fab63 100644 --- a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -199,7 +199,7 @@ def test_scrape_one(self): def test_scrape_ten(self): """scrape ten file and make sure they exists""" - self._clean_scarpe_delete(scraper_enum, limit=10) + self._clean_scarpe_delete(scraper_enum, limit=None) def test_scrape_promo(self): """scrape one promo file and make sure it exists""" diff --git a/il_supermarket_scarper/utils/__init__.py b/il_supermarket_scarper/utils/__init__.py index b2ccb25..97956ee 100644 --- a/il_supermarket_scarper/utils/__init__.py +++ b/il_supermarket_scarper/utils/__init__.py @@ -23,7 +23,7 @@ 
fetch_temporary_gz_file_from_ftp, wget_file, ) -from .loop import execute_in_parallels, multiple_page_aggregtion +from .loop import execute_in_parallel, multiple_page_aggregtion from .exceptions import RestartSessionError from .retry import retry_files from .marking import FlakyScraper diff --git a/il_supermarket_scarper/utils/logger.py b/il_supermarket_scarper/utils/logger.py index ae5f747..bd2d8ff 100644 --- a/il_supermarket_scarper/utils/logger.py +++ b/il_supermarket_scarper/utils/logger.py @@ -10,7 +10,7 @@ def build_logger(): if not logger.handlers: logger.setLevel(logging.DEBUG) # set logger level log_formatter = logging.Formatter( - "%(name)-12s %(asctime)s %(levelname)-8s %(filename)s:%(funcName)s %(message)s" + "%(name)-12s %(asctime)s %(levelname)-8s [%(threadName)s] %(filename)s:%(funcName)s %(message)s" ) console_handler = logging.StreamHandler( sys.stdout @@ -42,6 +42,14 @@ def info(cls, msg, *args, **kwargs): if cls.enabled: cls.logger.info(msg, *args, **kwargs) + + @classmethod + def debug(cls, msg, *args, **kwargs): + """log info""" + if cls.enabled: + cls.logger.debug(msg, *args, **kwargs) + + @classmethod def error(cls, msg, *args, **kwargs): """log error""" diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index 02091ed..aff36a3 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -1,5 +1,5 @@ import concurrent.futures - +from il_supermarket_scarper.utils import Logger def defualt_aggregtion_function(all_done): """format the scraping result to the final input""" @@ -23,7 +23,7 @@ def multiple_page_aggregtion(pages_to_scrape): return download_urls, file_names -def execute_in_parallels( +def execute_in_parallel( function_to_execute, iterable, max_workers=None, @@ -31,6 +31,7 @@ def execute_in_parallels( ): """execute a job in the event loop""" + Logger.info(f"Running {len(iterable)} tasks in parallel") results = run_tasks( function_to_execute, iterable, @@ -38,7 +39,7 @@ def execute_in_parallels( ) all_done = aggregtion_function(results) - print(f"Done with {len(all_done)} tasks") + print(f"Done with {len(all_done)} tasks in parallel") return all_done def run_tasks( @@ -49,7 +50,7 @@ def run_tasks( """Run tasks in multi-thread or sequentially""" if max_workers: # Use multi-thread - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers,thread_name_prefix="PullingThread") as executor: futures = [executor.submit(function_to_execute, arg) for arg in iterable] return [future.result() for future in concurrent.futures.as_completed(futures)] else: From 324b4879cc249fbfd6ed365e2e826deef1773f08 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 13:54:24 +0000 Subject: [PATCH 05/20] , --- il_supermarket_scarper/engines/engine.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index e1c2148..49debb1 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -29,7 +29,7 @@ def __init__(self, chain, chain_id, folder_name=None): super().__init__(chain.value, "status", folder_name=folder_name) self.chain = chain self.chain_id = chain_id - self.max_workers = 10 + self.max_workers = 20 self.storage_path = get_output_folder(self.chain.value, folder_name=folder_name) Logger.info(f"Storage path: {self.storage_path}") From 
c97ec69b6fd0490c52a2545f70ccdbaaac740358 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 15:42:27 +0000 Subject: [PATCH 06/20] max_threads --- il_supermarket_scarper/engines/apsx.py | 8 +++++-- il_supermarket_scarper/engines/cerberus.py | 5 +++-- il_supermarket_scarper/engines/engine.py | 4 ++-- .../engines/multipage_web.py | 7 ++++-- .../engines/publishprice.py | 11 +++++++++- il_supermarket_scarper/engines/web.py | 6 ++--- il_supermarket_scarper/utils/logger.py | 2 -- il_supermarket_scarper/utils/loop.py | 22 ++++++++++++------- 8 files changed, 43 insertions(+), 22 deletions(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index c507be0..d822799 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -7,8 +7,12 @@ class Aspx(WebBase, ABC): """class for aspx scapers""" - def __init__(self, chain, chain_id, url, aspx_page, folder_name=None): - super().__init__(chain, chain_id, url, folder_name=folder_name) + def __init__( + self, chain, chain_id, url, aspx_page, folder_name=None, max_threads=5 + ): + super().__init__( + chain, chain_id, url, folder_name=folder_name, max_threads=max_threads + ) self.aspx_page = aspx_page def extract_task_from_entry(self, all_trs): diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index 27a8ff8..fbfdb6b 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -26,8 +26,9 @@ def __init__( ftp_path="/", ftp_username="", ftp_password="", + max_threads=5, ): - super().__init__(chain, chain_id, folder_name) + super().__init__(chain, chain_id, folder_name, max_threads) self.ftp_host = ftp_host self.ftp_path = ftp_path self.ftp_username = ftp_username @@ -63,7 +64,7 @@ def scrape( self.on_collected_details(files) results = execute_in_parallel( - self.persist_from_ftp, files, max_workers=self.max_workers + self.persist_from_ftp, files, max_threads=self.max_threads ) self.on_download_completed(results=results) self.on_scrape_completed(self.get_storage_path()) diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index 49debb1..b04db4c 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -21,7 +21,7 @@ class Engine(ScraperStatus, ABC): """base engine for scraping""" - def __init__(self, chain, chain_id, folder_name=None): + def __init__(self, chain, chain_id, folder_name=None, max_threads=10): assert DumpFolderNames.is_valid_folder_name( chain ), "chain name can contain only abc and -" @@ -29,7 +29,7 @@ def __init__(self, chain, chain_id, folder_name=None): super().__init__(chain.value, "status", folder_name=folder_name) self.chain = chain self.chain_id = chain_id - self.max_workers = 20 + self.max_threads = max_threads self.storage_path = get_output_folder(self.chain.value, folder_name=folder_name) Logger.info(f"Storage path: {self.storage_path}") diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index ad3fcb3..ceafb3a 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -31,8 +31,11 @@ def __init__( total_page_xpath="""//*[@id="gridContainer"]/table/ tfoot/tr/td/a[6]/@href""", total_pages_pattern=r"^\/\?page\=([0-9]{3})$", + max_threads=5, ): - super().__init__(chain, chain_id, url=url, folder_name=folder_name) + super().__init__( + chain, 
chain_id, url=url, folder_name=folder_name, max_threads=max_threads + ) self.total_page_xpath = total_page_xpath self.total_pages_pattern = total_pages_pattern @@ -91,7 +94,7 @@ def collect_files_details_from_site( self.process_links_before_download, pages_to_scrape, aggregtion_function=multiple_page_aggregtion, - max_workers=self.max_workers, + max_threads=self.max_threads, ) file_names, download_urls = self.apply_limit_zip( file_names, diff --git a/il_supermarket_scarper/engines/publishprice.py b/il_supermarket_scarper/engines/publishprice.py index a958bb7..c8acd80 100644 --- a/il_supermarket_scarper/engines/publishprice.py +++ b/il_supermarket_scarper/engines/publishprice.py @@ -17,12 +17,21 @@ class PublishPrice(WebBase): but this is not implemented. """ - def __init__(self, chain, chain_id, site_infix, folder_name=None, domain="prices"): + def __init__( + self, + chain, + chain_id, + site_infix, + folder_name=None, + domain="prices", + max_threads=5, + ): super().__init__( chain, chain_id, url=f"https://{domain}.{site_infix}.co.il/", folder_name=folder_name, + max_threads=max_threads, ) self.folder = None diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index ca272fc..07d9da6 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -12,8 +12,8 @@ class WebBase(Engine): """scrape the file of websites that the only why to download them is via web""" - def __init__(self, chain, chain_id, url, folder_name=None): - super().__init__(chain, chain_id, folder_name) + def __init__(self, chain, chain_id, url, folder_name=None, max_threads=5): + super().__init__(chain, chain_id, folder_name, max_threads=max_threads) self.url = url self.max_retry = 2 @@ -132,7 +132,7 @@ def scrape( results = execute_in_parallel( self.save_and_extract, zip(download_urls, file_names), - max_workers=self.max_workers, + max_threads=self.max_threads, ) else: results = [] diff --git a/il_supermarket_scarper/utils/logger.py b/il_supermarket_scarper/utils/logger.py index bd2d8ff..ce8bf4c 100644 --- a/il_supermarket_scarper/utils/logger.py +++ b/il_supermarket_scarper/utils/logger.py @@ -42,14 +42,12 @@ def info(cls, msg, *args, **kwargs): if cls.enabled: cls.logger.info(msg, *args, **kwargs) - @classmethod def debug(cls, msg, *args, **kwargs): """log info""" if cls.enabled: cls.logger.debug(msg, *args, **kwargs) - @classmethod def error(cls, msg, *args, **kwargs): """log error""" diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index aff36a3..55c355e 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -1,6 +1,7 @@ import concurrent.futures from il_supermarket_scarper.utils import Logger + def defualt_aggregtion_function(all_done): """format the scraping result to the final input""" result = [] @@ -26,7 +27,7 @@ def multiple_page_aggregtion(pages_to_scrape): def execute_in_parallel( function_to_execute, iterable, - max_workers=None, + max_threads=None, aggregtion_function=defualt_aggregtion_function, ): """execute a job in the event loop""" @@ -35,24 +36,29 @@ def execute_in_parallel( results = run_tasks( function_to_execute, iterable, - max_workers=max_workers, + max_threads=max_threads, ) - + all_done = aggregtion_function(results) print(f"Done with {len(all_done)} tasks in parallel") return all_done + def run_tasks( function_to_execute, iterable, - max_workers: int = None, + max_threads: int = None, ): """Run tasks in multi-thread or sequentially""" - if 
max_workers: + if max_threads: # Use multi-thread - with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers,thread_name_prefix="PullingThread") as executor: + with concurrent.futures.ThreadPoolExecutor( + max_threads=max_threads, thread_name_prefix="PullingThread" + ) as executor: futures = [executor.submit(function_to_execute, arg) for arg in iterable] - return [future.result() for future in concurrent.futures.as_completed(futures)] + return [ + future.result() for future in concurrent.futures.as_completed(futures) + ] else: # Or just iterate over all - return [function_to_execute(arg) for arg in iterable] \ No newline at end of file + return [function_to_execute(arg) for arg in iterable] From 6a7a9148f7e8146340119d1fb03c9e587cfea2fa Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 15:47:01 +0000 Subject: [PATCH 07/20] . --- il_supermarket_scarper/scrappers/ramilevy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/il_supermarket_scarper/scrappers/ramilevy.py b/il_supermarket_scarper/scrappers/ramilevy.py index 3c203f6..ba43834 100644 --- a/il_supermarket_scarper/scrappers/ramilevy.py +++ b/il_supermarket_scarper/scrappers/ramilevy.py @@ -11,4 +11,5 @@ def __init__(self, folder_name=None): chain_id="7290058140886", folder_name=folder_name, ftp_username="RamiLevi", + max_threads=10, ) From 843cda71c26091353b5abc56d2b7952317bbf8ee Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 16:06:52 +0000 Subject: [PATCH 08/20] . --- il_supermarket_scarper/engines/cerberus.py | 2 +- .../engines/multipage_web.py | 2 +- il_supermarket_scarper/engines/web.py | 2 +- il_supermarket_scarper/utils/loop.py | 2 +- stress_test.py | 31 +++++++++++++++++++ 5 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 stress_test.py diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index fbfdb6b..4539f2a 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -64,7 +64,7 @@ def scrape( self.on_collected_details(files) results = execute_in_parallel( - self.persist_from_ftp, files, max_threads=self.max_threads + self.persist_from_ftp, list(files), max_threads=self.max_threads ) self.on_download_completed(results=results) self.on_scrape_completed(self.get_storage_path()) diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index ceafb3a..cb2520a 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -92,7 +92,7 @@ def collect_files_details_from_site( download_urls, file_names = execute_in_parallel( self.process_links_before_download, - pages_to_scrape, + list(pages_to_scrape), aggregtion_function=multiple_page_aggregtion, max_threads=self.max_threads, ) diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index 07d9da6..d8c05e0 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -131,7 +131,7 @@ def scrape( if len(download_urls) > 0: results = execute_in_parallel( self.save_and_extract, - zip(download_urls, file_names), + list(zip(download_urls, file_names)), max_threads=self.max_threads, ) else: diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index 55c355e..51c847f 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -53,7 +53,7 @@ def run_tasks( if max_threads: # Use 
multi-thread with concurrent.futures.ThreadPoolExecutor( - max_threads=max_threads, thread_name_prefix="PullingThread" + max_workers=max_threads, thread_name_prefix="PullingThread" ) as executor: futures = [executor.submit(function_to_execute, arg) for arg in iterable] return [ diff --git a/stress_test.py b/stress_test.py new file mode 100644 index 0000000..4989077 --- /dev/null +++ b/stress_test.py @@ -0,0 +1,31 @@ +from il_supermarket_scarper.scrappers_factory import ScraperFactory +import time,json +import datetime + +if __name__ == "__main__": + + result = {} + for scraper in ScraperFactory.all_scrapers_name(): + + def full_execution(): + initer = ScraperFactory.get(scraper)() + return initer.scrape(limit=None) + + execution_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + start_time = time.time() + files = full_execution() + end_time = time.time() + result[scraper] = { + "execution_time":execution_time, + "start_time":start_time, + "end_time":end_time, + "time": end_time - start_time, + "files": len(files) + } + + with open("stress_test_results.json", "w") as f: + json.dump(result, f) + + + + From 02c302e8b50c5503364a428cbb83ebcd3da08b9c Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 16:09:46 +0000 Subject: [PATCH 09/20] . --- il_supermarket_scarper/utils/logger.py | 3 ++- stress_test.py | 35 +++++++++++++------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/il_supermarket_scarper/utils/logger.py b/il_supermarket_scarper/utils/logger.py index ce8bf4c..63dce82 100644 --- a/il_supermarket_scarper/utils/logger.py +++ b/il_supermarket_scarper/utils/logger.py @@ -10,7 +10,8 @@ def build_logger(): if not logger.handlers: logger.setLevel(logging.DEBUG) # set logger level log_formatter = logging.Formatter( - "%(name)-12s %(asctime)s %(levelname)-8s [%(threadName)s] %(filename)s:%(funcName)s %(message)s" + "%(name)-12s %(asctime)s %(levelname)-8s " + "[%(threadName)s] %(filename)s:%(funcName)s %(message)s" ) console_handler = logging.StreamHandler( sys.stdout diff --git a/stress_test.py b/stress_test.py index 4989077..c3d9efd 100644 --- a/stress_test.py +++ b/stress_test.py @@ -1,31 +1,30 @@ -from il_supermarket_scarper.scrappers_factory import ScraperFactory -import time,json +import time +import json import datetime +from il_supermarket_scarper.scrappers_factory import ScraperFactory + if __name__ == "__main__": result = {} - for scraper in ScraperFactory.all_scrapers_name(): + for scraper_name in ScraperFactory.all_scrapers_name(): - def full_execution(): + def full_execution(scraper): + """full execution of the scraper""" initer = ScraperFactory.get(scraper)() - return initer.scrape(limit=None) - + return initer.scrape() + execution_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") start_time = time.time() - files = full_execution() + files = full_execution(scraper_name) end_time = time.time() - result[scraper] = { - "execution_time":execution_time, - "start_time":start_time, - "end_time":end_time, + result[scraper_name] = { + "execution_time": execution_time, + "start_time": start_time, + "end_time": end_time, "time": end_time - start_time, - "files": len(files) + "files": len(files), } - - with open("stress_test_results.json", "w") as f: - json.dump(result, f) - - - + with open("stress_test_results.json", "w", encoding="utf-8") as f: + json.dump(result, f) From 20d53fee6a7da4260d35e0247ec5dbf1995ddc45 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 16:33:41 +0000 Subject: [PATCH 10/20] . 
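Another bare subject; the substantive change below is in retry.py, where the per-iteration results were being appended as nested lists instead of extended into the flat result list, plus the stress test now scrapes into a temporary directory. A short plain-Python illustration of the append/extend difference the fix relies on (the dicts are generic placeholders, not the package's result schema):

    all_results = [{"file": "a"}]
    other_results = [{"file": "b"}, {"file": "c"}]

    all_results.append(other_results)
    # -> [{'file': 'a'}, [{'file': 'b'}, {'file': 'c'}]]   (nested list inside the list)

    all_results = [{"file": "a"}]
    all_results.extend(other_results)
    # -> [{'file': 'a'}, {'file': 'b'}, {'file': 'c'}]     (flat)

With append, anything downstream that iterates the collected results (counting files, aggregating statuses) would trip over the embedded list.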
--- il_supermarket_scarper/utils/retry.py | 2 +- stress_test.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/il_supermarket_scarper/utils/retry.py b/il_supermarket_scarper/utils/retry.py index b4b23cb..b898268 100644 --- a/il_supermarket_scarper/utils/retry.py +++ b/il_supermarket_scarper/utils/retry.py @@ -233,7 +233,7 @@ def __retry_files( # next iteration retry_list, other_results = compute_retry(results) - all_results.append(other_results) + all_results.extend(other_results) # if there is not files in the retry list, break if len(retry_list) == 0: break diff --git a/stress_test.py b/stress_test.py index c3d9efd..8afd864 100644 --- a/stress_test.py +++ b/stress_test.py @@ -1,9 +1,11 @@ import time import json import datetime +import tempfile from il_supermarket_scarper.scrappers_factory import ScraperFactory + if __name__ == "__main__": result = {} @@ -11,8 +13,9 @@ def full_execution(scraper): """full execution of the scraper""" - initer = ScraperFactory.get(scraper)() - return initer.scrape() + with tempfile.TemporaryDirectory() as tmpdirname: + initer = ScraperFactory.get(scraper)(folder_name=tmpdirname) + return initer.scrape() execution_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") start_time = time.time() From 000e1823f76ea53dab5a45c55a1a391e5070775c Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sat, 28 Sep 2024 18:34:58 +0000 Subject: [PATCH 11/20] . --- il_supermarket_scarper/engines/apsx.py | 54 +++++++++++++++++-- .../engines/multipage_web.py | 3 +- il_supermarket_scarper/engines/web.py | 5 +- il_supermarket_scarper/scrappers/yellow.py | 1 + .../utils/databases/json_file.py | 51 +++++++++++++----- .../utils/scraper_status.py | 8 ++- stress_test.py | 18 +++++++ 7 files changed, 118 insertions(+), 22 deletions(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index d822799..3c454e8 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from il_supermarket_scarper.utils import Logger +from il_supermarket_scarper.utils import Logger, FileTypesFilters from .web import WebBase @@ -15,6 +15,20 @@ def __init__( ) self.aspx_page = aspx_page + def file_type_id(self, file_type): + """get the file type id""" + if file_type == FileTypesFilters.STORE_FILE.name: + return 1 + if file_type == FileTypesFilters.PRICE_FILE.name: + return 2 + if file_type == FileTypesFilters.PROMO_FILE.name: + return 3 + if file_type == FileTypesFilters.PRICE_FULL_FILE.name: + return 4 + if file_type == FileTypesFilters.PROMO_FULL_FILE.name: + return 5 + raise ValueError(f"file type {file_type} not supported") + def extract_task_from_entry(self, all_trs): download_urls: list = list( map(lambda x: self.url + self.get_href_from_entry(x), all_trs) @@ -30,18 +44,48 @@ def _build_query_url(self, query_params): res.append(base + self.aspx_page + query_params) return res - def _get_all_possible_query_string_params(self): + def _get_all_possible_query_string_params( + self, files_types=None, store_id=None, when_date=None + ): """get the arguments need to add to the url""" if isinstance(self.chain_id, list): res = [] for c_id in self.chain_id: res.append(f"?code=={c_id}") return res - return [f"?code={self.chain_id}"] + chains_urls = [f"?code={self.chain_id}"] + + # add file types to url + if files_types: + chains_urls_with_types = [] + for files_type in files_types: + file_type_id = self.file_type_id(files_type) + 
chains_urls_with_types.extend( + [ + f"{chain_url}&WFileType={file_type_id}" + for chain_url in chains_urls + ] + ) + chains_urls = chains_urls_with_types + + # add store id + if store_id: + for chain_url in chains_urls: + chain_url += f"&WStore={store_id}" + + # posting date + if when_date: + for chain_url in chains_urls: + chain_url += ( + f"&WDate={when_date.strftime('%d/%m/%Y').reaplce('/','%2F')}" + ) + return chains_urls - def get_request_url(self): + def get_request_url(self, files_types=None, store_id=None, when_date=None): result = [] - for query_params in self._get_all_possible_query_string_params(): + for query_params in self._get_all_possible_query_string_params( + files_types=files_types, store_id=store_id, when_date=when_date + ): result.extend(self._build_query_url(query_params)) Logger.info(f"Request url: {result}") return result diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index cb2520a..81b159a 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -74,11 +74,12 @@ def collect_files_details_from_site( limit=None, files_types=None, store_id=None, + when_date=None, only_latest=False, files_names_to_scrape=None, ): self.post_scraping() - url = self.get_request_url() + url = self.get_request_url(files_types=files_types, store_id=store_id, when_date=when_date) total_pages = self.get_number_of_pages(url[0]) Logger.info(f"Found {total_pages} pages") diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index d8c05e0..d74faec 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -66,11 +66,14 @@ def collect_files_details_from_site( limit=None, files_types=None, store_id=None, + when_date=None, only_latest=False, files_names_to_scrape=None, ): """collect all enteris to download from site""" - urls_to_collect_link_from = self.get_request_url() + urls_to_collect_link_from = self.get_request_url( + files_types, store_id, when_date + ) all_trs = [] for url in urls_to_collect_link_from: diff --git a/il_supermarket_scarper/scrappers/yellow.py b/il_supermarket_scarper/scrappers/yellow.py index 65b60cd..1c14187 100644 --- a/il_supermarket_scarper/scrappers/yellow.py +++ b/il_supermarket_scarper/scrappers/yellow.py @@ -12,4 +12,5 @@ def __init__(self, folder_name=None): folder_name=folder_name, ftp_username="Paz_bo", ftp_password="paz468", + max_threads=10, ) diff --git a/il_supermarket_scarper/utils/databases/json_file.py b/il_supermarket_scarper/utils/databases/json_file.py index f7b2588..98e6ce0 100644 --- a/il_supermarket_scarper/utils/databases/json_file.py +++ b/il_supermarket_scarper/utils/databases/json_file.py @@ -30,21 +30,47 @@ def _get_database_file_path(self): """Get the full path to the database JSON file.""" return os.path.join(self.base_path, self.database_file) - def insert_document(self, collection_name, document): + def _read_database(self): + """Read the JSON database file and return its contents.""" + file_path = self._get_database_file_path() + data = {} + + # Load existing data from the file + if os.path.exists(file_path): + with open(file_path, "r", encoding="utf-8") as file: + try: + data = json.load(file) + except json.JSONDecodeError: + Logger.warning(f"File {file_path} is corrupted, resetting it.") + data = {} + return data + + def _write_database(self, data): + """Write data to the JSON database file.""" + file_path = self._get_database_file_path() + + 
with open(file_path, "w", encoding="utf-8") as file: + json.dump(dict(sorted(data.items())), file, default=str, indent=4) + + def insert_documents(self, collection_name, document): """Insert a document into a collection inside the JSON database.""" if self.collection_status: - file_path = self._get_database_file_path() - data = {} + + data = self._read_database() + # Ensure the collection exists in the database + if collection_name not in data: + data[collection_name] = [] - # Load existing data from the file - if os.path.exists(file_path): - with open(file_path, "r", encoding="utf-8") as file: - try: - data = json.load(file) - except json.JSONDecodeError: - Logger.warning(f"File {file_path} is corrupted, resetting it.") - data = {} + # Add the new document to the collection + data[collection_name].extend(document) + # Save the updated data back to the file + self._write_database(data) + + def insert_document(self, collection_name, document): + """Insert a document into a collection inside the JSON database.""" + if self.collection_status: + data = self._read_database() # Ensure the collection exists in the database if collection_name not in data: data[collection_name] = [] @@ -53,8 +79,7 @@ def insert_document(self, collection_name, document): data[collection_name].append(document) # Save the updated data back to the file - with open(file_path, "w", encoding="utf-8") as file: - json.dump(dict(sorted(data.items())), file, default=str, indent=4) + self._write_database(data) def find_document(self, collection_name, query): """Find a document in a collection based on a query.""" diff --git a/il_supermarket_scarper/utils/scraper_status.py b/il_supermarket_scarper/utils/scraper_status.py index 3a7efbc..a0078a1 100644 --- a/il_supermarket_scarper/utils/scraper_status.py +++ b/il_supermarket_scarper/utils/scraper_status.py @@ -100,12 +100,16 @@ def _add_downloaded_files_to_list(self, results, **_): """Add downloaded files to the MongoDB collection.""" if self.database.is_collection_enabled(): when = _now() + + documents = [] for res in results: if res["extract_succefully"]: - self.database.insert_document( - self.VERIFIED_DOWNLOADS, + documents.append( {"file_name": res["file_name"], "when": when}, ) + self.database.insert_documents( + self.VERIFIED_DOWNLOADS, + documents) @lock_by_string() def on_scrape_completed(self, folder_name): diff --git a/stress_test.py b/stress_test.py index 8afd864..1bddb40 100644 --- a/stress_test.py +++ b/stress_test.py @@ -1,8 +1,14 @@ import time import json +import sys import datetime import tempfile from il_supermarket_scarper.scrappers_factory import ScraperFactory +import pstats +import cProfile +from io import StringIO + + @@ -19,9 +25,21 @@ def full_execution(scraper): execution_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") start_time = time.time() + pr = cProfile.Profile() + pr.enable() + files = full_execution(scraper_name) + + pr.disable() + + stream = StringIO() + ps = pstats.Stats(pr, stream=stream) + ps.print_stats() + stream.seek(0) + end_time = time.time() result[scraper_name] = { + "status": stream.read(), "execution_time": execution_time, "start_time": start_time, "end_time": end_time, From 0b4c19a3fc221717a0f828031f9f1eb7ab7c5c3b Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Sun, 29 Sep 2024 17:24:56 +0000 Subject: [PATCH 12/20] . 
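Mostly lint and formatting cleanup of the previous commit (import order in the stress test, line wrapping, a pylint unused-argument pragma on the base get_request_url). Worth a note in passing: the profiling added to stress_test.py in the previous patch follows the standard cProfile/pstats recipe; a self-contained sketch of the same idea, with a stand-in workload instead of a real scraper run:

    import cProfile
    import pstats
    from io import StringIO

    def workload():
        # stand-in for a full scraper execution
        return sum(i * i for i in range(100_000))

    profiler = cProfile.Profile()
    profiler.enable()
    workload()
    profiler.disable()

    stream = StringIO()
    stats = pstats.Stats(profiler, stream=stream)
    stats.sort_stats("cumulative").print_stats(10)  # top 10 entries by cumulative time
    print(stream.getvalue())

Dumping the unsorted, uncapped print_stats() output into the per-scraper JSON works, but sorting by cumulative time and capping the line count keeps the report readable.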
--- il_supermarket_scarper/engines/apsx.py | 4 ++-- il_supermarket_scarper/engines/multipage_web.py | 4 +++- il_supermarket_scarper/engines/web.py | 4 ++-- il_supermarket_scarper/utils/databases/json_file.py | 4 ++-- il_supermarket_scarper/utils/scraper_status.py | 4 +--- stress_test.py | 6 +----- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index 3c454e8..72b9a7b 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -67,12 +67,12 @@ def _get_all_possible_query_string_params( ] ) chains_urls = chains_urls_with_types - + # add store id if store_id: for chain_url in chains_urls: chain_url += f"&WStore={store_id}" - + # posting date if when_date: for chain_url in chains_urls: diff --git a/il_supermarket_scarper/engines/multipage_web.py b/il_supermarket_scarper/engines/multipage_web.py index 81b159a..b42c6b2 100644 --- a/il_supermarket_scarper/engines/multipage_web.py +++ b/il_supermarket_scarper/engines/multipage_web.py @@ -79,7 +79,9 @@ def collect_files_details_from_site( files_names_to_scrape=None, ): self.post_scraping() - url = self.get_request_url(files_types=files_types, store_id=store_id, when_date=when_date) + url = self.get_request_url( + files_types=files_types, store_id=store_id, when_date=when_date + ) total_pages = self.get_number_of_pages(url[0]) Logger.info(f"Found {total_pages} pages") diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index d74faec..d513e9b 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -22,7 +22,7 @@ def get_data_from_page(self, req_res): soup = BeautifulSoup(req_res.text, features="lxml") return soup.find_all("tr")[1:] - def get_request_url(self): + def get_request_url(self,files_types=None, store_id=None, when_date=None): #pylint: disable=unused-argument """get all links to collect download links from""" return [self.url] @@ -72,7 +72,7 @@ def collect_files_details_from_site( ): """collect all enteris to download from site""" urls_to_collect_link_from = self.get_request_url( - files_types, store_id, when_date + files_types=files_types, store_id=store_id, when_date=when_date ) all_trs = [] diff --git a/il_supermarket_scarper/utils/databases/json_file.py b/il_supermarket_scarper/utils/databases/json_file.py index 98e6ce0..b50ce41 100644 --- a/il_supermarket_scarper/utils/databases/json_file.py +++ b/il_supermarket_scarper/utils/databases/json_file.py @@ -44,7 +44,7 @@ def _read_database(self): Logger.warning(f"File {file_path} is corrupted, resetting it.") data = {} return data - + def _write_database(self, data): """Write data to the JSON database file.""" file_path = self._get_database_file_path() @@ -55,7 +55,7 @@ def _write_database(self, data): def insert_documents(self, collection_name, document): """Insert a document into a collection inside the JSON database.""" if self.collection_status: - + data = self._read_database() # Ensure the collection exists in the database if collection_name not in data: diff --git a/il_supermarket_scarper/utils/scraper_status.py b/il_supermarket_scarper/utils/scraper_status.py index a0078a1..a138540 100644 --- a/il_supermarket_scarper/utils/scraper_status.py +++ b/il_supermarket_scarper/utils/scraper_status.py @@ -107,9 +107,7 @@ def _add_downloaded_files_to_list(self, results, **_): documents.append( {"file_name": res["file_name"], "when": when}, ) - self.database.insert_documents( - 
self.VERIFIED_DOWNLOADS, - documents) + self.database.insert_documents(self.VERIFIED_DOWNLOADS, documents) @lock_by_string() def on_scrape_completed(self, folder_name): diff --git a/stress_test.py b/stress_test.py index 1bddb40..c4017e3 100644 --- a/stress_test.py +++ b/stress_test.py @@ -1,15 +1,11 @@ import time import json -import sys import datetime import tempfile -from il_supermarket_scarper.scrappers_factory import ScraperFactory import pstats import cProfile from io import StringIO - - - +from il_supermarket_scarper.scrappers_factory import ScraperFactory if __name__ == "__main__": From f573a1a3b721aa09af0f8db8b66a676e8e67cf23 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Tue, 1 Oct 2024 20:28:19 +0000 Subject: [PATCH 13/20] change sore --- il_supermarket_scarper/scrappers/tests/test_all.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/scrappers/tests/test_all.py b/il_supermarket_scarper/scrappers/tests/test_all.py index fddbf0e..dd96f72 100644 --- a/il_supermarket_scarper/scrappers/tests/test_all.py +++ b/il_supermarket_scarper/scrappers/tests/test_all.py @@ -6,7 +6,7 @@ class BareketTestCase(make_test_case(ScraperFactory.BAREKET, 5)): """Test case for ScraperFactory.BAREKET.""" -class YaynotBitanTestCase(make_test_case(ScraperFactory.YAYNO_BITAN, 6)): +class YaynotBitanTestCase(make_test_case(ScraperFactory.YAYNO_BITAN, 9032)): """Test case for ScraperFactory.YAYNO_BITAN.""" From 3f59fa946de725eccd1bc9c76834e3d73a97cda8 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Tue, 1 Oct 2024 20:35:54 +0000 Subject: [PATCH 14/20] . --- il_supermarket_scarper/engines/cerberus.py | 6 ++++-- il_supermarket_scarper/engines/engine.py | 4 ++++ il_supermarket_scarper/engines/web.py | 8 +++++++- il_supermarket_scarper/scrapper_runner.py | 2 ++ il_supermarket_scarper/scrappers/tests/test_cases.py | 2 ++ 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/il_supermarket_scarper/engines/cerberus.py b/il_supermarket_scarper/engines/cerberus.py index 4539f2a..302de4b 100644 --- a/il_supermarket_scarper/engines/cerberus.py +++ b/il_supermarket_scarper/engines/cerberus.py @@ -43,6 +43,8 @@ def scrape( store_id=None, only_latest=False, files_names_to_scrape=None, + filter_null=False, + filter_zero=False, ): files = [] try: @@ -55,8 +57,8 @@ def scrape( files = self.collect_files_details_from_site( limit=limit, files_types=files_types, - filter_null=False, - filter_zero=False, + filter_null=filter_null, + filter_zero=filter_zero, store_id=store_id, only_latest=only_latest, files_names_to_scrape=files_names_to_scrape, diff --git a/il_supermarket_scarper/engines/engine.py b/il_supermarket_scarper/engines/engine.py index b04db4c..b36ecf8 100644 --- a/il_supermarket_scarper/engines/engine.py +++ b/il_supermarket_scarper/engines/engine.py @@ -212,6 +212,8 @@ def scrape( store_id=None, only_latest=False, files_names_to_scrape=None, + filter_null=False, + filter_zero=False, ): """run the scraping logic""" self.post_scraping() @@ -221,6 +223,8 @@ def scrape( store_id=store_id, files_names_to_scrape=files_names_to_scrape, only_latest=only_latest, + filter_null=filter_null, + filter_zero=filter_zero, ) Logger.info(f"Starting scraping for {self.chain}") self.make_storage_path_dir() diff --git a/il_supermarket_scarper/engines/web.py b/il_supermarket_scarper/engines/web.py index d513e9b..c232b56 100644 --- a/il_supermarket_scarper/engines/web.py +++ b/il_supermarket_scarper/engines/web.py @@ -22,7 +22,9 @@ def get_data_from_page(self, req_res): soup = 
BeautifulSoup(req_res.text, features="lxml") return soup.find_all("tr")[1:] - def get_request_url(self,files_types=None, store_id=None, when_date=None): #pylint: disable=unused-argument + def get_request_url( + self, files_types=None, store_id=None, when_date=None + ): # pylint: disable=unused-argument """get all links to collect download links from""" return [self.url] @@ -109,6 +111,8 @@ def scrape( store_id=None, only_latest=False, files_names_to_scrape=None, + filter_null=False, + filter_zero=False, ): """scarpe the files from multipage sites""" download_urls, file_names = [], [] @@ -118,6 +122,8 @@ def scrape( files_types=files_types, store_id=store_id, only_latest=only_latest, + filter_null=filter_null, + filter_zero=filter_zero, ) download_urls, file_names = self.collect_files_details_from_site( diff --git a/il_supermarket_scarper/scrapper_runner.py b/il_supermarket_scarper/scrapper_runner.py index b0a5a34..3e6978b 100644 --- a/il_supermarket_scarper/scrapper_runner.py +++ b/il_supermarket_scarper/scrapper_runner.py @@ -96,6 +96,8 @@ def scrape_one( store_id=store_id, only_latest=only_latest, files_names_to_scrape=None, + filter_null=False, + filter_zero=False, ) Logger.info(f"done scraping {chain_name}") diff --git a/il_supermarket_scarper/scrappers/tests/test_cases.py b/il_supermarket_scarper/scrappers/tests/test_cases.py index 37fab63..79064ce 100644 --- a/il_supermarket_scarper/scrappers/tests/test_cases.py +++ b/il_supermarket_scarper/scrappers/tests/test_cases.py @@ -143,6 +143,8 @@ def __clean_scarpe_delete( "files_types": file_type, "store_id": store_id, "only_latest": only_latest, + "filter_null": True, + "filter_zero": True, } scraper.scrape(**kwarg) From db81cd11961acfecd9da325742fbb7d07cc509f5 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Tue, 1 Oct 2024 20:52:23 +0000 Subject: [PATCH 15/20] . --- il_supermarket_scarper/engines/apsx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/engines/apsx.py b/il_supermarket_scarper/engines/apsx.py index 72b9a7b..53a6eca 100644 --- a/il_supermarket_scarper/engines/apsx.py +++ b/il_supermarket_scarper/engines/apsx.py @@ -51,7 +51,7 @@ def _get_all_possible_query_string_params( if isinstance(self.chain_id, list): res = [] for c_id in self.chain_id: - res.append(f"?code=={c_id}") + res.append(f"?code={c_id}") return res chains_urls = [f"?code={self.chain_id}"] From 6297b9bade15b34cd0e74408672d8db270982397 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Tue, 1 Oct 2024 22:34:15 +0000 Subject: [PATCH 16/20] fixed --- il_supermarket_scarper/utils/loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index 51c847f..3a1babc 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -1,5 +1,5 @@ import concurrent.futures -from il_supermarket_scarper.utils import Logger +from logger import Logger def defualt_aggregtion_function(all_done): From 01c385913b54a6eabaff4b5c8fc4b7612e7809bf Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Wed, 2 Oct 2024 13:20:02 +0000 Subject: [PATCH 17/20] . 
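The previous patch ("fixed") swapped loop.py over to a bare `from logger import Logger`, which only resolves when the utils directory itself happens to be on sys.path; this one turns it into an explicit relative import. The underlying issue is presumably the circular dependency the absolute form created: utils/__init__.py imports from .loop, so loop.py importing Logger back through il_supermarket_scarper.utils re-enters the package __init__ while it is still initializing. Sketch of the shape of the problem (package and names are stand-ins, contents abridged):

    # pkg/utils/__init__.py
    from .loop import run_tasks        # __init__ pulls in loop ...

    # pkg/utils/loop.py -- circular variant
    from pkg.utils import Logger       # ... and loop re-enters the package __init__

    # pkg/utils/loop.py -- what this patch lands on
    from .logger import Logger         # import the sibling module directly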
--- il_supermarket_scarper/utils/loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index 3a1babc..d04bffb 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -1,5 +1,5 @@ import concurrent.futures -from logger import Logger +from .logger import Logger def defualt_aggregtion_function(all_done): From 8d9ce41904161e50f62f343314abacd206903931 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Wed, 2 Oct 2024 16:33:07 +0000 Subject: [PATCH 18/20] . --- il_supermarket_scarper/utils/loop.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/il_supermarket_scarper/utils/loop.py b/il_supermarket_scarper/utils/loop.py index d04bffb..b8c40be 100644 --- a/il_supermarket_scarper/utils/loop.py +++ b/il_supermarket_scarper/utils/loop.py @@ -18,7 +18,10 @@ def multiple_page_aggregtion(pages_to_scrape): download_urls = [] file_names = [] for result in pages_to_scrape: - page_download_urls, page_file_names = result.result() + if hasattr(result, "result"): + page_download_urls, page_file_names = result.result() + else: + page_download_urls, page_file_names = result download_urls.extend(page_download_urls) file_names.extend(page_file_names) return download_urls, file_names From 4020d46a5dd0d9df35ba2d0959902047b56ef1b5 Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Wed, 2 Oct 2024 19:39:54 +0000 Subject: [PATCH 19/20] add execption inforamtion --- il_supermarket_scarper/utils/scraper_status.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/il_supermarket_scarper/utils/scraper_status.py b/il_supermarket_scarper/utils/scraper_status.py index a138540..dd733ab 100644 --- a/il_supermarket_scarper/utils/scraper_status.py +++ b/il_supermarket_scarper/utils/scraper_status.py @@ -1,4 +1,5 @@ import os +import traceback from .logger import Logger from .status import log_folder_details @@ -120,7 +121,10 @@ def on_scrape_completed(self, folder_name): def on_download_fail(self, execption, **additional_info): """report when the scraping in failed""" self._insert_an_update( - ScraperStatus.FAILED, execption=str(execption), **additional_info + ScraperStatus.FAILED, + execption=str(execption), + traceback=traceback.format_exc(), + **additional_info ) def _insert_an_update(self, status, **additional_info): From e9d3b4b94e1517e8472e506574cb012414b45e7d Mon Sep 17 00:00:00 2001 From: Sefi Erlich Date: Wed, 2 Oct 2024 19:40:10 +0000 Subject: [PATCH 20/20] . --- il_supermarket_scarper/utils/scraper_status.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/il_supermarket_scarper/utils/scraper_status.py b/il_supermarket_scarper/utils/scraper_status.py index dd733ab..92ece14 100644 --- a/il_supermarket_scarper/utils/scraper_status.py +++ b/il_supermarket_scarper/utils/scraper_status.py @@ -121,10 +121,10 @@ def on_scrape_completed(self, folder_name): def on_download_fail(self, execption, **additional_info): """report when the scraping in failed""" self._insert_an_update( - ScraperStatus.FAILED, - execption=str(execption), + ScraperStatus.FAILED, + execption=str(execption), traceback=traceback.format_exc(), - **additional_info + **additional_info, ) def _insert_an_update(self, status, **additional_info):
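End-of-series notes on the last three patches: the hasattr(result, "result") guard added to multiple_page_aggregtion presumably exists because execute_in_parallel now resolves futures inside run_tasks (via as_completed and .result()), so the aggregation function receives plain (urls, names) tuples rather than Future objects; the guard keeps it working with either. Patches 19-20 attach the formatted traceback to the FAILED status record. One property of that API worth keeping in mind: traceback.format_exc() formats the exception currently being handled, so it is only meaningful when called from (or beneath) an except block — outside one it returns "NoneType: None\n". A minimal illustration of the reporting shape, with illustrative names and a fake file name:

    import traceback

    def report_failure(exc, **additional_info):
        # same shape as on_download_fail: stringified exception plus the active traceback
        return {
            "exception": str(exc),
            "traceback": traceback.format_exc(),
            **additional_info,
        }

    try:
        1 / 0
    except ZeroDivisionError as err:
        record = report_failure(err, file_name="PriceFull-example.gz")
        print(record["traceback"])  # full "Traceback (most recent call last): ..." text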