dgtlmoon · dgtlmoon · Apr 11, 2025 · Apr 10, 2025 · Apr 10, 2025 · Apr 10, 2025
diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -4,7 +4,7 @@
 from random import randint
 from loguru import logger
 
-from changedetectionio.content_fetchers.helpers import capture_full_page
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
 from changedetectionio.content_fetchers.base import manage_user_agent
 from changedetectionio.safe_jinja import render as jinja_render
 
@@ -293,26 +293,38 @@ def has_expired(self):
     def get_current_state(self):
         """Return the screenshot and interactive elements mapping, generally always called after action_()"""
         import importlib.resources
+        import json
+        # Browser steps currently only run in playwright mode (not puppeteer mode), so use the playwright capture_full_page
+        from changedetectionio.content_fetchers.playwright import capture_full_page
+
         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
 
         now = time.time()
         self.page.wait_for_timeout(1 * 1000)
 
-        screenshot = capture_full_page(self.page)
+        screenshot = capture_full_page(page=self.page)
 
         logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
 
         now = time.time()
         self.page.evaluate("var include_filters=''")
         # Go find the interactive elements
         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
-        elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
-        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
 
-        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
+        self.page.request_gc()
+
+        scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
+
+        MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+        xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
+            "visualselector_xpath_selectors": scan_elements,
+            "max_height": MAX_TOTAL_HEIGHT
+        }))
+        self.page.request_gc()
+
         # So the JS will find the smallest one first
         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-        logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
+        logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
 
         # playwright._impl._api_types.Error: Browser closed.
         # @todo show some countdown timer?

diff --git a/changedetectionio/content_fetchers/__init__.py b/changedetectionio/content_fetchers/__init__.py
@@ -7,11 +7,29 @@
 # Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
 visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
 
+SCREENSHOT_MAX_HEIGHT_DEFAULT = 16000
+SCREENSHOT_DEFAULT_QUALITY = 40
+
+# Maximum total height for the final image (when in stitch mode).
+# We limit this to 16000px by default (override with the SCREENSHOT_MAX_HEIGHT environment variable) due to the huge amount of RAM that was being used.
+# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MiB (not including buffers in PIL etc.)
+MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+
+# The size at which we switch to the stitching method. When below this (and
+# below MAX_TOTAL_HEIGHT, which can be set by a user) we use the default
+# screenshot method.
+SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
 
 # available_fetchers() will scan this implementation looking for anything starting with html_
 # this information is used in the form selections
 from changedetectionio.content_fetchers.requests import fetcher as html_requests
 
+
+import importlib.resources
+XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
+INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
+
+
 def available_fetchers():
     # See the if statement at the bottom of this file for how we switch between playwright and webdriver
     import inspect

diff --git a/changedetectionio/content_fetchers/base.py b/changedetectionio/content_fetchers/base.py
@@ -63,11 +63,6 @@ class Fetcher():
     # Time ONTOP of the system defined env minimum time
     render_extract_delay = 0
 
-    def __init__(self):
-        import importlib.resources
-        self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
-        self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
-
     @abstractmethod
     def get_error(self):
         return self.error
@@ -143,6 +138,7 @@ def iterate_browser_steps(self, start_url=None):
                 logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
                 self.screenshot_step("before-" + str(step_n))
                 self.save_step_html("before-" + str(step_n))
+
                 try:
                     optional_value = step['optional_value']
                     selector = step['selector']

diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py