From f682a80c43a375f688b875b712ba6c7103326c3a Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 15:34:02 +0100
Subject: [PATCH 1/7] BrowserSteps - Speed up scraping, refactor screenshot
 handling for very long pages

---
 .../blueprint/browser_steps/__init__.py       | 29 ++++++-------
 .../blueprint/browser_steps/browser_steps.py  | 43 ++++++++-----------
 .../content_fetchers/playwright.py            | 14 ++++--
 3 files changed, 42 insertions(+), 44 deletions(-)

diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py
index a472ba4b055..3e5c50c51ba 100644
--- a/changedetectionio/blueprint/browser_steps/__init__.py
+++ b/changedetectionio/blueprint/browser_steps/__init__.py
@@ -160,14 +160,13 @@ def browsersteps_ui_update():
         if not browsersteps_sessions.get(browsersteps_session_id):
             return make_response('No session exists under that ID', 500)
 
-
+        is_last_step = False
         # Actions - step/apply/etc, do the thing and return state
         if request.method == 'POST':
             # @todo - should always be an existing session
             step_operation = request.form.get('operation')
             step_selector = request.form.get('selector')
             step_optional_value = request.form.get('optional_value')
-            step_n = int(request.form.get('step_n'))
             is_last_step = strtobool(request.form.get('is_last_step'))
 
             # @todo try.. accept.. nice errors not popups..
@@ -182,16 +181,6 @@ def browsersteps_ui_update():
                 # Try to find something of value to give back to the user
                 return make_response(str(e).splitlines()[0], 401)
 
-            # Get visual selector ready/update its data (also use the current filter info from the page?)
-            # When the last 'apply' button was pressed
-            # @todo this adds overhead because the xpath selection is happening twice
-            u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
-            if is_last_step and u:
-                (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
-                watch = datastore.data['watching'].get(uuid)
-                if watch:
-                    watch.save_screenshot(screenshot=screenshot)
-                    watch.save_xpath_data(data=xpath_data)
 
 #        if not this_session.page:
 #            cleanup_playwright_session()
@@ -199,10 +188,20 @@ def browsersteps_ui_update():
 
         # Screenshots and other info only needed on requesting a step (POST)
         try:
-            state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
+            (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
+            if is_last_step:
+                watch = datastore.data['watching'].get(uuid)
+                u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
+                if watch and u:
+                    watch.save_screenshot(screenshot=screenshot)
+                    watch.save_xpath_data(data=xpath_data)
+
         except playwright._impl._api_types.Error as e:
             return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
+        except Exception as e:
+            return make_response("Error fetching screenshot and element data - " + str(e), 401)
 
+        # SEND THIS BACK TO THE BROWSER
         # Use send_file() which is way faster than read/write loop on bytes
         import json
         from tempfile import mkstemp
@@ -210,8 +209,8 @@ def browsersteps_ui_update():
         tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
 
         output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
-            base64.b64encode(state[0]).decode('ascii')),
-            'xpath_data': state[1],
+            base64.b64encode(screenshot).decode('ascii')),
+            'xpath_data': xpath_data,
             'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
             'browser_time_remaining': round(remaining)
         })
diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
index f533edf0b82..00a30a36c68 100644
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -1,14 +1,15 @@
-#!/usr/bin/env python3
-
 import os
 import time
 import re
 from random import randint
 from loguru import logger
 
+from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
 from changedetectionio.content_fetchers.base import manage_user_agent
 from changedetectionio.safe_jinja import render as jinja_render
 
+
+
 # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
 # 0- off, 1- on
 browser_step_ui_config = {'Choose one': '0 0',
@@ -279,6 +280,7 @@ def connect(self, proxy=None):
         logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
         self.page.wait_for_timeout(1 * 1000)
 
+
     def mark_as_closed(self):
         logger.debug("Page closed, cleaning up..")
 
@@ -296,39 +298,30 @@ def get_current_state(self):
         now = time.time()
         self.page.wait_for_timeout(1 * 1000)
 
-        # The actual screenshot
-        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
 
+        full_height = self.page.evaluate("document.documentElement.scrollHeight")
+
+        if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
+            logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
+            screenshot = capture_stitched_together_full_page(self.page)
+        else:
+            screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
+
+        logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
+
+        now = time.time()
         self.page.evaluate("var include_filters=''")
         # Go find the interactive elements
         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
         elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
+
         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
         # So the JS will find the smallest one first
         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-        logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s")
-        # except
+        logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
+
         # playwright._impl._api_types.Error: Browser closed.
         # @todo show some countdown timer?
         return (screenshot, xpath_data)
 
-    def request_visualselector_data(self):
-        """
-        Does the same that the playwright operation in content_fetcher does
-        This is used to just bump the VisualSelector data so it' ready to go if they click on the tab
-        @todo refactor and remove duplicate code, add include_filters
-        :param xpath_data:
-        :param screenshot:
-        :param current_include_filters:
-        :return:
-        """
-        import importlib.resources
-        self.page.evaluate("var include_filters=''")
-        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
-        from changedetectionio.content_fetchers import visualselector_xpath_selectors
-        xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
-        xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
-        screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
-
-        return (screenshot, xpath_data)
diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py
index 53be33f1d04..70a3c6972b8 100644
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -4,6 +4,7 @@
 
 from loguru import logger
 
+from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
 from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
 from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
 
@@ -199,10 +200,15 @@ def run(self,
             # acceptable screenshot quality here
             try:
                 # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
-                self.screenshot = self.page.screenshot(type='jpeg',
-                                                       full_page=True,
-                                                       quality=int(os.getenv("SCREENSHOT_QUALITY", 72)),
-                                                       )
+                full_height = self.page.evaluate("document.documentElement.scrollHeight")
+
+                if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
+                    logger.warning(
+                        f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
+                    self.screenshot = capture_stitched_together_full_page(self.page)
+                else:
+                    self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
+
             except Exception as e:
                 # It's likely the screenshot was too long/big and something crashed
                 raise ScreenshotUnavailable(url=url, status_code=self.status_code)

From 7ea66929e1a1e5a5f6e3a0c5b5ba1a457e99103d Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 15:39:43 +0100
Subject: [PATCH 2/7] No need to read the temp response from disk

---
 .../blueprint/browser_steps/__init__.py       | 41 +++++++++----------
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py
index 3e5c50c51ba..dc51c8da4b7 100644
--- a/changedetectionio/blueprint/browser_steps/__init__.py
+++ b/changedetectionio/blueprint/browser_steps/__init__.py
@@ -22,7 +22,10 @@
 
 browsersteps_sessions = {}
 io_interface_context = None
-
+import json
+import base64
+import hashlib
+from flask import Response
 
 def construct_blueprint(datastore: ChangeDetectionStore):
     browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
@@ -202,27 +205,21 @@ def browsersteps_ui_update():
             return make_response("Error fetching screenshot and element data - " + str(e), 401)
 
         # SEND THIS BACK TO THE BROWSER
-        # Use send_file() which is way faster than read/write loop on bytes
-        import json
-        from tempfile import mkstemp
-        from flask import send_file
-        tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
-
-        output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
-            base64.b64encode(screenshot).decode('ascii')),
-            'xpath_data': xpath_data,
-            'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
-            'browser_time_remaining': round(remaining)
-        })
-
-        with os.fdopen(tmp_fd, 'w') as f:
-            f.write(output)
-
-        response = make_response(send_file(path_or_file=tmp_file,
-                                           mimetype='application/json; charset=UTF-8',
-                                           etag=True))
-        # No longer needed
-        os.unlink(tmp_file)
+
+        output = {
+            "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
+            "xpath_data": xpath_data,
+            "session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
+            "browser_time_remaining": round(remaining)
+        }
+        json_data = json.dumps(output)
+
+        # Generate an ETag (hash of the response body)
+        etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()
+
+        # Create the response with ETag
+        response = Response(json_data, mimetype="application/json; charset=UTF-8")
+        response.set_etag(etag_hash)
 
         return response
 

From fa549b6e3903b4cfe866f4fd27a0887f0686eb0b Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 15:44:28 +0100
Subject: [PATCH 3/7] Add missing helpers.py, escape name attribute in selector

---
 changedetectionio/content_fetchers/helpers.py | 101 ++++++++++++++++++
 .../res/xpath_element_scraper.js              |   2 +-
 2 files changed, 102 insertions(+), 1 deletion(-)
 create mode 100644 changedetectionio/content_fetchers/helpers.py

diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py
new file mode 100644
index 00000000000..25b47234ea1
--- /dev/null
+++ b/changedetectionio/content_fetchers/helpers.py
@@ -0,0 +1,101 @@
+
+# Pages with a vertical height longer than this will use the 'stitch together' method.
+
+# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
+# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
+# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
+
+SCREENSHOT_SIZE_STITCH_THRESHOLD=16000
+from loguru import logger
+
+def capture_stitched_together_full_page(page):
+    import io
+    import os
+    import time
+    from PIL import Image, ImageDraw, ImageFont
+
+    MAX_TOTAL_HEIGHT = 12000  # Maximum total height for the final image
+    MAX_CHUNK_HEIGHT = 5000  # Height per screenshot chunk
+    WARNING_HEIGHT = 20  # Height of the warning text overlay
+
+    # Save the original viewport size
+    original_viewport = page.viewport_size
+    now = time.time()
+
+    try:
+        viewport = page.viewport_size
+        page_height = page.evaluate("document.documentElement.scrollHeight")
+
+        # Limit the total capture height
+        capture_height = min(page_height, MAX_TOTAL_HEIGHT)
+
+        images = []
+        total_captured_height = 0
+
+        for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
+            # Ensure we do not exceed the total height limit
+            chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
+
+            # Adjust viewport size for this chunk
+            page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
+
+            # Scroll to the correct position
+            page.evaluate(f"window.scrollTo(0, {offset})")
+
+            # Capture screenshot chunk
+            screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
+            images.append(Image.open(io.BytesIO(screenshot_bytes)))
+
+            total_captured_height += chunk_height
+
+            # Stop if we reached the maximum total height
+            if total_captured_height >= MAX_TOTAL_HEIGHT:
+                break
+
+        # Create the final stitched image
+        stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
+        y_offset = 0
+
+        # Stitch the screenshot chunks together
+        for img in images:
+            stitched_image.paste(img, (0, y_offset))
+            y_offset += img.height
+
+        logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
+
+        # Overlay warning text if the screenshot was trimmed
+        if page_height > MAX_TOTAL_HEIGHT:
+            draw = ImageDraw.Draw(stitched_image)
+            warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
+
+            # Load font (default system font if Arial is unavailable)
+            try:
+                font = ImageFont.truetype("arial.ttf", 21)  # Arial (Windows/Mac)
+            except IOError:
+                font = ImageFont.load_default()  # Default font if Arial not found
+
+            # Get text bounding box (correct method for newer Pillow versions)
+            text_bbox = draw.textbbox((0, 0), warning_text, font=font)
+            text_width = text_bbox[2] - text_bbox[0]  # Calculate text width
+            text_height = text_bbox[3] - text_bbox[1]  # Calculate text height
+
+            # Define background rectangle (top of the image)
+            draw.rectangle([(0, 0), (viewport["width"], WARNING_HEIGHT)], fill="white")
+
+            # Center text horizontally within the warning area
+            text_x = (viewport["width"] - text_width) // 2
+            text_y = (WARNING_HEIGHT - text_height) // 2
+
+            # Draw the warning text in red
+            draw.text((text_x, text_y), warning_text, fill="red", font=font)
+
+        # Encode the final stitched image as in-memory JPEG bytes
+        output = io.BytesIO()
+        stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
+        screenshot = output.getvalue()
+
+    finally:
+        # Restore the original viewport size
+        page.set_viewport_size(original_viewport)
+
+    return screenshot
diff --git a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
index ccd89436973..d514a2064d9 100644
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -41,7 +41,7 @@ const findUpTag = (el) => {
 
     //  Strategy 1: If it's an input, with name, and there's only one, prefer that
     if (el.name !== undefined && el.name.length) {
-        var proposed = el.tagName + "[name=" + el.name + "]";
+        var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
         var proposed_element = window.document.querySelectorAll(proposed);
         if (proposed_element.length) {
             if (proposed_element.length === 1) {

From 1f7a855529a38de3c1a74eead5773ce382a34215 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 16:01:41 +0100
Subject: [PATCH 4/7] Small CPU improvements

---
 changedetectionio/content_fetchers/playwright.py  |  4 ++++
 .../content_fetchers/res/xpath_element_scraper.js | 15 +++++++++------
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py
index 70a3c6972b8..861cea60ec7 100644
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -90,6 +90,7 @@ def run(self,
         from playwright.sync_api import sync_playwright
         import playwright._impl._errors
         from changedetectionio.content_fetchers import visualselector_xpath_selectors
+        import time
         self.delete_browser_steps_screenshots()
         response = None
 
@@ -180,6 +181,7 @@ def run(self,
 
             self.page.wait_for_timeout(extra_wait * 1000)
 
+            now = time.time()
             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
             if current_include_filters is not None:
                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
@@ -191,6 +193,8 @@ def run(self,
             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
 
             self.content = self.page.content()
+            logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
+
             # Bug 3 in Playwright screenshot handling
             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
             # JPEG is better here because the screenshots can be very very large
diff --git a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
index d514a2064d9..182a9b1df92 100644
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -102,13 +102,15 @@ function collectVisibleElements(parent, visibleElements) {
     const children = parent.children;
     for (let i = 0; i < children.length; i++) {
         const child = children[i];
+        const computedStyle = window.getComputedStyle(child);
+
         if (
             child.nodeType === Node.ELEMENT_NODE &&
-            window.getComputedStyle(child).display !== 'none' &&
-            window.getComputedStyle(child).visibility !== 'hidden' &&
+            computedStyle.display !== 'none' &&
+            computedStyle.visibility !== 'hidden' &&
             child.offsetWidth >= 0 &&
             child.offsetHeight >= 0 &&
-            window.getComputedStyle(child).contentVisibility !== 'hidden'
+            computedStyle.contentVisibility !== 'hidden'
         ) {
             // If the child is an element and is visible, recursively collect visible elements
             collectVisibleElements(child, visibleElements);
@@ -173,6 +175,7 @@ visibleElementsArray.forEach(function (element) {
 
     // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
     const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
+    const computedStyle = window.getComputedStyle(element);
 
     size_pos.push({
         xpath: xpath_result,
@@ -184,10 +187,10 @@ visibleElementsArray.forEach(function (element) {
         tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
         // tagtype used by Browser Steps
         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
-        isClickable: window.getComputedStyle(element).cursor === "pointer",
+        isClickable: computedStyle.cursor === "pointer",
         // Used by the keras trainer
-        fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
-        fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
+        fontSize: computedStyle.getPropertyValue('font-size'),
+        fontWeight: computedStyle.getPropertyValue('font-weight'),
         hasDigitCurrency: hasDigitCurrency,
         label: label,
     });

From 3e7f2f2badded23640bfc0426d35bc6e1eeb40ac Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 16:16:11 +0100
Subject: [PATCH 5/7] Add build debug

---
 .github/workflows/pypi-release.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
index 3c27c2dabf8..45ab84a245e 100644
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@@ -45,6 +45,10 @@ jobs:
     - name: Test that the basic pip built package runs without error
       run: |
         set -ex
+        ls -alR 
+        find .
+        ls -al dist
+        
         pip3 install dist/changedetection.io*.whl
         changedetection.io -d /tmp -p 10000 &
         sleep 3

From acb642a9376bbf47067128d0d37886089884e883 Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 16:20:36 +0100
Subject: [PATCH 6/7] Improved test install

---
 .github/workflows/pypi-release.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml
index 45ab84a245e..4210c9b5756 100644
--- a/.github/workflows/pypi-release.yml
+++ b/.github/workflows/pypi-release.yml
@@ -46,11 +46,11 @@ jobs:
       run: |
         set -ex
         ls -alR 
-        find .
-        ls -al dist
         
-        pip3 install dist/changedetection.io*.whl
+        # Find and install the first .whl file
+        find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
         changedetection.io -d /tmp -p 10000 &
+        
         sleep 3
         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null

From e06275a4ad8c84672460ce4652bd4a4decd0009e Mon Sep 17 00:00:00 2001
From: dgtlmoon <dgtlmoon@gmail.com>
Date: Thu, 27 Feb 2025 16:39:30 +0100
Subject: [PATCH 7/7] Tweak stitched screenshot thresholds, rename WARNING_HEIGHT

---
 changedetectionio/content_fetchers/helpers.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py
index 25b47234ea1..79826dccf79 100644
--- a/changedetectionio/content_fetchers/helpers.py
+++ b/changedetectionio/content_fetchers/helpers.py
@@ -5,7 +5,10 @@
 # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
 # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
 
-SCREENSHOT_SIZE_STITCH_THRESHOLD=16000
+
+# The size at which we will switch to stitching method
+SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
+
 from loguru import logger
 
 def capture_stitched_together_full_page(page):
@@ -14,9 +17,9 @@ def capture_stitched_together_full_page(page):
     import time
     from PIL import Image, ImageDraw, ImageFont
 
-    MAX_TOTAL_HEIGHT = 12000  # Maximum total height for the final image
-    MAX_CHUNK_HEIGHT = 5000  # Height per screenshot chunk
-    WARNING_HEIGHT = 20  # Height of the warning text overlay
+    MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4  # Maximum total height for the final image (When in stitch mode)
+    MAX_CHUNK_HEIGHT = 4000  # Height per screenshot chunk
+    WARNING_TEXT_HEIGHT = 20  # Height of the warning text overlay
 
     # Save the original viewport size
     original_viewport = page.viewport_size
@@ -70,7 +73,7 @@ def capture_stitched_together_full_page(page):
 
             # Load font (default system font if Arial is unavailable)
             try:
-                font = ImageFont.truetype("arial.ttf", 21)  # Arial (Windows/Mac)
+                font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT)  # Arial (Windows/Mac)
             except IOError:
                 font = ImageFont.load_default()  # Default font if Arial not found
 
@@ -80,11 +83,11 @@ def capture_stitched_together_full_page(page):
             text_height = text_bbox[3] - text_bbox[1]  # Calculate text height
 
             # Define background rectangle (top of the image)
-            draw.rectangle([(0, 0), (viewport["width"], WARNING_HEIGHT)], fill="white")
+            draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
 
             # Center text horizontally within the warning area
             text_x = (viewport["width"] - text_width) // 2
-            text_y = (WARNING_HEIGHT - text_height) // 2
+            text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
 
             # Draw the warning text in red
             draw.text((text_x, text_y), warning_text, fill="red", font=font)