From f682a80c43a375f688b875b712ba6c7103326c3a Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 15:34:02 +0100 Subject: [PATCH 1/7] BrowserSteps - Speed up scraping, refactor screenshot handling for very long pages --- .../blueprint/browser_steps/__init__.py | 29 ++++++------- .../blueprint/browser_steps/browser_steps.py | 43 ++++++++----------- .../content_fetchers/playwright.py | 14 ++++-- 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py index a472ba4b055..3e5c50c51ba 100644 --- a/changedetectionio/blueprint/browser_steps/__init__.py +++ b/changedetectionio/blueprint/browser_steps/__init__.py @@ -160,14 +160,13 @@ def browsersteps_ui_update(): if not browsersteps_sessions.get(browsersteps_session_id): return make_response('No session exists under that ID', 500) - + is_last_step = False # Actions - step/apply/etc, do the thing and return state if request.method == 'POST': # @todo - should always be an existing session step_operation = request.form.get('operation') step_selector = request.form.get('selector') step_optional_value = request.form.get('optional_value') - step_n = int(request.form.get('step_n')) is_last_step = strtobool(request.form.get('is_last_step')) # @todo try.. accept.. nice errors not popups.. @@ -182,16 +181,6 @@ def browsersteps_ui_update(): # Try to find something of value to give back to the user return make_response(str(e).splitlines()[0], 401) - # Get visual selector ready/update its data (also use the current filter info from the page?) - # When the last 'apply' button was pressed - # @todo this adds overhead because the xpath selection is happening twice - u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url - if is_last_step and u: - (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data() - watch = datastore.data['watching'].get(uuid) - if watch: - watch.save_screenshot(screenshot=screenshot) - watch.save_xpath_data(data=xpath_data) # if not this_session.page: # cleanup_playwright_session() @@ -199,10 +188,20 @@ def browsersteps_ui_update(): # Screenshots and other info only needed on requesting a step (POST) try: - state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() + (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() + if is_last_step: + watch = datastore.data['watching'].get(uuid) + u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url + if watch and u: + watch.save_screenshot(screenshot=screenshot) + watch.save_xpath_data(data=xpath_data) + except playwright._impl._api_types.Error as e: return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401) + except Exception as e: + return make_response("Error fetching screenshot and element data - " + str(e), 401) + # SEND THIS BACK TO THE BROWSER # Use send_file() which is way faster than read/write loop on bytes import json from tempfile import mkstemp @@ -210,8 +209,8 @@ def browsersteps_ui_update(): tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-") output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format( - base64.b64encode(state[0]).decode('ascii')), - 'xpath_data': state[1], + base64.b64encode(screenshot).decode('ascii')), + 'xpath_data': xpath_data, 'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start, 'browser_time_remaining': round(remaining) }) diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py index f533edf0b82..00a30a36c68 100644 --- a/changedetectionio/blueprint/browser_steps/browser_steps.py +++ b/changedetectionio/blueprint/browser_steps/browser_steps.py @@ -1,14 +1,15 @@ -#!/usr/bin/env python3 - import os import time import re from random import randint from loguru import logger +from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD from changedetectionio.content_fetchers.base import manage_user_agent from changedetectionio.safe_jinja import render as jinja_render + + # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end # 0- off, 1- on browser_step_ui_config = {'Choose one': '0 0', @@ -279,6 +280,7 @@ def connect(self, proxy=None): logger.debug(f"Time to browser setup {time.time()-now:.2f}s") self.page.wait_for_timeout(1 * 1000) + def mark_as_closed(self): logger.debug("Page closed, cleaning up..") @@ -296,39 +298,30 @@ def get_current_state(self): now = time.time() self.page.wait_for_timeout(1 * 1000) - # The actual screenshot - screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40) + full_height = self.page.evaluate("document.documentElement.scrollHeight") + + if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD: + logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.") + screenshot = capture_stitched_together_full_page(self.page) + else: + screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40) + + logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") + + now = time.time() self.page.evaluate("var include_filters=''") # Go find the interactive elements # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements) + xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") # So the JS will find the smallest one first xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) - logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s") - # except + logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s") + # playwright._impl._api_types.Error: Browser closed. # @todo show some countdown timer? return (screenshot, xpath_data) - def request_visualselector_data(self): - """ - Does the same that the playwright operation in content_fetcher does - This is used to just bump the VisualSelector data so it' ready to go if they click on the tab - @todo refactor and remove duplicate code, add include_filters - :param xpath_data: - :param screenshot: - :param current_include_filters: - :return: - """ - import importlib.resources - self.page.evaluate("var include_filters=''") - xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() - from changedetectionio.content_fetchers import visualselector_xpath_selectors - xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) - xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") - screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) - - return (screenshot, xpath_data) diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 53be33f1d04..70a3c6972b8 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -4,6 +4,7 @@ from loguru import logger +from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable @@ -199,10 +200,15 @@ def run(self, # acceptable screenshot quality here try: # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage - self.screenshot = self.page.screenshot(type='jpeg', - full_page=True, - quality=int(os.getenv("SCREENSHOT_QUALITY", 72)), - ) + full_height = self.page.evaluate("document.documentElement.scrollHeight") + + if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD: + logger.warning( + f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.") + self.screenshot = capture_stitched_together_full_page(self.page) + else: + self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) + except Exception as e: # It's likely the screenshot was too long/big and something crashed raise ScreenshotUnavailable(url=url, status_code=self.status_code) From 7ea66929e1a1e5a5f6e3a0c5b5ba1a457e99103d Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 15:39:43 +0100 Subject: [PATCH 2/7] No need to read the temp response from disk --- .../blueprint/browser_steps/__init__.py | 41 +++++++++---------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/changedetectionio/blueprint/browser_steps/__init__.py b/changedetectionio/blueprint/browser_steps/__init__.py index 3e5c50c51ba..dc51c8da4b7 100644 --- a/changedetectionio/blueprint/browser_steps/__init__.py +++ b/changedetectionio/blueprint/browser_steps/__init__.py @@ -22,7 +22,10 @@ browsersteps_sessions = {} io_interface_context = None - +import json +import base64 +import hashlib +from flask import Response def construct_blueprint(datastore: ChangeDetectionStore): browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates") @@ -202,27 +205,21 @@ def browsersteps_ui_update(): return make_response("Error fetching screenshot and element data - " + str(e), 401) # SEND THIS BACK TO THE BROWSER - # Use send_file() which is way faster than read/write loop on bytes - import json - from tempfile import mkstemp - from flask import send_file - tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-") - - output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format( - base64.b64encode(screenshot).decode('ascii')), - 'xpath_data': xpath_data, - 'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start, - 'browser_time_remaining': round(remaining) - }) - - with os.fdopen(tmp_fd, 'w') as f: - f.write(output) - - response = make_response(send_file(path_or_file=tmp_file, - mimetype='application/json; charset=UTF-8', - etag=True)) - # No longer needed - os.unlink(tmp_file) + + output = { + "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}", + "xpath_data": xpath_data, + "session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start, + "browser_time_remaining": round(remaining) + } + json_data = json.dumps(output) + + # Generate an ETag (hash of the response body) + etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest() + + # Create the response with ETag + response = Response(json_data, mimetype="application/json; charset=UTF-8") + response.set_etag(etag_hash) return response From fa549b6e3903b4cfe866f4fd27a0887f0686eb0b Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 15:44:28 +0100 Subject: [PATCH 3/7] Woops --- changedetectionio/content_fetchers/helpers.py | 101 ++++++++++++++++++ .../res/xpath_element_scraper.js | 2 +- 2 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 changedetectionio/content_fetchers/helpers.py diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py new file mode 100644 index 00000000000..25b47234ea1 --- /dev/null +++ b/changedetectionio/content_fetchers/helpers.py @@ -0,0 +1,101 @@ + +# Pages with a vertical height longer than this will use the 'stitch together' method. + +# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices). +# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits. +# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer. + +SCREENSHOT_SIZE_STITCH_THRESHOLD=16000 +from loguru import logger + +def capture_stitched_together_full_page(page): + import io + import os + import time + from PIL import Image, ImageDraw, ImageFont + + MAX_TOTAL_HEIGHT = 12000 # Maximum total height for the final image + MAX_CHUNK_HEIGHT = 5000 # Height per screenshot chunk + WARNING_HEIGHT = 20 # Height of the warning text overlay + + # Save the original viewport size + original_viewport = page.viewport_size + now = time.time() + + try: + viewport = page.viewport_size + page_height = page.evaluate("document.documentElement.scrollHeight") + + # Limit the total capture height + capture_height = min(page_height, MAX_TOTAL_HEIGHT) + + images = [] + total_captured_height = 0 + + for offset in range(0, capture_height, MAX_CHUNK_HEIGHT): + # Ensure we do not exceed the total height limit + chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height) + + # Adjust viewport size for this chunk + page.set_viewport_size({"width": viewport["width"], "height": chunk_height}) + + # Scroll to the correct position + page.evaluate(f"window.scrollTo(0, {offset})") + + # Capture screenshot chunk + screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) + images.append(Image.open(io.BytesIO(screenshot_bytes))) + + total_captured_height += chunk_height + + # Stop if we reached the maximum total height + if total_captured_height >= MAX_TOTAL_HEIGHT: + break + + # Create the final stitched image + stitched_image = Image.new('RGB', (viewport["width"], total_captured_height)) + y_offset = 0 + + # Stitch the screenshot chunks together + for img in images: + stitched_image.paste(img, (0, y_offset)) + y_offset += img.height + + logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s") + + # Overlay warning text if the screenshot was trimmed + if page_height > MAX_TOTAL_HEIGHT: + draw = ImageDraw.Draw(stitched_image) + warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long" + + # Load font (default system font if Arial is unavailable) + try: + font = ImageFont.truetype("arial.ttf", 21) # Arial (Windows/Mac) + except IOError: + font = ImageFont.load_default() # Default font if Arial not found + + # Get text bounding box (correct method for newer Pillow versions) + text_bbox = draw.textbbox((0, 0), warning_text, font=font) + text_width = text_bbox[2] - text_bbox[0] # Calculate text width + text_height = text_bbox[3] - text_bbox[1] # Calculate text height + + # Define background rectangle (top of the image) + draw.rectangle([(0, 0), (viewport["width"], WARNING_HEIGHT)], fill="white") + + # Center text horizontally within the warning area + text_x = (viewport["width"] - text_width) // 2 + text_y = (WARNING_HEIGHT - text_height) // 2 + + # Draw the warning text in red + draw.text((text_x, text_y), warning_text, fill="red", font=font) + + # Save or return the final image + output = io.BytesIO() + stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) + screenshot = output.getvalue() + + finally: + # Restore the original viewport size + page.set_viewport_size(original_viewport) + + return screenshot diff --git a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js index ccd89436973..d514a2064d9 100644 --- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js +++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js @@ -41,7 +41,7 @@ const findUpTag = (el) => { // Strategy 1: If it's an input, with name, and there's only one, prefer that if (el.name !== undefined && el.name.length) { - var proposed = el.tagName + "[name=" + el.name + "]"; + var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]"; var proposed_element = window.document.querySelectorAll(proposed); if (proposed_element.length) { if (proposed_element.length === 1) { From 1f7a855529a38de3c1a74eead5773ce382a34215 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 16:01:41 +0100 Subject: [PATCH 4/7] Small CPU improvements --- changedetectionio/content_fetchers/playwright.py | 4 ++++ .../content_fetchers/res/xpath_element_scraper.js | 15 +++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py index 70a3c6972b8..861cea60ec7 100644 --- a/changedetectionio/content_fetchers/playwright.py +++ b/changedetectionio/content_fetchers/playwright.py @@ -90,6 +90,7 @@ def run(self, from playwright.sync_api import sync_playwright import playwright._impl._errors from changedetectionio.content_fetchers import visualselector_xpath_selectors + import time self.delete_browser_steps_screenshots() response = None @@ -180,6 +181,7 @@ def run(self, self.page.wait_for_timeout(extra_wait * 1000) + now = time.time() # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) if current_include_filters is not None: self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) @@ -191,6 +193,8 @@ def run(self, self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") self.content = self.page.content() + logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s") + # Bug 3 in Playwright screenshot handling # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it # JPEG is better here because the screenshots can be very very large diff --git a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js index d514a2064d9..182a9b1df92 100644 --- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js +++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js @@ -102,13 +102,15 @@ function collectVisibleElements(parent, visibleElements) { const children = parent.children; for (let i = 0; i < children.length; i++) { const child = children[i]; + const computedStyle = window.getComputedStyle(child); + if ( child.nodeType === Node.ELEMENT_NODE && - window.getComputedStyle(child).display !== 'none' && - window.getComputedStyle(child).visibility !== 'hidden' && + computedStyle.display !== 'none' && + computedStyle.visibility !== 'hidden' && child.offsetWidth >= 0 && child.offsetHeight >= 0 && - window.getComputedStyle(child).contentVisibility !== 'hidden' + computedStyle.contentVisibility !== 'hidden' ) { // If the child is an element and is visible, recursively collect visible elements collectVisibleElements(child, visibleElements); @@ -173,6 +175,7 @@ visibleElementsArray.forEach(function (element) { // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ; + const computedStyle = window.getComputedStyle(element); size_pos.push({ xpath: xpath_result, @@ -184,10 +187,10 @@ visibleElementsArray.forEach(function (element) { tagName: (element.tagName) ? element.tagName.toLowerCase() : '', // tagtype used by Browser Steps tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', - isClickable: window.getComputedStyle(element).cursor === "pointer", + isClickable: computedStyle.cursor === "pointer", // Used by the keras trainer - fontSize: window.getComputedStyle(element).getPropertyValue('font-size'), - fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'), + fontSize: computedStyle.getPropertyValue('font-size'), + fontWeight: computedStyle.getPropertyValue('font-weight'), hasDigitCurrency: hasDigitCurrency, label: label, }); From 3e7f2f2badded23640bfc0426d35bc6e1eeb40ac Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 16:16:11 +0100 Subject: [PATCH 5/7] Add build debug --- .github/workflows/pypi-release.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 3c27c2dabf8..45ab84a245e 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -45,6 +45,10 @@ jobs: - name: Test that the basic pip built package runs without error run: | set -ex + ls -alR + find . + ls -al dist + pip3 install dist/changedetection.io*.whl changedetection.io -d /tmp -p 10000 & sleep 3 From acb642a9376bbf47067128d0d37886089884e883 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 16:20:36 +0100 Subject: [PATCH 6/7] Improved test install --- .github/workflows/pypi-release.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 45ab84a245e..4210c9b5756 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -46,11 +46,11 @@ jobs: run: | set -ex ls -alR - find . - ls -al dist - pip3 install dist/changedetection.io*.whl + # Find and install the first .whl file + find dist -type f -name "*.whl" -exec pip3 install {} \; -quit changedetection.io -d /tmp -p 10000 & + sleep 3 curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null From e06275a4ad8c84672460ce4652bd4a4decd0009e Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Thu, 27 Feb 2025 16:39:30 +0100 Subject: [PATCH 7/7] Tweak vars --- changedetectionio/content_fetchers/helpers.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py index 25b47234ea1..79826dccf79 100644 --- a/changedetectionio/content_fetchers/helpers.py +++ b/changedetectionio/content_fetchers/helpers.py @@ -5,7 +5,10 @@ # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits. # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer. -SCREENSHOT_SIZE_STITCH_THRESHOLD=16000 + +# The size at which we will switch to stitching method +SCREENSHOT_SIZE_STITCH_THRESHOLD=8000 + from loguru import logger def capture_stitched_together_full_page(page): @@ -14,9 +17,9 @@ def capture_stitched_together_full_page(page): import time from PIL import Image, ImageDraw, ImageFont - MAX_TOTAL_HEIGHT = 12000 # Maximum total height for the final image - MAX_CHUNK_HEIGHT = 5000 # Height per screenshot chunk - WARNING_HEIGHT = 20 # Height of the warning text overlay + MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode) + MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk + WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay # Save the original viewport size original_viewport = page.viewport_size @@ -70,7 +73,7 @@ def capture_stitched_together_full_page(page): # Load font (default system font if Arial is unavailable) try: - font = ImageFont.truetype("arial.ttf", 21) # Arial (Windows/Mac) + font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac) except IOError: font = ImageFont.load_default() # Default font if Arial not found @@ -80,11 +83,11 @@ def capture_stitched_together_full_page(page): text_height = text_bbox[3] - text_bbox[1] # Calculate text height # Define background rectangle (top of the image) - draw.rectangle([(0, 0), (viewport["width"], WARNING_HEIGHT)], fill="white") + draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white") # Center text horizontally within the warning area text_x = (viewport["width"] - text_width) // 2 - text_y = (WARNING_HEIGHT - text_height) // 2 + text_y = (WARNING_TEXT_HEIGHT - text_height) // 2 # Draw the warning text in red draw.text((text_x, text_y), warning_text, fill="red", font=font)