diff --git a/changedetectionio/blueprint/browser_steps/browser_steps.py b/changedetectionio/blueprint/browser_steps/browser_steps.py
index 353f98e46a3..21abd2827ba 100644
--- a/changedetectionio/blueprint/browser_steps/browser_steps.py
+++ b/changedetectionio/blueprint/browser_steps/browser_steps.py
@@ -4,7 +4,7 @@
from random import randint
from loguru import logger
-from changedetectionio.content_fetchers.helpers import capture_full_page
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render
@@ -293,12 +293,16 @@ def has_expired(self):
def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
+ import json
+ # Browser steps currently only run in playwright mode (not puppeteer mode), so use the playwright implementation
+ from changedetectionio.content_fetchers.playwright import capture_full_page
+
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time()
self.page.wait_for_timeout(1 * 1000)
- screenshot = capture_full_page(self.page)
+ screenshot = capture_full_page(page=self.page)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
@@ -306,13 +310,21 @@ def get_current_state(self):
self.page.evaluate("var include_filters=''")
# Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
- elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
- xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
- xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
+ self.page.request_gc()
+
+ scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
+
+ MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+ xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
+ "visualselector_xpath_selectors": scan_elements,
+ "max_height": MAX_TOTAL_HEIGHT
+ }))
+ self.page.request_gc()
+
# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
- logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
+ logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?
diff --git a/changedetectionio/content_fetchers/__init__.py b/changedetectionio/content_fetchers/__init__.py
index 94489f6d5ba..4d9145fae07 100644
--- a/changedetectionio/content_fetchers/__init__.py
+++ b/changedetectionio/content_fetchers/__init__.py
@@ -7,11 +7,29 @@
# Visual Selector scraper - 'Button' is there because some sites have .
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
+SCREENSHOT_MAX_HEIGHT_DEFAULT = 16000
+SCREENSHOT_DEFAULT_QUALITY = 40
+
+# Maximum total height for the final image (When in stitch mode).
+# We limit this to 16000px by default (override with SCREENSHOT_MAX_HEIGHT) due to the huge amount of RAM that was being used
+# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
+MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+
+# Page height (px) at which we switch to the stitching method. Pages shorter
+# than this (and shorter than MAX_TOTAL_HEIGHT, which can be set by a user)
+# use the default screenshot method.
+SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
from changedetectionio.content_fetchers.requests import fetcher as html_requests
+
+import importlib.resources
+XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
+INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
+
+
def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect
diff --git a/changedetectionio/content_fetchers/base.py b/changedetectionio/content_fetchers/base.py
index a482fbdc92c..bfa7e83cdfb 100644
--- a/changedetectionio/content_fetchers/base.py
+++ b/changedetectionio/content_fetchers/base.py
@@ -63,11 +63,6 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0
- def __init__(self):
- import importlib.resources
- self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
- self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
-
@abstractmethod
def get_error(self):
return self.error
@@ -143,6 +138,7 @@ def iterate_browser_steps(self, start_url=None):
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
self.screenshot_step("before-" + str(step_n))
self.save_step_html("before-" + str(step_n))
+
try:
optional_value = step['optional_value']
selector = step['selector']
diff --git a/changedetectionio/content_fetchers/helpers.py b/changedetectionio/content_fetchers/helpers.py
deleted file mode 100644
index def26ca3b59..00000000000
--- a/changedetectionio/content_fetchers/helpers.py
+++ /dev/null
@@ -1,138 +0,0 @@
-# Pages with a vertical height longer than this will use the 'stitch together' method.
-
-# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
-# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
-# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
-
-from loguru import logger
-
-def capture_full_page(page):
- import io
- import os
- import time
- from PIL import Image, ImageDraw, ImageFont
-
- # Maximum total height for the final image (When in stitch mode).
- # We limit this to 16000px due to the huge amount of RAM that was being used
- # Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
- MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
-
- # The size at which we will switch to stitching method, when below this (and
- # MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
- # screenshot method.
- SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
-
- WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
-
- # Save the original viewport size
- original_viewport = page.viewport_size
- start = time.time()
-
- stitched_image = None
-
- try:
- viewport_width = original_viewport["width"]
- viewport_height = original_viewport["height"]
-
- page_height = page.evaluate("document.documentElement.scrollHeight")
-
- # Optimization to avoid unnecessary stitching if we can avoid it
- # Use the default screenshot method for smaller pages to take advantage
- # of GPU and native playwright screenshot optimizations
- if (
- page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
- and page_height < MAX_TOTAL_HEIGHT
- ):
- logger.debug("Using default screenshot method")
- screenshot = page.screenshot(
- type="jpeg",
- quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
- full_page=True,
- )
- logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
- return screenshot
-
- logger.debug(
- "Using stitching method for large screenshot because page height exceeds threshold"
- )
-
- # Limit the total capture height
- capture_height = min(page_height, MAX_TOTAL_HEIGHT)
-
- # Calculate number of chunks needed using ORIGINAL viewport height
- num_chunks = (capture_height + viewport_height - 1) // viewport_height
-
- # Create the final image upfront to avoid holding all chunks in memory
- stitched_image = Image.new("RGB", (viewport_width, capture_height))
-
- # Track cumulative paste position
- y_offset = 0
-
- for _ in range(num_chunks):
- # Scroll to position (no viewport resizing)
- page.evaluate(f"window.scrollTo(0, {y_offset})")
-
- # Capture only the visible area using clip
- with io.BytesIO(
- page.screenshot(
- type="jpeg",
- clip={
- "x": 0,
- "y": 0,
- "width": viewport_width,
- "height": min(viewport_height, capture_height - y_offset),
- },
- quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
- )
- ) as buf:
- with Image.open(buf) as img:
- img.load()
- stitched_image.paste(img, (0, y_offset))
- y_offset += img.height
-
- logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
-
- # Overlay warning text if the screenshot was trimmed
- if capture_height < page_height:
- draw = ImageDraw.Draw(stitched_image)
- warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
-
- # Load font (default system font if Arial is unavailable)
- try:
- font = ImageFont.truetype(
- "arial.ttf", WARNING_TEXT_HEIGHT
- ) # Arial (Windows/Mac)
- except IOError:
- font = ImageFont.load_default() # Default font if Arial not found
-
- # Get text bounding box (correct method for newer Pillow versions)
- text_bbox = draw.textbbox((0, 0), warning_text, font=font)
- text_width = text_bbox[2] - text_bbox[0] # Calculate text width
- text_height = text_bbox[3] - text_bbox[1] # Calculate text height
-
- # Define background rectangle (top of the image)
- draw.rectangle(
- [(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
- )
-
- # Center text horizontally within the warning area
- text_x = (viewport_width - text_width) // 2
- text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
-
- # Draw the warning text in red
- draw.text((text_x, text_y), warning_text, fill="red", font=font)
-
- # Save final image
- with io.BytesIO() as output:
- stitched_image.save(
- output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
- )
- screenshot = output.getvalue()
-
- finally:
- # Restore the original viewport size
- page.set_viewport_size(original_viewport)
- if stitched_image is not None:
- stitched_image.close()
-
- return screenshot
diff --git a/changedetectionio/content_fetchers/playwright.py b/changedetectionio/content_fetchers/playwright.py
index 207b8bb8dfb..411f8c9592b 100644
--- a/changedetectionio/content_fetchers/playwright.py
+++ b/changedetectionio/content_fetchers/playwright.py
@@ -4,10 +4,102 @@
from loguru import logger
-from changedetectionio.content_fetchers.helpers import capture_full_page
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
+ SCREENSHOT_SIZE_STITCH_THRESHOLD, MAX_TOTAL_HEIGHT, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
+from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
+
+
+def capture_full_page(page):
+ import os
+ import time
+ from multiprocessing import Process, Pipe
+
+ start = time.time()
+
+ page_height = page.evaluate("document.documentElement.scrollHeight")
+
+ logger.debug(f"Playwright viewport size {page.viewport_size}")
+
+ ############################################################
+ #### SCREENSHOT FITS INTO ONE SNAPSHOT (SMALLER PAGES) #####
+ ############################################################
+
+ # Optimization to avoid unnecessary stitching if we can avoid it
+ # Use the default screenshot method for smaller pages to take advantage
+ # of GPU and native playwright screenshot optimizations
+ # - No PIL needed here, no danger of memory leaks, no sub process required
+ if (page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD and page_height < MAX_TOTAL_HEIGHT ):
+ logger.debug("Using default screenshot method")
+ page.request_gc()
+ screenshot = page.screenshot(
+ type="jpeg",
+ quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
+ full_page=True,
+ )
+ page.request_gc()
+ logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
+ return screenshot
+
+
+
+ ###################################################################################
+ #### CASE FOR LARGE SCREENSHOTS THAT NEED TO BE TRIMMED DUE TO MEMORY ISSUES #####
+ ###################################################################################
+ # - PIL can easily allocate memory and not release it cleanly
+ # - Fetching screenshot from playwright seems OK
+ # - Image.new is leaky even with .close()
+ # So let's prepare all the data chunks and farm the stitching out to a subprocess for clean memory handling
+
+ logger.debug(
+ "Using stitching method for large screenshot because page height exceeds threshold"
+ )
+
+ # Limit the total capture height
+ capture_height = min(page_height, MAX_TOTAL_HEIGHT)
+
+ # Calculate number of chunks needed using ORIGINAL viewport height
+ num_chunks = (capture_height + page.viewport_size['height'] - 1) // page.viewport_size['height']
+ screenshot_chunks = []
+
+ # Track cumulative paste position
+ y_offset = 0
+ for _ in range(num_chunks):
+
+ page.request_gc()
+ page.evaluate(f"window.scrollTo(0, {y_offset})")
+ page.request_gc()
+ h = min(page.viewport_size['height'], capture_height - y_offset)
+ screenshot_chunks.append(page.screenshot(
+ type="jpeg",
+ clip={
+ "x": 0,
+ "y": 0,
+ "width": page.viewport_size['width'],
+ "height": h,
+ },
+ quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
+ ))
+
+ y_offset += h # maybe better to inspect the image here?
+ page.request_gc()
+
+ # PIL can leak memory in various situations, assign the work to a subprocess for totally clean handling
+
+ parent_conn, child_conn = Pipe()
+ p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, capture_height))
+ p.start()
+ result = parent_conn.recv_bytes()
+ p.join()
+
+ screenshot_chunks = None
+ logger.debug(f"Screenshot - Page height: {page_height} Capture height: {capture_height} - Stitched together in {time.time() - start:.2f}s")
+
+ return result
+
+
class fetcher(Fetcher):
fetcher_description = "Playwright {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
@@ -60,7 +152,8 @@ def __init__(self, proxy_override=None, custom_browser_connection_url=None):
def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
- screenshot = capture_full_page(self.page)
+ screenshot = capture_full_page(page=self.page)
+
if self.browser_steps_screenshot_path is not None:
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
@@ -89,7 +182,6 @@ def run(self,
from playwright.sync_api import sync_playwright
import playwright._impl._errors
- from changedetectionio.content_fetchers import visualselector_xpath_selectors
import time
self.delete_browser_steps_screenshots()
response = None
@@ -185,13 +277,22 @@ def run(self,
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
else:
self.page.evaluate("var include_filters=''")
+ self.page.request_gc()
+
+ # request_gc before and after evaluate to free up memory
+ # @todo browsersteps etc
+ MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+ self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
+ "visualselector_xpath_selectors": visualselector_xpath_selectors,
+ "max_height": MAX_TOTAL_HEIGHT
+ })
+ self.page.request_gc()
- self.xpath_data = self.page.evaluate(
- "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
- self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
+ self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
+ self.page.request_gc()
self.content = self.page.content()
- logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
+ logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
@@ -202,11 +303,18 @@ def run(self,
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
- self.screenshot = capture_full_page(self.page)
+ self.screenshot = capture_full_page(page=self.page)
except Exception as e:
# It's likely the screenshot was too long/big and something crashed
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
finally:
+ # Request garbage collection one more time before closing
+ try:
+ self.page.request_gc()
+ except:
+ pass
+
+ # Clean up resources properly
context.close()
browser.close()
diff --git a/changedetectionio/content_fetchers/puppeteer.py b/changedetectionio/content_fetchers/puppeteer.py
index 9dd06c38e4a..bdb0f4a3df9 100644
--- a/changedetectionio/content_fetchers/puppeteer.py
+++ b/changedetectionio/content_fetchers/puppeteer.py
@@ -6,8 +6,93 @@
from loguru import logger
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
+ SCREENSHOT_SIZE_STITCH_THRESHOLD, MAX_TOTAL_HEIGHT, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
+from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
+
+
+# Bug 3 in Playwright screenshot handling
+# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
+
+# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
+# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
+# acceptable screenshot quality here
+async def capture_full_page(page):
+ import os
+ import time
+ from multiprocessing import Process, Pipe
+
+ start = time.time()
+
+ page_height = await page.evaluate("document.documentElement.scrollHeight")
+
+ logger.debug(f"Puppeteer viewport size {page.viewport}")
+
+ ############################################################
+ #### SCREENSHOT FITS INTO ONE SNAPSHOT (SMALLER PAGES) #####
+ ############################################################
+
+ # Optimization to avoid unnecessary stitching if we can avoid it
+ # Use the default screenshot method for smaller pages to take advantage
+ # of GPU and native browser screenshot optimizations
+ # - No PIL needed here, no danger of memory leaks, no sub process required
+ if (page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD and page_height < MAX_TOTAL_HEIGHT ):
+ logger.debug("Using default screenshot method")
+ await page.evaluate(f"window.scrollTo(0, 0)")
+ screenshot = await page.screenshot(
+ type_="jpeg",
+ quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
+ fullPage=True,
+ )
+ logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
+ return screenshot
+
+ ###################################################################################
+ #### CASE FOR LARGE SCREENSHOTS THAT NEED TO BE TRIMMED DUE TO MEMORY ISSUES #####
+ ###################################################################################
+ # - PIL can easily allocate memory and not release it cleanly
+ # - Fetching screenshot from puppeteer seems OK
+ # - Image.new is leaky even with .close()
+ # So let's prepare all the data chunks and farm the stitching out to a subprocess for clean memory handling
+
+ logger.debug(
+ "Using stitching method for large screenshot because page height exceeds threshold"
+ )
+
+ # Limit the total capture height
+ capture_height = min(page_height, MAX_TOTAL_HEIGHT)
+
+ # Calculate number of chunks needed using ORIGINAL viewport height
+ num_chunks = (capture_height + page.viewport['height'] - 1) // page.viewport['height']
+ screenshot_chunks = []
+
+ # Track cumulative paste position
+ y_offset = 0
+ for _ in range(num_chunks):
+ await page.evaluate(f"window.scrollTo(0, {y_offset})")
+ h = min(page.viewport['height'], capture_height - y_offset)
+ screenshot_chunks.append(await page.screenshot(
+ type_="jpeg",
+ quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)),
+ ))
+
+ y_offset += h # maybe better to inspect the image here?
+
+ # PIL can leak memory in various situations, assign the work to a subprocess for totally clean handling
+
+ parent_conn, child_conn = Pipe()
+ p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, capture_height))
+ p.start()
+ result = parent_conn.recv_bytes()
+ p.join()
+
+ screenshot_chunks = None
+ logger.debug(f"Screenshot - Page height: {page_height} Capture height: {capture_height} - Stitched together in {time.time() - start:.2f}s")
+
+ return result
+
class fetcher(Fetcher):
fetcher_description = "Puppeteer/direct {}/Javascript".format(
@@ -79,7 +164,6 @@ async def fetch_page(self,
empty_pages_are_a_change
):
- from changedetectionio.content_fetchers import visualselector_xpath_selectors
self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -181,11 +265,10 @@ async def fetch_page(self,
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
- screenshot = await self.page.screenshot(type_='jpeg',
- fullPage=True,
- quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
+ screenshot = await capture_full_page(page=self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
+
content = await self.page.content
if not empty_pages_are_a_change and len(content.strip()) == 0:
@@ -203,46 +286,31 @@ async def fetch_page(self,
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
# Setup the xPath/VisualSelector scraper
- if current_include_filters is not None:
+ if current_include_filters:
js = json.dumps(current_include_filters)
await self.page.evaluate(f"var include_filters={js}")
else:
await self.page.evaluate(f"var include_filters=''")
- self.xpath_data = await self.page.evaluate(
- "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
- self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}")
+ MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
+ self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
+ "visualselector_xpath_selectors": visualselector_xpath_selectors,
+ "max_height": MAX_TOTAL_HEIGHT
+ })
+ if not self.xpath_data:
+ raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
+
+ self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
self.content = await self.page.content
- # Bug 3 in Playwright screenshot handling
- # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
- # JPEG is better here because the screenshots can be very very large
- # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
- # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
- # acceptable screenshot quality here
- try:
- self.screenshot = await self.page.screenshot(type_='jpeg',
- fullPage=True,
- quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
- except Exception as e:
- logger.error("Error fetching screenshot")
- # // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
- # // @ todo after text extract, we can place some overlay text with red background to say 'croppped'
- logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot')
- try:
- self.screenshot = await self.page.screenshot(type_='jpeg',
- fullPage=False,
- quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
- except Exception as e:
- logger.error('ERROR: Failed to get viewport-only reduced screenshot :(')
- pass
- finally:
- # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
- logger.success(f"Fetching '{url}' complete, closing page")
- await self.page.close()
- logger.success(f"Fetching '{url}' complete, closing browser")
- await browser.close()
+ self.screenshot = await capture_full_page(page=self.page)
+
+ # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
+ logger.success(f"Fetching '{url}' complete, closing page")
+ await self.page.close()
+ logger.success(f"Fetching '{url}' complete, closing browser")
+ await browser.close()
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
async def main(self, **kwargs):
diff --git a/changedetectionio/content_fetchers/res/puppeteer_fetch.js b/changedetectionio/content_fetchers/res/puppeteer_fetch.js
deleted file mode 100644
index 21c5abc87ea..00000000000
--- a/changedetectionio/content_fetchers/res/puppeteer_fetch.js
+++ /dev/null
@@ -1,190 +0,0 @@
-module.exports = async ({page, context}) => {
-
- var {
- url,
- execute_js,
- user_agent,
- extra_wait_ms,
- req_headers,
- include_filters,
- xpath_element_js,
- screenshot_quality,
- proxy_username,
- proxy_password,
- disk_cache_dir,
- no_cache_list,
- block_url_list,
- } = context;
-
- await page.setBypassCSP(true)
- await page.setExtraHTTPHeaders(req_headers);
-
- if (user_agent) {
- await page.setUserAgent(user_agent);
- }
- // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
-
- await page.setDefaultNavigationTimeout(0);
-
- if (proxy_username) {
- // Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
- // https://github.com/puppeteer/puppeteer/issues/676 ?
- // https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2
- // https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/
- await page.authenticate({
- username: proxy_username,
- password: proxy_password
- });
- }
-
- await page.setViewport({
- width: 1024,
- height: 768,
- deviceScaleFactor: 1,
- });
-
- await page.setRequestInterception(true);
- if (disk_cache_dir) {
- console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
- }
- const fs = require('fs');
- const crypto = require('crypto');
-
- function file_is_expired(file_path) {
- if (!fs.existsSync(file_path)) {
- return true;
- }
- var stats = fs.statSync(file_path);
- const now_date = new Date();
- const expire_seconds = 300;
- if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
- console.log("CACHE EXPIRED: " + file_path);
- return true;
- }
- return false;
-
- }
-
- page.on('request', async (request) => {
- // General blocking of requests that waste traffic
- if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
-
- if (disk_cache_dir) {
- const url = request.url();
- const key = crypto.createHash('md5').update(url).digest("hex");
- const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
-
- // https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
-
- if (fs.existsSync(dir_path + key)) {
- console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
- const cached_data = fs.readFileSync(dir_path + key);
- // @todo headers can come from dir_path+key+".meta" json file
- request.respond({
- status: 200,
- //contentType: 'text/html', //@todo
- body: cached_data
- });
- return;
- }
- }
- request.continue();
- });
-
-
- if (disk_cache_dir) {
- page.on('response', async (response) => {
- const url = response.url();
- // Basic filtering for sane responses
- if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
- console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
- return;
- }
- if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
- console.log("Skipping (no_cache_list) - " + url);
- return;
- }
- if (url.toLowerCase().includes('data:')) {
- console.log("Skipping (embedded-data) - " + url);
- return;
- }
- response.buffer().then(buffer => {
- if (buffer.length > 100) {
- console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
-
- const key = crypto.createHash('md5').update(url).digest("hex");
- const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
-
- if (!fs.existsSync(dir_path)) {
- fs.mkdirSync(dir_path, {recursive: true})
- }
-
- if (fs.existsSync(dir_path + key)) {
- if (file_is_expired(dir_path + key)) {
- fs.writeFileSync(dir_path + key, buffer);
- }
- } else {
- fs.writeFileSync(dir_path + key, buffer);
- }
- }
- });
- });
- }
-
- const r = await page.goto(url, {
- waitUntil: 'load'
- });
-
- await page.waitForTimeout(1000);
- await page.waitForTimeout(extra_wait_ms);
-
- if (execute_js) {
- await page.evaluate(execute_js);
- await page.waitForTimeout(200);
- }
-
- var xpath_data;
- var instock_data;
- try {
- // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
- // (Once the old playwright is removed)
- xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
- instock_data = await page.evaluate(() => {%instock_scrape_code%});
- } catch (e) {
- console.log(e);
- }
-
- // Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
- // Wrap it here (for now)
-
- var b64s = false;
- try {
- b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
- } catch (e) {
- console.log(e);
- }
-
- // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
- if (!b64s) {
- // @todo after text extract, we can place some overlay text with red background to say 'croppped'
- console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
- try {
- b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
- } catch (e) {
- console.log(e);
- }
- }
-
- var html = await page.content();
- return {
- data: {
- 'content': html,
- 'headers': r.headers(),
- 'instock_data': instock_data,
- 'screenshot': b64s,
- 'status_code': r.status(),
- 'xpath_data': xpath_data
- },
- type: 'application/json',
- };
-};
\ No newline at end of file
diff --git a/changedetectionio/content_fetchers/res/stock-not-in-stock.js b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
index 1ebf6da6a4d..098b208ac32 100644
--- a/changedetectionio/content_fetchers/res/stock-not-in-stock.js
+++ b/changedetectionio/content_fetchers/res/stock-not-in-stock.js
@@ -1,229 +1,220 @@
-// Restock Detector
-// (c) Leigh Morresi dgtlmoon@gmail.com
-//
-// Assumes the product is in stock to begin with, unless the following appears above the fold ;
-// - outOfStockTexts appears above the fold (out of stock)
-// - negateOutOfStockRegex (really is in stock)
-
-function isItemInStock() {
- // @todo Pass these in so the same list can be used in non-JS fetchers
- const outOfStockTexts = [
- ' أخبرني عندما يتوفر',
- '0 in stock',
- 'actuellement indisponible',
- 'agotado',
- 'article épuisé',
- 'artikel zurzeit vergriffen',
- 'as soon as stock is available',
- 'ausverkauft', // sold out
- 'available for back order',
- 'awaiting stock',
- 'back in stock soon',
- 'back-order or out of stock',
- 'backordered',
- 'benachrichtigt mich', // notify me
- 'brak na stanie',
- 'brak w magazynie',
- 'coming soon',
- 'currently have any tickets for this',
- 'currently unavailable',
- 'dieser artikel ist bald wieder verfügbar',
- 'dostępne wkrótce',
- 'en rupture',
- 'en rupture de stock',
- 'épuisé',
- 'esgotado',
- 'indisponible',
- 'indisponível',
- 'isn\'t in stock right now',
- 'isnt in stock right now',
- 'isn’t in stock right now',
- 'item is no longer available',
- 'let me know when it\'s available',
- 'mail me when available',
- 'message if back in stock',
- 'mevcut değil',
- 'nachricht bei',
- 'nicht auf lager',
- 'nicht lagernd',
- 'nicht lieferbar',
- 'nicht verfügbar',
- 'nicht vorrätig',
- 'nicht zur verfügung',
- 'nie znaleziono produktów',
- 'niet beschikbaar',
- 'niet leverbaar',
- 'niet op voorraad',
- 'no disponible',
- 'non disponibile',
- 'non disponible',
- 'no longer in stock',
- 'no tickets available',
- 'not available',
- 'not currently available',
- 'not in stock',
- 'notify me when available',
- 'notify me',
- 'notify when available',
- 'não disponível',
- 'não estamos a aceitar encomendas',
- 'out of stock',
- 'out-of-stock',
- 'plus disponible',
- 'prodotto esaurito',
- 'produkt niedostępny',
- 'rupture',
- 'sold out',
- 'sold-out',
- 'stok habis',
- 'stok kosong',
- 'stok varian ini habis',
- 'stokta yok',
- 'temporarily out of stock',
- 'temporarily unavailable',
- 'there were no search results for',
- 'this item is currently unavailable',
- 'tickets unavailable',
- 'tidak dijual',
- 'tidak tersedia',
- 'tijdelijk uitverkocht',
- 'tiket tidak tersedia',
- 'tükendi',
- 'unavailable nearby',
- 'unavailable tickets',
- 'vergriffen',
- 'vorbestellen',
- 'vorbestellung ist bald möglich',
- 'we don\'t currently have any',
- 'we couldn\'t find any products that match',
- 'we do not currently have an estimate of when this product will be back in stock.',
- 'we don\'t know when or if this item will be back in stock.',
- 'we were not able to find a match',
- 'when this arrives in stock',
- 'zur zeit nicht an lager',
- '品切れ',
- '已售',
- '已售完',
- '품절'
- ];
-
-
- const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
-
- function getElementBaseText(element) {
- // .textContent can include text from children which may give the wrong results
- // scan only immediate TEXT_NODEs, which will be a child of the element
- var text = "";
- for (var i = 0; i < element.childNodes.length; ++i)
- if (element.childNodes[i].nodeType === Node.TEXT_NODE)
- text += element.childNodes[i].textContent;
- return text.toLowerCase().trim();
- }
+async () => {
+
+ function isItemInStock() {
+ // @todo Pass these in so the same list can be used in non-JS fetchers
+ const outOfStockTexts = [
+ ' أخبرني عندما يتوفر',
+ '0 in stock',
+ 'actuellement indisponible',
+ 'agotado',
+ 'article épuisé',
+ 'artikel zurzeit vergriffen',
+ 'as soon as stock is available',
+ 'ausverkauft', // sold out
+ 'available for back order',
+ 'awaiting stock',
+ 'back in stock soon',
+ 'back-order or out of stock',
+ 'backordered',
+ 'benachrichtigt mich', // notify me
+ 'brak na stanie',
+ 'brak w magazynie',
+ 'coming soon',
+ 'currently have any tickets for this',
+ 'currently unavailable',
+ 'dieser artikel ist bald wieder verfügbar',
+ 'dostępne wkrótce',
+ 'en rupture',
+ 'en rupture de stock',
+ 'épuisé',
+ 'esgotado',
+ 'indisponible',
+ 'indisponível',
+ 'isn\'t in stock right now',
+ 'isnt in stock right now',
+ 'isn’t in stock right now',
+ 'item is no longer available',
+ 'let me know when it\'s available',
+ 'mail me when available',
+ 'message if back in stock',
+ 'mevcut değil',
+ 'nachricht bei',
+ 'nicht auf lager',
+ 'nicht lagernd',
+ 'nicht lieferbar',
+ 'nicht verfügbar',
+ 'nicht vorrätig',
+ 'nicht zur verfügung',
+ 'nie znaleziono produktów',
+ 'niet beschikbaar',
+ 'niet leverbaar',
+ 'niet op voorraad',
+ 'no disponible',
+ 'non disponibile',
+ 'non disponible',
+ 'no longer in stock',
+ 'no tickets available',
+ 'not available',
+ 'not currently available',
+ 'not in stock',
+ 'notify me when available',
+ 'notify me',
+ 'notify when available',
+ 'não disponível',
+ 'não estamos a aceitar encomendas',
+ 'out of stock',
+ 'out-of-stock',
+ 'plus disponible',
+ 'prodotto esaurito',
+ 'produkt niedostępny',
+ 'rupture',
+ 'sold out',
+ 'sold-out',
+ 'stok habis',
+ 'stok kosong',
+ 'stok varian ini habis',
+ 'stokta yok',
+ 'temporarily out of stock',
+ 'temporarily unavailable',
+ 'there were no search results for',
+ 'this item is currently unavailable',
+ 'tickets unavailable',
+ 'tidak dijual',
+ 'tidak tersedia',
+ 'tijdelijk uitverkocht',
+ 'tiket tidak tersedia',
+ 'tükendi',
+ 'unavailable nearby',
+ 'unavailable tickets',
+ 'vergriffen',
+ 'vorbestellen',
+ 'vorbestellung ist bald möglich',
+ 'we don\'t currently have any',
+ 'we couldn\'t find any products that match',
+ 'we do not currently have an estimate of when this product will be back in stock.',
+ 'we don\'t know when or if this item will be back in stock.',
+ 'we were not able to find a match',
+ 'when this arrives in stock',
+ 'zur zeit nicht an lager',
+ '品切れ',
+ '已售',
+ '已售完',
+ '품절'
+ ];
+
+
+ const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
+
+ function getElementBaseText(element) {
+ // .textContent can include text from children which may give the wrong results
+ // scan only immediate TEXT_NODEs, which will be a child of the element
+ var text = "";
+ for (var i = 0; i < element.childNodes.length; ++i)
+ if (element.childNodes[i].nodeType === Node.TEXT_NODE)
+ text += element.childNodes[i].textContent;
+ return text.toLowerCase().trim();
+ }
- const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
+ const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
- // The out-of-stock or in-stock-text is generally always above-the-fold
- // and often below-the-fold is a list of related products that may or may not contain trigger text
- // so it's good to filter to just the 'above the fold' elements
- // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
+ // The out-of-stock or in-stock-text is generally always above-the-fold
+ // and often below-the-fold is a list of related products that may or may not contain trigger text
+ // so it's good to filter to just the 'above the fold' elements
+        // and it should be at least 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
// @todo - if it's SVG or IMG, go into image diff mode
-// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
-
- console.log("Scanning %ELEMENTS%");
-
- function collectVisibleElements(parent, visibleElements) {
- if (!parent) return; // Base case: if parent is null or undefined, return
-
- // Add the parent itself to the visible elements array if it's of the specified types
- visibleElements.push(parent);
-
- // Iterate over the parent's children
- const children = parent.children;
- for (let i = 0; i < children.length; i++) {
- const child = children[i];
- if (
- child.nodeType === Node.ELEMENT_NODE &&
- window.getComputedStyle(child).display !== 'none' &&
- window.getComputedStyle(child).visibility !== 'hidden' &&
- child.offsetWidth >= 0 &&
- child.offsetHeight >= 0 &&
- window.getComputedStyle(child).contentVisibility !== 'hidden'
- ) {
- // If the child is an element and is visible, recursively collect visible elements
- collectVisibleElements(child, visibleElements);
+
+ function collectVisibleElements(parent, visibleElements) {
+ if (!parent) return; // Base case: if parent is null or undefined, return
+
+ // Add the parent itself to the visible elements array if it's of the specified types
+ visibleElements.push(parent);
+
+ // Iterate over the parent's children
+ const children = parent.children;
+ for (let i = 0; i < children.length; i++) {
+ const child = children[i];
+ if (
+ child.nodeType === Node.ELEMENT_NODE &&
+ window.getComputedStyle(child).display !== 'none' &&
+ window.getComputedStyle(child).visibility !== 'hidden' &&
+ child.offsetWidth >= 0 &&
+ child.offsetHeight >= 0 &&
+ window.getComputedStyle(child).contentVisibility !== 'hidden'
+ ) {
+ // If the child is an element and is visible, recursively collect visible elements
+ collectVisibleElements(child, visibleElements);
+ }
}
}
- }
- const elementsToScan = [];
- collectVisibleElements(document.body, elementsToScan);
+ const elementsToScan = [];
+ collectVisibleElements(document.body, elementsToScan);
+
+ var elementText = "";
+
+ // REGEXS THAT REALLY MEAN IT'S IN STOCK
+ for (let i = elementsToScan.length - 1; i >= 0; i--) {
+ const element = elementsToScan[i];
- var elementText = "";
+ // outside the 'fold' or some weird text in the heading area
+ // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
+ if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
+ continue
+ }
- // REGEXS THAT REALLY MEAN IT'S IN STOCK
- for (let i = elementsToScan.length - 1; i >= 0; i--) {
- const element = elementsToScan[i];
+ elementText = "";
+ try {
+ if (element.tagName.toLowerCase() === "input") {
+ elementText = element.value.toLowerCase().trim();
+ } else {
+ elementText = getElementBaseText(element);
+ }
+ } catch (e) {
+ console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
+ }
- // outside the 'fold' or some weird text in the heading area
- // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
- if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
- continue
+ if (elementText.length) {
+ // try which ones could mean its in stock
+ if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
+ console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
+ return 'Possibly in stock';
+ }
+ }
}
- elementText = "";
- try {
+ // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
+ for (let i = elementsToScan.length - 1; i >= 0; i--) {
+ const element = elementsToScan[i];
+ // outside the 'fold' or some weird text in the heading area
+ // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
+            // Note: there's also an automated test that places the 'out of stock' text fairly low down
+ if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
+ continue
+ }
+ elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
- } catch (e) {
- console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
- }
- if (elementText.length) {
- // try which ones could mean its in stock
- if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
- console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
- return 'Possibly in stock';
- }
- }
- }
-
- // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
- for (let i = elementsToScan.length - 1; i >= 0; i--) {
- const element = elementsToScan[i];
- // outside the 'fold' or some weird text in the heading area
- // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
- // Note: theres also an automated test that places the 'out of stock' text fairly low down
- if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
- continue
- }
- elementText = "";
- if (element.tagName.toLowerCase() === "input") {
- elementText = element.value.toLowerCase().trim();
- } else {
- elementText = getElementBaseText(element);
- }
-
- if (elementText.length) {
- // and these mean its out of stock
- for (const outOfStockText of outOfStockTexts) {
- if (elementText.includes(outOfStockText)) {
- console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
- return outOfStockText; // item is out of stock
+ if (elementText.length) {
+ // and these mean its out of stock
+ for (const outOfStockText of outOfStockTexts) {
+ if (elementText.includes(outOfStockText)) {
+ console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
+ return outOfStockText; // item is out of stock
+ }
}
}
}
- }
- console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
- return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
-}
+ console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
+ return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
+ }
// returns the element text that makes it think it's out of stock
-return isItemInStock().trim()
-
-
+ return isItemInStock().trim()
+}
diff --git a/changedetectionio/content_fetchers/res/xpath_element_scraper.js b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
index 182a9b1df92..3d6bc7c7cfb 100644
--- a/changedetectionio/content_fetchers/res/xpath_element_scraper.js
+++ b/changedetectionio/content_fetchers/res/xpath_element_scraper.js
@@ -1,285 +1,285 @@
-// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
-// All rights reserved.
-
-// @file Scrape the page looking for elements of concern (%ELEMENTS%)
-// http://matatk.agrip.org.uk/tests/position-and-width/
-// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
-//
-// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
-// will automatically force a scroll somewhere, so include the position offset
-// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
-var scroll_y = 0;
-try {
- scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
-} catch (e) {
- console.log(e);
-}
+async (options) => {
+ let visualselector_xpath_selectors = options.visualselector_xpath_selectors
+ let max_height = options.max_height
+
+ var scroll_y = 0;
+ try {
+ scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
+ } catch (e) {
+ console.log(e);
+ }
// Include the getXpath script directly, easier than fetching
-function getxpath(e) {
- var n = e;
- if (n && n.id) return '//*[@id="' + n.id + '"]';
- for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
- for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
- for (d = n.nextSibling; d;) {
- if (d.nodeName === n.nodeName) {
- r = !0;
- break
+ function getxpath(e) {
+ var n = e;
+ if (n && n.id) return '//*[@id="' + n.id + '"]';
+ for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
+ for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
+ for (d = n.nextSibling; d;) {
+ if (d.nodeName === n.nodeName) {
+ r = !0;
+ break
+ }
+ d = d.nextSibling
}
- d = d.nextSibling
+ o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
}
- o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
+ return o.length ? "/" + o.reverse().join("/") : ""
}
- return o.length ? "/" + o.reverse().join("/") : ""
-}
-const findUpTag = (el) => {
- let r = el
- chained_css = [];
- depth = 0;
-
- // Strategy 1: If it's an input, with name, and there's only one, prefer that
- if (el.name !== undefined && el.name.length) {
- var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
- var proposed_element = window.document.querySelectorAll(proposed);
- if (proposed_element.length) {
- if (proposed_element.length === 1) {
- return proposed;
- } else {
- // Some sites change ID but name= stays the same, we can hit it if we know the index
- // Find all the elements that match and work out the input[n]
- var n = Array.from(proposed_element).indexOf(el);
- // Return a Playwright selector for nthinput[name=zipcode]
- return proposed + " >> nth=" + n;
+ const findUpTag = (el) => {
+ let r = el
+ chained_css = [];
+ depth = 0;
+
+ // Strategy 1: If it's an input, with name, and there's only one, prefer that
+ if (el.name !== undefined && el.name.length) {
+ var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
+ var proposed_element = window.document.querySelectorAll(proposed);
+ if (proposed_element.length) {
+ if (proposed_element.length === 1) {
+ return proposed;
+ } else {
+ // Some sites change ID but name= stays the same, we can hit it if we know the index
+ // Find all the elements that match and work out the input[n]
+ var n = Array.from(proposed_element).indexOf(el);
+ // Return a Playwright selector for nthinput[name=zipcode]
+ return proposed + " >> nth=" + n;
+ }
}
}
- }
- // Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
- while (r.parentNode) {
- if (depth === 5) {
- break;
- }
- if ('' !== r.id) {
- chained_css.unshift("#" + CSS.escape(r.id));
- final_selector = chained_css.join(' > ');
- // Be sure theres only one, some sites have multiples of the same ID tag :-(
- if (window.document.querySelectorAll(final_selector).length === 1) {
- return final_selector;
+ // Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
+ while (r.parentNode) {
+ if (depth === 5) {
+ break;
}
- return null;
- } else {
- chained_css.unshift(r.tagName.toLowerCase());
+ if ('' !== r.id) {
+ chained_css.unshift("#" + CSS.escape(r.id));
+ final_selector = chained_css.join(' > ');
+                // Be sure there's only one, some sites have multiples of the same ID tag :-(
+ if (window.document.querySelectorAll(final_selector).length === 1) {
+ return final_selector;
+ }
+ return null;
+ } else {
+ chained_css.unshift(r.tagName.toLowerCase());
+ }
+ r = r.parentNode;
+ depth += 1;
}
- r = r.parentNode;
- depth += 1;
+ return null;
}
- return null;
-}
// @todo - if it's SVG or IMG, go into image diff mode
-// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
-var size_pos = [];
+ var size_pos = [];
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
-var bbox;
-console.log("Scanning %ELEMENTS%");
+ var bbox;
+ console.log(`Scanning for "${visualselector_xpath_selectors}"`);
-function collectVisibleElements(parent, visibleElements) {
- if (!parent) return; // Base case: if parent is null or undefined, return
+ function collectVisibleElements(parent, visibleElements) {
+ if (!parent) return; // Base case: if parent is null or undefined, return
- // Add the parent itself to the visible elements array if it's of the specified types
- const tagName = parent.tagName.toLowerCase();
- if ("%ELEMENTS%".split(',').includes(tagName)) {
- visibleElements.push(parent);
- }
+ // Add the parent itself to the visible elements array if it's of the specified types
+ const tagName = parent.tagName.toLowerCase();
+ if (visualselector_xpath_selectors.split(',').includes(tagName)) {
+ visibleElements.push(parent);
+ }
- // Iterate over the parent's children
- const children = parent.children;
- for (let i = 0; i < children.length; i++) {
- const child = children[i];
- const computedStyle = window.getComputedStyle(child);
-
- if (
- child.nodeType === Node.ELEMENT_NODE &&
- computedStyle.display !== 'none' &&
- computedStyle.visibility !== 'hidden' &&
- child.offsetWidth >= 0 &&
- child.offsetHeight >= 0 &&
- computedStyle.contentVisibility !== 'hidden'
- ) {
- // If the child is an element and is visible, recursively collect visible elements
- collectVisibleElements(child, visibleElements);
+ // Iterate over the parent's children
+ const children = parent.children;
+ for (let i = 0; i < children.length; i++) {
+ const child = children[i];
+ const computedStyle = window.getComputedStyle(child);
+
+ if (
+ child.nodeType === Node.ELEMENT_NODE &&
+ computedStyle.display !== 'none' &&
+ computedStyle.visibility !== 'hidden' &&
+ child.offsetWidth >= 0 &&
+ child.offsetHeight >= 0 &&
+ computedStyle.contentVisibility !== 'hidden'
+ ) {
+ // If the child is an element and is visible, recursively collect visible elements
+ collectVisibleElements(child, visibleElements);
+ }
}
}
-}
// Create an array to hold the visible elements
-const visibleElementsArray = [];
+ const visibleElementsArray = [];
// Call collectVisibleElements with the starting parent element
-collectVisibleElements(document.body, visibleElementsArray);
+ collectVisibleElements(document.body, visibleElementsArray);
-visibleElementsArray.forEach(function (element) {
+ visibleElementsArray.forEach(function (element) {
- bbox = element.getBoundingClientRect();
+ bbox = element.getBoundingClientRect();
- // Skip really small ones, and where width or height ==0
- if (bbox['width'] * bbox['height'] < 10) {
- return
- }
+ // Skip really small ones, and where width or height ==0
+ if (bbox['width'] * bbox['height'] < 10) {
+ return
+ }
- // Don't include elements that are offset from canvas
- if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
- return
- }
+ // Don't include elements that are offset from canvas
+ if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
+ return
+ }
- // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
- // it should not traverse when we know we can anchor off just an ID one level up etc..
- // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
+        // @todo the getXpath kind of sucks, it doesn't know when there is, for example, just one ID
+ // it should not traverse when we know we can anchor off just an ID one level up etc..
+ // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
- // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
- xpath_result = false;
- try {
- var d = findUpTag(element);
- if (d) {
- xpath_result = d;
- }
- } catch (e) {
- console.log(e);
- }
- // You could swap it and default to getXpath and then try the smarter one
- // default back to the less intelligent one
- if (!xpath_result) {
+ // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
+ xpath_result = false;
try {
- // I've seen on FB and eBay that this doesnt work
- // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), :67:20) at UtilityScript.evaluate (:159:18) at UtilityScript. (:1:44)
- xpath_result = getxpath(element);
+ var d = findUpTag(element);
+ if (d) {
+ xpath_result = d;
+ }
} catch (e) {
console.log(e);
- return
}
- }
+ // You could swap it and default to getXpath and then try the smarter one
+ // default back to the less intelligent one
+ if (!xpath_result) {
+ try {
+                // I've seen on FB and eBay that this doesn't work
+ // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), :67:20) at UtilityScript.evaluate (:159:18) at UtilityScript. (:1:44)
+ xpath_result = getxpath(element);
+ } catch (e) {
+ console.log(e);
+ return
+ }
+ }
- let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
+ let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
- let text = element.textContent.trim().slice(0, 30).trim();
- while (/\n{2,}|\t{2,}/.test(text)) {
- text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
- }
+ let text = element.textContent.trim().slice(0, 30).trim();
+ while (/\n{2,}|\t{2,}/.test(text)) {
+ text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
+ }
- // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
- const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
- const computedStyle = window.getComputedStyle(element);
-
- size_pos.push({
- xpath: xpath_result,
- width: Math.round(bbox['width']),
- height: Math.round(bbox['height']),
- left: Math.floor(bbox['left']),
- top: Math.floor(bbox['top']) + scroll_y,
- // tagName used by Browser Steps
- tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
- // tagtype used by Browser Steps
- tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
- isClickable: computedStyle.cursor === "pointer",
- // Used by the keras trainer
- fontSize: computedStyle.getPropertyValue('font-size'),
- fontWeight: computedStyle.getPropertyValue('font-weight'),
- hasDigitCurrency: hasDigitCurrency,
- label: label,
- });
+ // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
+ const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text);
+ const computedStyle = window.getComputedStyle(element);
-});
+ if (Math.floor(bbox['top']) + scroll_y > max_height) {
+ return
+ }
+
+ size_pos.push({
+ xpath: xpath_result,
+ width: Math.round(bbox['width']),
+ height: Math.round(bbox['height']),
+ left: Math.floor(bbox['left']),
+ top: Math.floor(bbox['top']) + scroll_y,
+ // tagName used by Browser Steps
+ tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
+ // tagtype used by Browser Steps
+ tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
+ isClickable: computedStyle.cursor === "pointer",
+ // Used by the keras trainer
+ fontSize: computedStyle.getPropertyValue('font-size'),
+ fontWeight: computedStyle.getPropertyValue('font-weight'),
+ hasDigitCurrency: hasDigitCurrency,
+ label: label,
+ });
+
+ });
// Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
-if (include_filters.length) {
- let results;
- // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
- for (const f of include_filters) {
- bbox = false;
- q = false;
-
- if (!f.length) {
- console.log("xpath_element_scraper: Empty filter, skipping");
- continue;
- }
-
- try {
- // is it xpath?
- if (f.startsWith('/') || f.startsWith('xpath')) {
- var qry_f = f.replace(/xpath(:|\d:)/, '')
- console.log("[xpath] Scanning for included filter " + qry_f)
- let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
- results = [];
- for (let i = 0; i < xpathResult.snapshotLength; i++) {
- results.push(xpathResult.snapshotItem(i));
- }
- } else {
- console.log("[css] Scanning for included filter " + f)
- console.log("[css] Scanning for included filter " + f);
- results = document.querySelectorAll(f);
+ if (include_filters.length) {
+ let results;
+ // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
+ for (const f of include_filters) {
+ bbox = false;
+ q = false;
+
+ if (!f.length) {
+ console.log("xpath_element_scraper: Empty filter, skipping");
+ continue;
}
- } catch (e) {
- // Maybe catch DOMException and alert?
- console.log("xpath_element_scraper: Exception selecting element from filter " + f);
- console.log(e);
- }
- if (results != null && results.length) {
-
- // Iterate over the results
- results.forEach(node => {
- // Try to resolve //something/text() back to its /something so we can atleast get the bounding box
- try {
- if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
- node = node.parentElement
+ try {
+ // is it xpath?
+ if (f.startsWith('/') || f.startsWith('xpath')) {
+ var qry_f = f.replace(/xpath(:|\d:)/, '')
+ console.log("[xpath] Scanning for included filter " + qry_f)
+ let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
+ results = [];
+ for (let i = 0; i < xpathResult.snapshotLength; i++) {
+ results.push(xpathResult.snapshotItem(i));
}
- } catch (e) {
- console.log(e)
- console.log("xpath_element_scraper: #text resolver")
+ } else {
+ console.log("[css] Scanning for included filter " + f)
+ console.log("[css] Scanning for included filter " + f);
+ results = document.querySelectorAll(f);
}
+ } catch (e) {
+ // Maybe catch DOMException and alert?
+ console.log("xpath_element_scraper: Exception selecting element from filter " + f);
+ console.log(e);
+ }
- // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
- if (typeof node.getBoundingClientRect == 'function') {
- bbox = node.getBoundingClientRect();
- console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
- } else {
+ if (results != null && results.length) {
+
+ // Iterate over the results
+ results.forEach(node => {
+                    // Try to resolve //something/text() back to its /something so we can at least get the bounding box
try {
- // Try and see we can find its ownerElement
- bbox = node.ownerElement.getBoundingClientRect();
- console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+ if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
+ node = node.parentElement
+ }
} catch (e) {
console.log(e)
- console.log("xpath_element_scraper: error looking up q.ownerElement")
+ console.log("xpath_element_scraper: #text resolver")
}
- }
- if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
- size_pos.push({
- xpath: f,
- width: parseInt(bbox['width']),
- height: parseInt(bbox['height']),
- left: parseInt(bbox['left']),
- top: parseInt(bbox['top']) + scroll_y,
- highlight_as_custom_filter: true
- });
- }
- });
+ // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
+ if (typeof node.getBoundingClientRect == 'function') {
+ bbox = node.getBoundingClientRect();
+ console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
+ } else {
+ try {
+ // Try and see we can find its ownerElement
+ bbox = node.ownerElement.getBoundingClientRect();
+ console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
+ } catch (e) {
+ console.log(e)
+ console.log("xpath_element_scraper: error looking up q.ownerElement")
+ }
+ }
+
+ if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
+ size_pos.push({
+ xpath: f,
+ width: parseInt(bbox['width']),
+ height: parseInt(bbox['height']),
+ left: parseInt(bbox['left']),
+ top: parseInt(bbox['top']) + scroll_y,
+ highlight_as_custom_filter: true
+ });
+ }
+ });
+ }
}
}
-}
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
// so that we dont select the wrapping element by mistake and be unable to select what we want
-size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
+ size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
+
+// browser_width required for proper scaling in the frontend
+ // Return as a string to save playwright from juggling thousands of objects
+ return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth});
+}
-// Window.width required for proper scaling in the frontend
-return {'size_pos': size_pos, 'browser_width': window.innerWidth};
diff --git a/changedetectionio/content_fetchers/screenshot_handler.py b/changedetectionio/content_fetchers/screenshot_handler.py
new file mode 100644
index 00000000000..04133b864a6
--- /dev/null
+++ b/changedetectionio/content_fetchers/screenshot_handler.py
@@ -0,0 +1,73 @@
+# Pages with a vertical height longer than this will use the 'stitch together' method.
+
+# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
+# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
+# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
+
+from loguru import logger
+
+from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
+
+
+def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
+    """Stitch encoded image chunks top-to-bottom into one captioned JPEG and send it over pipe_conn.
+
+    Both height arguments are only used in the warning caption text. Success sends the JPEG bytes
+    via send_bytes(); any exception sends an "error:..." string via send(). The pipe is always closed.
+    """
+    import os
+    import io
+    from PIL import Image, ImageDraw, ImageFont
+
+    try:
+        # Decode every chunk; the canvas is as wide as the widest chunk and as tall as all chunks combined
+        images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
+        total_height = sum(im.height for im in images)
+        max_width = max(im.width for im in images)
+
+        # Create stitched image, pasting each chunk directly below the previous one
+        stitched = Image.new('RGB', (max_width, total_height))
+        y_offset = 0
+        for im in images:
+            stitched.paste(im, (0, y_offset))
+            y_offset += im.height
+
+        # Draw caption on top (overlaid, not extending canvas)
+        draw = ImageDraw.Draw(stitched)
+        caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
+        padding = 10
+        font_size = 35
+        font_color = (255, 0, 0)
+        background_color = (255, 255, 255)
+
+        # Try to load a proper font, falling back to Pillow's built-in default font
+        try:
+            font = ImageFont.truetype("arial.ttf", font_size)
+        except IOError:
+            font = ImageFont.load_default()
+
+        bbox = draw.textbbox((0, 0), caption_text, font=font)
+        text_width = bbox[2] - bbox[0]
+        text_height = bbox[3] - bbox[1]
+
+        # Draw white rectangle background behind text (full canvas width, text height plus padding above and below)
+        rect_top = 0
+        rect_bottom = text_height + 2 * padding
+        draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
+
+        # Draw text centered horizontally, 10px padding from top of the rectangle
+        text_x = (max_width - text_width) // 2
+        text_y = padding
+        draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
+
+        # Encode as JPEG (quality from the SCREENSHOT_QUALITY env var, else SCREENSHOT_DEFAULT_QUALITY) and send
+        output = io.BytesIO()
+        stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
+        pipe_conn.send_bytes(output.getvalue())
+        stitched.close()
+    except Exception as e:
+        pipe_conn.send(f"error:{e}")
+    finally:
+        pipe_conn.close()
+
+
diff --git a/changedetectionio/flask_app.py b/changedetectionio/flask_app.py
index 7ed5e2c100e..5b7201186c0 100644
--- a/changedetectionio/flask_app.py
+++ b/changedetectionio/flask_app.py
@@ -394,7 +394,7 @@ def static_content(group, filename):
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
- logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
+ logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.')
abort(404)
if response:
diff --git a/changedetectionio/model/Watch.py b/changedetectionio/model/Watch.py
index 3fbe8f94a72..86e93983f56 100644
--- a/changedetectionio/model/Watch.py
+++ b/changedetectionio/model/Watch.py
@@ -553,7 +553,10 @@ def save_xpath_data(self, data, as_error=False):
self.ensure_data_dir_exists()
with open(target_path, 'wb') as f:
- f.write(zlib.compress(json.dumps(data).encode()))
+ if not isinstance(data, str):
+ f.write(zlib.compress(json.dumps(data).encode()))
+ else:
+ f.write(zlib.compress(data.encode()))
f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future
diff --git a/changedetectionio/update_worker.py b/changedetectionio/update_worker.py
index 38a8f73a61e..2e9e2294dc3 100644
--- a/changedetectionio/update_worker.py
+++ b/changedetectionio/update_worker.py
@@ -592,6 +592,7 @@ def run(self):
self.current_uuid = None # Done
self.q.task_done()
+ update_handler = None
logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")
# Give the CPU time to interrupt