Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 2 additions & 9 deletions changedetectionio/blueprint/browser_steps/browser_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from random import randint
from loguru import logger

from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render

Expand Down Expand Up @@ -298,14 +298,7 @@ def get_current_state(self):
now = time.time()
self.page.wait_for_timeout(1 * 1000)


full_height = self.page.evaluate("document.documentElement.scrollHeight")

if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
screenshot = capture_stitched_together_full_page(self.page)
else:
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
screenshot = capture_full_page(self.page)

logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

Expand Down
128 changes: 81 additions & 47 deletions changedetectionio/content_fetchers/helpers.py
Original file line number Diff line number Diff line change
@@ -1,79 +1,107 @@

# Pages with a vertical height longer than this will use the 'stitch together' method.

# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.


# The size at which we will switch to stitching method
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000

from loguru import logger

def capture_stitched_together_full_page(page):
def capture_full_page(page):
import io
import os
import time
from PIL import Image, ImageDraw, ImageFont

MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))

# The page height at which we switch to the stitching method. Pages shorter
# than this (and shorter than MAX_TOTAL_HEIGHT, which can be set by the user)
# use the default screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000

WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay

# Save the original viewport size
original_viewport = page.viewport_size
now = time.time()
start = time.time()

stitched_image = None

try:
viewport = page.viewport_size
viewport_width = original_viewport["width"]
viewport_height = original_viewport["height"]

page_height = page.evaluate("document.documentElement.scrollHeight")

# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
if (
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
and page_height < MAX_TOTAL_HEIGHT
):
logger.debug("Using default screenshot method")
screenshot = page.screenshot(
type="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
full_page=True,
)
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot

logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)

# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)

images = []
total_captured_height = 0

for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
# Ensure we do not exceed the total height limit
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)

# Adjust viewport size for this chunk
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})

# Scroll to the correct position
page.evaluate(f"window.scrollTo(0, {offset})")
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + viewport_height - 1) // viewport_height

# Capture screenshot chunk
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
images.append(Image.open(io.BytesIO(screenshot_bytes)))
# Create the final image upfront to avoid holding all chunks in memory
stitched_image = Image.new("RGB", (viewport_width, capture_height))

total_captured_height += chunk_height

# Stop if we reached the maximum total height
if total_captured_height >= MAX_TOTAL_HEIGHT:
break

# Create the final stitched image
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
# Track cumulative paste position
y_offset = 0

# Stitch the screenshot chunks together
for img in images:
stitched_image.paste(img, (0, y_offset))
y_offset += img.height

logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
for _ in range(num_chunks):
# Scroll to position (no viewport resizing)
page.evaluate(f"window.scrollTo(0, {y_offset})")

# Capture only the visible area using clip
with io.BytesIO(
page.screenshot(
Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And you are 100% sure that this solved the memory problem?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR in its entirety has solved the issue on my system/installation, yes.

type="jpeg",
clip={
"x": 0,
"y": 0,
"width": viewport_width,
"height": min(viewport_height, capture_height - y_offset),
},
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
)
) as buf:
with Image.open(buf) as img:
img.load()
stitched_image.paste(img, (0, y_offset))
y_offset += img.height

logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")

# Overlay warning text if the screenshot was trimmed
if page_height > MAX_TOTAL_HEIGHT:
if capture_height < page_height:
draw = ImageDraw.Draw(stitched_image)
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"

# Load font (default system font if Arial is unavailable)
try:
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
font = ImageFont.truetype(
"arial.ttf", WARNING_TEXT_HEIGHT
) # Arial (Windows/Mac)
except IOError:
font = ImageFont.load_default() # Default font if Arial not found

Expand All @@ -83,22 +111,28 @@ def capture_stitched_together_full_page(page):
text_height = text_bbox[3] - text_bbox[1] # Calculate text height

# Define background rectangle (top of the image)
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
draw.rectangle(
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
)

# Center text horizontally within the warning area
text_x = (viewport["width"] - text_width) // 2
text_x = (viewport_width - text_width) // 2
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2

# Draw the warning text in red
draw.text((text_x, text_y), warning_text, fill="red", font=font)

# Save or return the final image
output = io.BytesIO()
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
screenshot = output.getvalue()
# Save final image
with io.BytesIO() as output:
stitched_image.save(
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
)
screenshot = output.getvalue()

finally:
# Restore the original viewport size
page.set_viewport_size(original_viewport)
if stitched_image is not None:
stitched_image.close()

return screenshot
11 changes: 2 additions & 9 deletions changedetectionio/content_fetchers/playwright.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from loguru import logger

from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable

Expand Down Expand Up @@ -204,14 +204,7 @@ def run(self,
# acceptable screenshot quality here
try:
# The actual screenshot - this is always base64 and needs decoding! horrible! huge CPU usage
full_height = self.page.evaluate("document.documentElement.scrollHeight")

if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
self.screenshot = capture_stitched_together_full_page(self.page)
else:
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
self.screenshot = capture_full_page(self.page)

except Exception as e:
# It's likely the screenshot was too long/big and something crashed
Expand Down
4 changes: 4 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ services:
#
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles
#
# Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
# RAM usage will be higher if you increase this.
# - SCREENSHOT_MAX_HEIGHT=16000

# Comment out ports: when using behind a reverse proxy, enable networks: etc.
ports:
Expand Down