Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions changedetectionio/blueprint/browser_steps/browser_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from random import randint
from loguru import logger

from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render

Expand Down Expand Up @@ -293,26 +293,38 @@ def has_expired(self):
def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
import json
# because we for now only run browser steps in playwright mode (not puppeteer mode)
from changedetectionio.content_fetchers.playwright import capture_full_page

xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()

now = time.time()
self.page.wait_for_timeout(1 * 1000)

screenshot = capture_full_page(self.page)
screenshot = capture_full_page(page=self.page)

logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

now = time.time()
self.page.evaluate("var include_filters=''")
# Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)

xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
self.page.request_gc()

scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'

MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements,
"max_height": MAX_TOTAL_HEIGHT
}))
self.page.request_gc()

# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")

# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?
Expand Down
18 changes: 18 additions & 0 deletions changedetectionio/content_fetchers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,29 @@
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'

SCREENSHOT_MAX_HEIGHT_DEFAULT = 16000
SCREENSHOT_DEFAULT_QUALITY = 40

# Maximum total height for the final image (when in stitch mode).
# This defaults to 16000px (override with the SCREENSHOT_MAX_HEIGHT environment variable) because of the huge amount of RAM that taller images were using.
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MiB (not including buffers in PIL etc.)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))

# The size at which we will switch to the stitching method. When below this (and
# below MAX_TOTAL_HEIGHT, which can be set by the user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000

# available_fetchers() will scan this implementation looking for anything starting with html_;
# this information is used in the form selections.
from changedetectionio.content_fetchers.requests import fetcher as html_requests


import importlib.resources
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')


def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect
Expand Down
6 changes: 1 addition & 5 deletions changedetectionio/content_fetchers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,11 +63,6 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0

def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')

@abstractmethod
def get_error(self):
return self.error
Expand Down Expand Up @@ -143,6 +138,7 @@ def iterate_browser_steps(self, start_url=None):
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
self.screenshot_step("before-" + str(step_n))
self.save_step_html("before-" + str(step_n))

try:
optional_value = step['optional_value']
selector = step['selector']
Expand Down
138 changes: 0 additions & 138 deletions changedetectionio/content_fetchers/helpers.py

This file was deleted.

Loading