Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ COPY changedetection.py /app/changedetection.py
# Github Action test purpose(test-only.yml).
# On production, it is effectively LOGGER_LEVEL=''.
ARG LOGGER_LEVEL=''
ENV LOGGER_LEVEL "$LOGGER_LEVEL"
ENV LOGGER_LEVEL="$LOGGER_LEVEL"

WORKDIR /app
CMD ["python", "./changedetection.py", "-d", "/datastore"]
Expand Down
15 changes: 8 additions & 7 deletions changedetectionio/blueprint/ui/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,9 +227,6 @@ def edit_page(uuid):
if request.method == 'POST' and not form.validate():
flash("An error occurred, please see below.", "error")

visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)


# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
jq_support = True
try:
Expand All @@ -239,11 +236,12 @@ def edit_page(uuid):

watch = datastore.data['watching'].get(uuid)

# if system or watch is configured to need a chrome type browser
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'

watch_uses_webdriver = False
watch_needs_selenium_or_playwright = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
watch_uses_webdriver = True
watch_needs_selenium_or_playwright = True


from zoneinfo import available_timezones

Expand All @@ -265,11 +263,14 @@ def edit_page(uuid):
'lev_info': levenshtein_ratio_recent_history(watch),
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'settings_application': datastore.data['settings']['application'],
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,
'watch_uses_webdriver': watch_uses_webdriver,
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
}

included_content = None
Expand Down
17 changes: 15 additions & 2 deletions changedetectionio/content_fetchers/webdriver_selenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,17 @@ def run(self,
# request_body, request_method unused for now, until some magic in the future happens.

options = ChromeOptions()
options.add_argument("--headless")

# Load Chrome options from env
CHROME_OPTIONS = [
line.strip()
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
if line.strip()
]

for opt in CHROME_OPTIONS:
options.add_argument(opt)

if self.proxy:
options.proxy = self.proxy

Expand All @@ -80,14 +90,17 @@ def run(self,
self.quit()
raise

self.driver.set_window_size(1280, 1024)
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
self.driver.set_window_size(1280, 1024)

self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

if self.webdriver_js_execute_code is not None:
self.driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))


# @todo - how to check this? is it possible?
self.status_code = 200
# @todo somehow we should try to get this working for WebDriver
Expand Down
8 changes: 3 additions & 5 deletions changedetectionio/templates/_helpers.html
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,13 @@


{% macro playwright_warning() %}
<p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
<p><strong>Error - This watch needs Chrome (with playwright/sockpuppetbrowser), but Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p>
<p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p>
<br>
<p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p>

{% endmacro %}

{% macro only_webdriver_type_watches_warning() %}
<p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br>
{% macro only_playwright_type_watches_warning() %}
<p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the SockpuppetBrowser/Playwright or Selenium enabled.</strong></p><br>
{% endmacro %}

{% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %}
Expand Down
46 changes: 28 additions & 18 deletions changedetectionio/templates/edit.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
Expand Down Expand Up @@ -204,7 +204,9 @@
</div>

<div class="tab-pane-inner" id="browser-steps">
{% if playwright_enabled and watch_uses_webdriver %}
{% if watch_needs_selenium_or_playwright %}
{# Only works with playwright #}
{% if system_has_playwright_configured %}
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset>
<div class="pure-control-group">
Expand All @@ -223,7 +225,6 @@
<div class="flex-wrapper" >

<div id="browser-steps-ui" class="noselect">

<div class="noselect" id="browsersteps-selector-wrapper" style="width: 100%">
<span class="loader" >
<span id="browsersteps-click-start">
Expand All @@ -245,15 +246,16 @@ <h2 >Click here to Start</h2>
</div>
</fieldset>
{% else %}
<span class="pure-form-message-inline">
{% if not watch_uses_webdriver %}
{{ only_webdriver_type_watches_warning() }}
{% endif %}
{% if not playwright_enabled %}
{{ playwright_warning() }}
{% endif %}
</span>
{# it's configured to use selenium or chrome but system says its not configured #}
{{ playwright_warning() }}
{% if system_has_webdriver_configured %}
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% endif %}
{% endif %}
{% else %}
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div>


Expand Down Expand Up @@ -379,7 +381,9 @@ <h3>Text filtering</h3>

<fieldset>
<div class="pure-control-group">
{% if playwright_enabled and watch_uses_webdriver %}
{% if watch_needs_selenium_or_playwright %}
{% if system_has_playwright_configured %}
{% if visual_selector_data_ready %}
<span class="pure-form-message-inline" id="visual-selector-heading">
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
</span>
Expand All @@ -396,14 +400,20 @@ <h3>Text filtering</h3>
<canvas id="selector-canvas"></canvas>
</div>
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong>&nbsp;<span class="text">Loading...</span></div>
{% else %}
{% if not watch_uses_webdriver %}
{{ only_webdriver_type_watches_warning() }}
{% endif %}
{% if not playwright_enabled %}
{{ playwright_warning() }}
{% else %}
<strong>Error, The Visual selector data is not ready, it needs to complete atleast one fetch, please queue the item and reload.</strong>
{% endif %}
{% else %}
{# The watch needed chrome but system says that playwright is not ready #}
{{ playwright_warning() }}
{% endif %}
{% if system_has_webdriver_configured %}
<strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong>
{% endif %}
{% else %}
{# "This functionality needs chrome.." #}
{{ only_playwright_type_watches_warning() }}
{% endif %}
</div>
</fieldset>
</div>
Expand Down
26 changes: 16 additions & 10 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,27 @@ services:
# - ./proxies.json:/datastore/proxies.json

# environment:
# Default listening port, can also be changed with the -p option
# Default listening port, can also be changed with the -p option (not to be confused with ports: below)
# - PORT=5000
#
# Log levels are in descending order. (TRACE is the most detailed one)
# Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL
# - LOGGER_LEVEL=TRACE
#
# Alternative WebDriver/selenium URL, do not use "'s or 's!
# - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
#
# Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol)
# - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000
#
#
# Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well)
# - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub
#
# WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
# webdriver_proxyAutoconfigUrl, webdriver_autodetect,
# webdriver_socksProxy, webdriver_socksUsername, webdriver_socksVersion, webdriver_socksPassword
#
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative target "Chrome" Playwright URL, do not use "'s or 's!
# "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser.
# - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
Expand Down Expand Up @@ -86,8 +88,8 @@ services:

# Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages.
# RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container
# sockpuppetbrowser:
# hostname: sockpuppetbrowser
# browser-sockpuppet-chrome:
# hostname: browser-sockpuppet-chrome
# image: dgtlmoon/sockpuppetbrowser:latest
# cap_add:
# - SYS_ADMIN
Expand All @@ -102,14 +104,18 @@ services:
# Used for fetching pages via Playwright+Chrome where you need Javascript support.
# Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector)
# Does not report status codes (200, 404, 403) and other issues
# browser-chrome:
# hostname: browser-chrome
# browser-selenium-chrome:
# hostname: browser-selenium-chrome
# image: selenium/standalone-chrome:4
# environment:
# - VNC_NO_PASSWORD=1
# - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1080
# - SCREEN_DEPTH=24
# CHROME_OPTIONS: |
# --window-size=1280,1024
# --headless
# --disable-gpu
# volumes:
# # Workaround to avoid the browser crashing inside a docker container
# # See https://github.com/SeleniumHQ/docker-selenium#quick-start
Expand Down
Loading