diff --git a/changedetectionio/blueprint/rss/__init__.py b/changedetectionio/blueprint/rss/__init__.py
index 113e35ac5a9..d4e091948d7 100644
--- a/changedetectionio/blueprint/rss/__init__.py
+++ b/changedetectionio/blueprint/rss/__init__.py
@@ -1,102 +1 @@
-import time
-import datetime
-import pytz
-from flask import Blueprint, make_response, request, url_for
-from loguru import logger
-from feedgen.feed import FeedGenerator
-
-from changedetectionio.store import ChangeDetectionStore
-from changedetectionio.safe_jinja import render as jinja_render
-
-def construct_blueprint(datastore: ChangeDetectionStore):
- rss_blueprint = Blueprint('rss', __name__)
-
- # Import the login decorator if needed
- # from changedetectionio.auth_decorator import login_optionally_required
- @rss_blueprint.route("", methods=['GET'])
- def feed():
- now = time.time()
- # Always requires token set
- app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
- rss_url_token = request.args.get('token')
- if rss_url_token != app_rss_token:
- return "Access denied, bad token", 403
-
- from changedetectionio import diff
- limit_tag = request.args.get('tag', '').lower().strip()
- # Be sure limit_tag is a uuid
- for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
- if limit_tag == tag.get('title', '').lower().strip():
- limit_tag = uuid
-
- # Sort by last_changed and add the uuid which is usually the key..
- sorted_watches = []
-
- # @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
- for uuid, watch in datastore.data['watching'].items():
- # @todo tag notification_muted skip also (improve Watch model)
- if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
- continue
- if limit_tag and not limit_tag in watch['tags']:
- continue
- watch['uuid'] = uuid
- sorted_watches.append(watch)
-
- sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
-
- fg = FeedGenerator()
- fg.title('changedetection.io')
- fg.description('Feed description')
- fg.link(href='https://changedetection.io')
-
- for watch in sorted_watches:
-
- dates = list(watch.history.keys())
- # Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
- if len(dates) < 2:
- continue
-
- if not watch.viewed:
- # Re #239 - GUID needs to be individual for each event
- # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
- guid = "{}/{}".format(watch['uuid'], watch.last_changed)
- fe = fg.add_entry()
-
- # Include a link to the diff page, they will have to login here to see if password protection is enabled.
- # Description is the page you watch, link takes you to the diff JS UI page
- # Dict val base_url will get overriden with the env var if it is set.
- ext_base_url = datastore.data['settings']['application'].get('active_base_url')
-
- # Because we are called via whatever web server, flask should figure out the right path (
- diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
-
- fe.link(link=diff_link)
-
- # @todo watch should be a getter - watch.get('title') (internally if URL else..)
-
- watch_title = watch.get('title') if watch.get('title') else watch.get('url')
- fe.title(title=watch_title)
-
- html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
- newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
- include_equal=False,
- line_feed_sep="<br>")
-
- # @todo Make this configurable and also consider html-colored markup
- # @todo User could decide if goes to the diff page, or to the watch link
- rss_template = "
{{html_diff}}
\n\n" - content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) - - fe.content(content=content, type='CDATA') - - fe.guid(guid, permalink=False) - dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) - dt = dt.replace(tzinfo=pytz.UTC) - fe.pubDate(dt) - - response = make_response(fg.rss_str()) - response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') - logger.trace(f"RSS generated in {time.time() - now:.3f}s") - return response - - return rss_blueprint \ No newline at end of file +RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')] diff --git a/changedetectionio/blueprint/rss/blueprint.py b/changedetectionio/blueprint/rss/blueprint.py new file mode 100644 index 00000000000..edaa5b1ec8a --- /dev/null +++ b/changedetectionio/blueprint/rss/blueprint.py @@ -0,0 +1,147 @@ + +from changedetectionio.safe_jinja import render as jinja_render +from changedetectionio.store import ChangeDetectionStore +from feedgen.feed import FeedGenerator +from flask import Blueprint, make_response, request, url_for, redirect +from loguru import logger +import datetime +import pytz +import re +import time + + +BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]' + +# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc) +def scan_invalid_chars_in_rss(content): + for match in re.finditer(BAD_CHARS_REGEX, content): + i = match.start() + bad_char = content[i] + hex_value = f"0x{ord(bad_char):02x}" + # Grab context + start = max(0, i - 20) + end = min(len(content), i + 21) + context = content[start:end].replace('\n', '\\n').replace('\r', '\\r') + logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...") + # First match is enough + return True + + return False + + +def clean_entry_content(content): + cleaned = re.sub(BAD_CHARS_REGEX, '', content) + return cleaned + +def construct_blueprint(datastore: ChangeDetectionStore): + 
rss_blueprint = Blueprint('rss', __name__) + + # Some RSS reader situations ended up with rss/ (forward slash after RSS) due + # to some earlier blueprint rerouting work, it should go to the feed. + @rss_blueprint.route("/", methods=['GET']) + def extraslash(): + return redirect(url_for('rss.feed')) + + # Import the login decorator if needed + # from changedetectionio.auth_decorator import login_optionally_required + @rss_blueprint.route("", methods=['GET']) + def feed(): + now = time.time() + # Always requires token set + app_rss_token = datastore.data['settings']['application'].get('rss_access_token') + rss_url_token = request.args.get('token') + if rss_url_token != app_rss_token: + return "Access denied, bad token", 403 + + from changedetectionio import diff + limit_tag = request.args.get('tag', '').lower().strip() + # Be sure limit_tag is a uuid + for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): + if limit_tag == tag.get('title', '').lower().strip(): + limit_tag = uuid + + # Sort by last_changed and add the uuid which is usually the key.. 
+ sorted_watches = [] + + # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away + for uuid, watch in datastore.data['watching'].items(): + # @todo tag notification_muted skip also (improve Watch model) + if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'): + continue + if limit_tag and not limit_tag in watch['tags']: + continue + watch['uuid'] = uuid + sorted_watches.append(watch) + + sorted_watches.sort(key=lambda x: x.last_changed, reverse=False) + + fg = FeedGenerator() + fg.title('changedetection.io') + fg.description('Feed description') + fg.link(href='https://changedetection.io') + + html_colour_enable = False + if datastore.data['settings']['application'].get('rss_content_format') == 'html': + html_colour_enable = True + + for watch in sorted_watches: + + dates = list(watch.history.keys()) + # Re #521 - Don't bother processing this one if there are fewer than 2 snapshots, means we never had a change detected. + if len(dates) < 2: + continue + + if not watch.viewed: + # Re #239 - GUID needs to be individual for each event + # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228) + guid = "{}/{}".format(watch['uuid'], watch.last_changed) + fe = fg.add_entry() + + # Include a link to the diff page, they will have to login here to see if password protection is enabled. + # Description is the page you watch, link takes you to the diff JS UI page + # Dict val base_url will get overridden with the env var if it is set. 
+ ext_base_url = datastore.data['settings']['application'].get('active_base_url') + # @todo fix + + # Because we are called via whatever web server, flask should figure out the right path ( + diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)} + + fe.link(link=diff_link) + + # @todo watch should be a getter - watch.get('title') (internally if URL else..) + + watch_title = watch.get('title') if watch.get('title') else watch.get('url') + fe.title(title=watch_title) + try: + + html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]), + newest_version_file_contents=watch.get_history_snapshot(dates[-1]), + include_equal=False, + line_feed_sep="{{html_diff}}
\n\n" + + content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) + + # Out of range chars could also break feedgen + if scan_invalid_chars_in_rss(content): + content = clean_entry_content(content) + + fe.content(content=content, type='CDATA') + fe.guid(guid, permalink=False) + dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) + dt = dt.replace(tzinfo=pytz.UTC) + fe.pubDate(dt) + + response = make_response(fg.rss_str()) + response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') + logger.trace(f"RSS generated in {time.time() - now:.3f}s") + return response + + return rss_blueprint \ No newline at end of file diff --git a/changedetectionio/blueprint/settings/templates/settings.html b/changedetectionio/blueprint/settings/templates/settings.html index 65ce6c66026..ef58f4abe5b 100644 --- a/changedetectionio/blueprint/settings/templates/settings.html +++ b/changedetectionio/blueprint/settings/templates/settings.html @@ -78,7 +78,10 @@ {{ render_field(form.application.form.pager_size) }} - +{content}
+