Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions changedetectionio/conditions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .exceptions import EmptyConditionRuleRowNotUsable
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
from . import default_plugin

from loguru import logger
# List of all supported JSON Logic operators
operator_choices = [
(None, "Choose one - Operator"),
Expand Down Expand Up @@ -113,12 +113,14 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
application_datastruct=application_datastruct,
ephemeral_data=ephemeral_data
)

logger.debug(f"Trying plugin {plugin}....")

# Set a timeout of 10 seconds
try:
new_execute_data = future.result(timeout=10)
if new_execute_data and isinstance(new_execute_data, dict):
EXECUTE_DATA.update(new_execute_data)

except concurrent.futures.TimeoutError:
# The plugin took too long, abort processing for this watch
raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
Expand Down
25 changes: 15 additions & 10 deletions changedetectionio/conditions/plugins/levenshtein_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,20 @@ def levenshtein_ratio_recent_history(watch, incoming_text=None):
try:
from Levenshtein import ratio, distance
k = list(watch.history.keys())
if len(k) >= 2:
# When called from ui_edit_stats_extras, we don't have incoming_text
if incoming_text is None:
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
else:
a = watch.get_history_snapshot(timestamp=k[-2]) # Second newest, incoming_text will be "newest"
b = incoming_text

a = None
b = None

# When called from ui_edit_stats_extras, we don't have incoming_text
if incoming_text is None:
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot

# Needs atleast one snapshot
elif len(k) >= 1: # Should be atleast one snapshot to compare against
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
b = incoming_text if incoming_text else k[-2]

if a and b:
distance_value = distance(a, b)
ratio_value = ratio(a, b)
return {
Expand Down Expand Up @@ -53,7 +58,7 @@ def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc

if watch and 'text' in ephemeral_data:
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data['text'])
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
if isinstance(lev_data, dict):
res['levenshtein_ratio'] = lev_data.get('ratio', 0)
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
Expand Down
111 changes: 109 additions & 2 deletions changedetectionio/tests/test_conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,11 @@ def test_condition_validate_rule_row(client, live_server):
)
assert res.status_code == 200
assert b'false' in res.data

# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)



Expand Down Expand Up @@ -235,4 +239,107 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
)

# Assert the word count is counted correctly
assert b'<td>13</td>' in res.data
assert b'<td>13</td>' in res.data

# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)

# If there was only a change in the whitespacing, then we shouldnt have a change detected
def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happens. <br>
</body>
</html>
""")

# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)
assert b"Watch added in Paused state, saving will unpause" in res.data

uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
# Give the thread time to pick it up
wait_for_all_checks(client)
res = client.post(
url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1),
data={
"url": test_url,
"fetch_backend": "html_requests",
"conditions_match_logic": "ALL", # ALL = AND logic
"conditions-0-field": "levenshtein_ratio",
"conditions-0-operator": "<",
"conditions-0-value": "0.8" # needs to be more of a diff to trigger a change
},
follow_redirects=True
)

assert b"unpaused" in res.data

wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data

# Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
res = client.get(
url_for("ui.ui_views.preview_page", uuid=uuid),
follow_redirects=True
)
assert b'Which is across multiple lines' in res.data


############### Now change it a LITTLE bit...
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<html>
<body>
Some initial text<br>
<p>Which is across multiple lines</p>
<br>
So let's see what happenxxxxxxxxx. <br>
</body>
</html>
""")

res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold

############### Now change it a MORE THAN 50%
test_return_data = """<html>
<body>
Some sxxxx<br>
<p>Which is across a lines</p>
<br>
ok. <br>
</body>
</html>
"""

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
follow_redirects=True
)
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ extruct
# For cleaning up unknown currency formats
babel

levenshtein

# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3

Expand Down