Skip to content

Commit

Permalink
Org: Make sure anchors in URLs are not viewed as hashtags
Browse files Browse the repository at this point in the history
TYPE: Bugfix
LINK: OGC-1816
  • Loading branch information
BreathingFlesh authored Sep 16, 2024
1 parent 201cba1 commit ece085a
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 12 deletions.
15 changes: 10 additions & 5 deletions src/onegov/org/assets/js/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,19 @@ var tagselectors = [
'.directory-fields .field-display dd',
'.message .text'
];
// NOTE(review): this view is a flattened diff, so two definitions of
// `tagexpr` appear. Going by the "10 additions & 5 deletions" header, the
// definition directly below looks like the removed (pre-change) one; it
// matched "#" followed by at least three of the listed characters anywhere.
var tagexpr = new RegExp('(#[0-9a-zA-Zöäüéèà]{3,})', 'gi');
// To avoid matching URL anchors, the hashtag must not be preceded by a
// letter, a digit or "/". This is done by capturing the character before the
// hashtag as well (group 1: start of input or one character outside
// [a-zA-Z0-9/]); group 2 is the hashtag itself. The replacement has to put
// group 1 back. Flags: "g" replaces every match, "i" ignores case.
// NOTE(review): the excluded class only lists ASCII letters, so a hashtag
// directly preceded by e.g. "ö" still matches.
var tagexpr = new RegExp('(^|[^a-zA-Z0-9/])(#[0-9a-zA-Zöäüéèà]{3,})', 'gi');

/**
 * Turns hashtags into search links: for every descendant of `target` that
 * matches one of the `tagselectors`, each `tagexpr` match in the element's
 * innerHTML is wrapped in `<a class="hashtag" href="/search?q=...">`.
 *
 * @param target value handed to `$()`; only its matching descendants are
 *               rewritten.
 */
var highlightTags = function(target) {
$(target).find(tagselectors.join(',')).each(function() {
// NOTE(review): this view is a flattened diff. Going by the "10 additions &
// 5 deletions" header, the first replace call below looks like the removed
// (pre-change) version, which linked the whole match.
this.innerHTML = this.innerHTML.replace(
tagexpr, function(match) {
return '<a class="hashtag" href="/search?q=' + encodeURIComponent(match) + '">' + match + '</a>';
});
// Replacement callback arguments: the full match, then one argument per
// capture group of `tagexpr`.
this.innerHTML = this.innerHTML.replace(tagexpr, function(fullMatch, beforeChar, hashtag) {
// `beforeChar` is the character before the hashtag (empty at start of input)
// `hashtag` is the hashtag itself, including the leading "#"

// Re-emit `beforeChar` outside the link so it is not lost; the hashtag is
// URL-encoded for the query string ("#" becomes "%23").
return beforeChar + '<a class="hashtag" href="/search?q=' + encodeURIComponent(hashtag) + '">' + hashtag + '</a>';
});
});
};

Expand Down
2 changes: 1 addition & 1 deletion src/onegov/org/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
# regex module in onegov.core
#
# additionally, a JavaScript variant of it is used in onegov.org's common.js
# NOTE(review): this view is a flattened diff ("1 addition & 1 deletion"), so
# HASHTAG appears twice; the first definition looks like the removed one.
HASHTAG = re.compile(r'#\w{3,}')
# "#" followed by at least three word characters, but only when the "#" is not
# directly preceded by a word character or "/" (negative lookbehind). This
# keeps URL fragments such as ".../#rooms" or ".../rooms#suite" from being
# treated as hashtags.
HASHTAG = re.compile(r'(?<![\w/])#\w{3,}')
# Group 1 captures the value of the src attribute of an <img> tag.
IMG_URLS = re.compile(r'<img[^>]*?src="(.*?)"')


Expand Down
2 changes: 1 addition & 1 deletion src/onegov/search/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

# XXX this is doubly defined in onegov.org.utils, maybe move to a common
# regex module in onegov.core
# NOTE(review): this view is a flattened diff ("1 addition & 1 deletion"), so
# HASHTAG appears twice; the first definition looks like the removed one.
HASHTAG = re.compile(r'#\w{3,}')
# "#" followed by at least three word characters, but only when the "#" is not
# directly preceded by a word character or "/" (negative lookbehind), so URL
# fragments are not treated as hashtags.
HASHTAG = re.compile(r'(?<![\w/])#\w{3,}')


def searchable_sqlalchemy_models(
Expand Down
16 changes: 11 additions & 5 deletions src/onegov/town6/assets/js/common.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,20 @@ var tagselectors = [
'.directory-fields .field-display dd',
'.message .text'
];
// NOTE(review): this view is a flattened diff, so two definitions of
// `tagexpr` appear. Going by the "11 additions & 5 deletions" header, the
// definition directly below looks like the removed (pre-change) one; it
// matched "#" followed by at least three of the listed characters anywhere.
var tagexpr = new RegExp('(#[0-9a-zA-Zöäüéèà]{3,})', 'gi');

// To avoid matching URL anchors, the hashtag must not be preceded by a
// letter, a digit or "/". This is done by capturing the character before the
// hashtag as well (group 1: start of input or one character outside
// [a-zA-Z0-9/]); group 2 is the hashtag itself. The replacement has to put
// group 1 back. Flags: "g" replaces every match, "i" ignores case.
// NOTE(review): the excluded class only lists ASCII letters, so a hashtag
// directly preceded by e.g. "ö" still matches.
var tagexpr = new RegExp('(^|[^a-zA-Z0-9/])(#[0-9a-zA-Zöäüéèà]{3,})', 'gi');

/**
 * Turns hashtags into search links: for every descendant of `target` that
 * matches one of the `tagselectors`, each `tagexpr` match in the element's
 * innerHTML is wrapped in `<a class="hashtag" href="/search?q=...">`.
 *
 * @param target value handed to `$()`; only its matching descendants are
 *               rewritten.
 */
var highlightTags = function(target) {
$(target).find(tagselectors.join(',')).each(function() {
// NOTE(review): this view is a flattened diff. Going by the "11 additions &
// 5 deletions" header, the first replace call below looks like the removed
// (pre-change) version, which linked the whole match.
this.innerHTML = this.innerHTML.replace(
tagexpr, function(match) {
return '<a class="hashtag" href="/search?q=' + encodeURIComponent(match) + '">' + match + '</a>';
});
// Replacement callback arguments: the full match, then one argument per
// capture group of `tagexpr`.
this.innerHTML = this.innerHTML.replace(tagexpr, function(fullMatch, beforeChar, hashtag) {
// `beforeChar` is the character before the hashtag (empty at start of input)
// `hashtag` is the hashtag itself, including the leading "#"

// Re-emit `beforeChar` outside the link so it is not lost; the hashtag is
// URL-encoded for the query string ("#" becomes "%23").
return beforeChar + '<a class="hashtag" href="/search?q=' + encodeURIComponent(hashtag) + '">' + hashtag + '</a>';
});
});
};

Expand Down
50 changes: 50 additions & 0 deletions tests/onegov/org/test_browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,3 +615,53 @@ def test_script_escaped_in_user_submitted_html(browser, org_app):
payload_h1_selector = 'h1#foo' # CSS selector for the injected element
time.sleep(1)
assert not browser.find_by_css(payload_h1_selector)


# Browser test: hashtags in a directory entry's description are rendered as
# search links, while "#" fragments inside URLs are left alone.
def test_link_hashtags(browser, org_app):
browser.login_admin()

# Create an extended directory with a "Name" and a "Description" field; both
# fields are listed under display/content in the configuration.
DirectoryCollection(org_app.session(), type='extended').add(
title="Crime Scenes",
structure="""
Name *= ___
Description *= ...
""",
configuration="""
title:
- name
order:
- name
display:
content:
- name
- description
""",
type='extended'
)

# presumably needed so the browser's requests can see the new directory
transaction.commit()

# NOTE(review): login_admin() was already called at the top of the test;
# confirm whether this second call is required.
browser.login_admin()
browser.visit('/directories/crime-scenes/+new')
browser.fill('name', "Seven Seas Motel")
# The description mixes three real hashtags with two URLs that contain "#".
browser.fill('description',
"""
#hotel Our rooms are #amazing! Check them out here:
https://www.seven-seas-motel.com/#rooms
https://www.seven-seas-motel.com/rooms#luxury-suite
#fantastic
""")
# "Absenden" is German for "submit"; the button is located by its value.
browser.find_by_value("Absenden").click()
assert browser.is_text_present("Seven Seas Motel")

# Only hashtags should be links, URL anchors should not be seen as hashtags
assert ('<a class="hashtag" href="/search?q=%23amazing">#amazing</a>'
) in browser.html
assert ('<a class="hashtag" href="/search?q=%23fantastic">#fantastic</a>'
) in browser.html
assert ('<a class="hashtag" href="/search?q=%23hotel">#hotel</a>'
) in browser.html
assert ('<a class="hashtag" href="/search?q=%23rooms">#rooms</a>'
) not in browser.html
# NOTE(review): the hashtag patterns changed in this commit do not allow "-",
# so a false match on the second URL would be rendered as "#luxury", not
# "#luxury-suite". The assertion below therefore passes either way; consider
# asserting on "%23luxury" instead.
assert ('<a class="hashtag" href="/search?q=%23luxury-suite">'
'#luxury-suite</a>') not in browser.html

0 comments on commit ece085a

Please sign in to comment.