Skip to content

Commit

Permalink
Org: Fix lxml usage in html annotation.
Browse files: browse the repository at this point in the history.
TYPE: Bugfix
  • Loading branch information
msom authored Apr 14, 2024
1 parent f1ea705 commit 7daa3c5
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions src/onegov/org/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from isodate import parse_date, parse_datetime
from itertools import groupby
from libres.modules import errors as libres_errors
from lxml.etree import ParserError
from lxml.html import fragments_fromstring, tostring
from onegov.core.cache import lru_cache
from onegov.core.layout import Layout
Expand Down Expand Up @@ -161,19 +162,17 @@ def annotate_html(
if not html:
return html

fragments = fragments_fromstring(html)
try:
fragments = fragments_fromstring(html, no_leading_text=True)
except ParserError:
return html
images = []

# we perform a root xpath lookup, which will result in all paragraphs
# being looked at - so we don't need to loop over all elements (yah, it's
# a bit weird)
for element in fragments[:1]:

# instead of failing, lxml will return strings instead of elements if
# they can't be parsed.. so we have to inspect the objects
if not hasattr(element, 'xpath'):
return html

for paragraph in element.xpath('//p[img]'):
add_class_to_node(paragraph, 'has-img')

Expand Down

0 comments on commit 7daa3c5

Please sign in to comment.