Skip to content

Commit 1334134

Browse files
author
Greg Guthe
committed
sanitizer: escape HTML comments
fixes: bug 1689399 / GHSA-vv2x-vrpj-qqpq
1 parent c045a8b commit 1334134

File tree

3 files changed

+52
-0
lines changed

3 files changed

+52
-0
lines changed

bleach/html5lib_shim.py

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
HTMLInputStream,
4949
) # noqa: E402 module level import not at top of file
5050
from bleach._vendor.html5lib.serializer import (
51+
escape,
5152
HTMLSerializer,
5253
) # noqa: E402 module level import not at top of file
5354
from bleach._vendor.html5lib._tokenizer import (

bleach/sanitizer.py

+4
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,10 @@ def sanitize_token(self, token):
371371

372372
elif token_type == "Comment":
373373
if not self.strip_html_comments:
374+
# call xml.sax.saxutils.escape to escape &, <, and > in addition to " and '
375+
token["data"] = html5lib_shim.escape(
376+
token["data"], entities={'"': "&quot;", "'": "&#x27;"}
377+
)
374378
return token
375379
else:
376380
return None

tests/test_clean.py

+47
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,53 @@ def test_namespace_rc_data_element_strip_false(
739739
)
740740

741741

742+
@pytest.mark.parametrize(
743+
"namespace_tag, end_tag, data, expected",
744+
[
745+
(
746+
"math",
747+
"p",
748+
"<math></p><style><!--</style><img src/onerror=alert(1)>",
749+
"<math><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
750+
),
751+
(
752+
"math",
753+
"br",
754+
"<math></br><style><!--</style><img src/onerror=alert(1)>",
755+
"<math><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></math>",
756+
),
757+
(
758+
"svg",
759+
"p",
760+
"<svg></p><style><!--</style><img src/onerror=alert(1)>",
761+
"<svg><p></p><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
762+
),
763+
(
764+
"svg",
765+
"br",
766+
"<svg></br><style><!--</style><img src/onerror=alert(1)>",
767+
"<svg><br><style><!--&lt;/style&gt;&lt;img src/onerror=alert(1)&gt;--></style></svg>",
768+
),
769+
],
770+
)
771+
def test_html_comments_escaped(namespace_tag, end_tag, data, expected):
772+
# refs: bug 1689399 / GHSA-vv2x-vrpj-qqpq
773+
#
774+
# p and br can be just an end tag (e.g. </p> == <p></p>)
775+
#
776+
# In browsers:
777+
#
778+
# * img and other tags break out of the svg or math namespace (e.g. <svg><img></svg> == <svg></svg><img>)
779+
# * style does not (e.g. <svg><style></svg> == <svg><style></style></svg>)
780+
# * the breaking tag ejects trailing elements (e.g. <svg><img><style></style></svg> == <svg></svg><img><style></style>)
781+
#
782+
# the ejected elements can trigger XSS
783+
assert (
784+
clean(data, tags=[namespace_tag, end_tag, "style"], strip_comments=False)
785+
== expected
786+
)
787+
788+
742789
def get_ids_and_tests():
743790
"""Retrieves regression tests from data/ directory
744791

0 commit comments

Comments
 (0)