diff --git a/bleach/html5lib_shim.py b/bleach/html5lib_shim.py
index 20d7e22b..be06aead 100644
--- a/bleach/html5lib_shim.py
+++ b/bleach/html5lib_shim.py
@@ -534,7 +534,18 @@ def next_possible_entity(text):
class BleachHTMLSerializer(HTMLSerializer):
- """HTMLSerializer that undoes & -> &amp; in attributes"""
+ """HTMLSerializer that undoes & -> &amp; in attributes and sets
+ escape_rcdata to True
+ """
+
+ # per the HTMLSerializer.__init__ docstring:
+ #
+ # Whether to escape characters that need to be
+ # escaped within normal elements within rcdata elements such as
+ # style.
+ #
+ escape_rcdata = True
+
def escape_base_amp(self, stoken):
"""Escapes just bare & in HTML attribute values"""
# First, undo escaping of &. We need to do this because html5lib's
diff --git a/tests/test_clean.py b/tests/test_clean.py
index 8f64beb2..133cd822 100644
--- a/tests/test_clean.py
+++ b/tests/test_clean.py
@@ -7,7 +7,7 @@
from bleach import clean
from bleach.html5lib_shim import Filter
from bleach.sanitizer import Cleaner
-
+from bleach._vendor.html5lib.constants import rcdataElements
def test_clean_idempotent():
"""Make sure that applying the filter twice doesn't change anything."""
@@ -789,7 +789,7 @@ def test_nonexistent_namespace():
(
raw_tag,
"" % raw_tag,
- "<img src=x onerror=alert(1) />" % raw_tag,
+ "<img src=x onerror=alert(1) />" % raw_tag,
)
for raw_tag in _raw_tags
],
@@ -799,6 +799,29 @@ def test_noscript_rawtag_(raw_tag, data, expected):
assert clean(data, tags=["noscript", raw_tag]) == expected
+@pytest.mark.parametrize(
+    "namespace_tag, rc_data_element_tag, data, expected",
+    [
+        (
+            namespace_tag,
+            rc_data_element_tag,
+            "<%s><%s><img src=x onerror=alert(1)>" % (namespace_tag, rc_data_element_tag),
+            "<%s><%s>&lt;img src=x onerror=alert(1)&gt;</%s></%s>" % (namespace_tag, rc_data_element_tag, rc_data_element_tag, namespace_tag),
+        )
+        for namespace_tag in ["math", "svg"]
+        # https://dev.w3.org/html5/html-author/#rcdata-elements
+        # https://html.spec.whatwg.org/index.html#parsing-html-fragments
+        # in html5lib: 'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes', and 'noscript'
+        for rc_data_element_tag in rcdataElements
+    ],
+)
+def test_namespace_rc_data_element_strip_false(namespace_tag, rc_data_element_tag, data, expected):
+    # refs: bug 1621692 / GHSA-m6xf-fq7q-8743
+    # browsers will pull the img out of the namespace and rc data tag resulting in XSS,
+    # so the img payload must be serialized escaped (&lt;img ...&gt;) inside the rc data tag
+    assert clean(data, tags=[namespace_tag, rc_data_element_tag], strip=False) == expected
+
+
def get_ids_and_tests():
"""Retrieves regression tests from data/ directory