File tree: 2 files changed (+36 -14 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -581,22 +581,27 @@ def _has_sneaky_javascript(self, style):
581
581
that and remove only the Javascript from the style; this catches
582
582
more sneaky attempts.
583
583
"""
584
- style = self ._substitute_comments ('' , style )
585
- style = style .replace ('\\ ' , '' )
586
584
style = _substitute_whitespace ('' , style )
587
585
style = style .lower ()
588
- if _has_javascript_scheme (style ):
589
- return True
590
- if 'expression(' in style :
591
- return True
592
- if '@import' in style :
593
- return True
594
- if '</noscript' in style :
595
- # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
596
- return True
597
- if _looks_like_tag_content (style ):
598
- # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
599
- return True
586
+
587
+ for with_comments in True , False :
588
+ if not with_comments :
589
+ style = self ._substitute_comments ('' , style )
590
+
591
+ style = style .replace ('\\ ' , '' )
592
+
593
+ if _has_javascript_scheme (style ):
594
+ return True
595
+ if 'expression(' in style :
596
+ return True
597
+ if '@import' in style :
598
+ return True
599
+ if '</noscript' in style :
600
+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601
+ return True
602
+ if _looks_like_tag_content (style ):
603
+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604
+ return True
600
605
return False
601
606
602
607
def clean_html (self , html ):
Original file line number Diff line number Diff line change @@ -127,6 +127,23 @@ def test_sneaky_js_in_math_style(self):
127
127
b'<math><style>/* deleted */</style></math>' ,
128
128
lxml .html .tostring (clean_html (s )))
129
129
130
+ def test_sneaky_js_in_style_comment_math_svg (self ):
131
+ for tag in "svg" , "math" :
132
+ html = f'<{ tag } ><style>/*<img src onerror=alert(origin)>*/'
133
+ s = lxml .html .fragment_fromstring (html )
134
+
135
+ self .assertEqual (
136
+ f'<{ tag } ><style>/* deleted */</style></{ tag } >' .encode (),
137
+ lxml .html .tostring (clean_html (s )))
138
+
139
+ def test_sneaky_js_in_style_comment_noscript (self ):
140
+ html = '<noscript><style>/*</noscript><img src onerror=alert(origin)>*/'
141
+ s = lxml .html .fragment_fromstring (html )
142
+
143
+ self .assertEqual (
144
+ b'<noscript><style>/* deleted */</style></noscript>' ,
145
+ lxml .html .tostring (clean_html (s )))
146
+
130
147
def test_sneaky_import_in_style (self ):
131
148
# Prevent "@@importimport" -> "@import" replacement etc.
132
149
style_codes = [
You can’t perform that action at this time.
0 commit comments