@@ -366,8 +366,11 @@ def __call__(self, doc):
366
366
new = _replace_css_import ('' , new )
367
367
if self ._has_sneaky_javascript (new ):
368
368
# Something tricky is going on...
369
- el .text = '/* deleted */'
370
- elif new != old :
369
+ new = '/* deleted */'
370
+ else :
371
+ new = self ._remove_sneaky_css_comments (new )
372
+
373
+ if new != old :
371
374
el .text = new
372
375
if self .comments :
373
376
kill_tags .add (etree .Comment )
@@ -568,7 +571,9 @@ def _remove_javascript_link(self, link):
568
571
return ''
569
572
return link
570
573
571
- _substitute_comments = re .compile (r'/\*.*?\*/' , re .S ).sub
574
+ _comments_re = re .compile (r'/\*.*?\*/' , re .S )
575
+ _find_comments = _comments_re .finditer
576
+ _substitute_comments = _comments_re .sub
572
577
573
578
def _has_sneaky_javascript (self , style ):
574
579
"""
@@ -581,29 +586,42 @@ def _has_sneaky_javascript(self, style):
581
586
that and remove only the Javascript from the style; this catches
582
587
more sneaky attempts.
583
588
"""
589
+ style = self ._substitute_comments ('' , style )
590
+ style = style .replace ('\\ ' , '' )
584
591
style = _substitute_whitespace ('' , style )
585
592
style = style .lower ()
586
-
587
- for with_comments in True , False :
588
- if not with_comments :
589
- style = self ._substitute_comments ('' , style )
590
-
591
- style = style .replace ('\\ ' , '' )
592
-
593
- if _has_javascript_scheme (style ):
594
- return True
595
- if 'expression(' in style :
596
- return True
597
- if '@import' in style :
598
- return True
599
- if '</noscript' in style :
600
- # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601
- return True
602
- if _looks_like_tag_content (style ):
603
- # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604
- return True
593
+ if _has_javascript_scheme (style ):
594
+ return True
595
+ if 'expression(' in style :
596
+ return True
597
+ if '@import' in style :
598
+ return True
599
+ if '</noscript' in style :
600
+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601
+ return True
602
+ if _looks_like_tag_content (style ):
603
+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604
+ return True
605
605
return False
606
606
607
def _remove_sneaky_css_comments(self, style):
    """
    Look for suspicious code in CSS comments and, if found,
    replace the entire comment in the given style.

    Browsers might parse <style> as an ordinary HTML tag
    in some specific context and that might cause code in CSS
    comments to run.

    :param style: CSS text to inspect.
    :return: ``style`` in which every comment flagged by
        ``_has_javascript_scheme`` or ``_looks_like_tag_content`` is
        replaced with ``/* deleted */``; all other text, including
        unflagged comments, is returned unchanged.
    """
    def _neutralize(match):
        # Decide per comment: keep it verbatim unless one of the
        # module-level checks flags its text.
        comment = match.group(0)
        if _has_javascript_scheme(comment) or _looks_like_tag_content(comment):
            return "/* deleted */"
        return comment

    # A single substitution pass handles every comment; no debug output
    # is written to stdout and the string is not rescanned per comment.
    return self._substitute_comments(_neutralize, style)
624
+
607
625
def clean_html (self , html ):
608
626
result_type = type (html )
609
627
if isinstance (html , (str , bytes )):
0 commit comments