@@ -366,8 +366,11 @@ def __call__(self, doc):
366366 new = _replace_css_import ('' , new )
367367 if self ._has_sneaky_javascript (new ):
368368 # Something tricky is going on...
369- el .text = '/* deleted */'
370- elif new != old :
369+ new = '/* deleted */'
370+ else :
371+ new = self ._remove_sneaky_css_comments (new )
372+
373+ if new != old :
371374 el .text = new
372375 if self .comments :
373376 kill_tags .add (etree .Comment )
@@ -568,7 +571,9 @@ def _remove_javascript_link(self, link):
568571 return ''
569572 return link
570573
571- _substitute_comments = re .compile (r'/\*.*?\*/' , re .S ).sub
574+ _comments_re = re .compile (r'/\*.*?\*/' , re .S )
575+ _find_comments = _comments_re .finditer
576+ _substitute_comments = _comments_re .sub
572577
573578 def _has_sneaky_javascript (self , style ):
574579 """
@@ -581,29 +586,42 @@ def _has_sneaky_javascript(self, style):
581586 that and remove only the Javascript from the style; this catches
582587 more sneaky attempts.
583588 """
589+ style = self ._substitute_comments ('' , style )
590+ style = style .replace ('\\ ' , '' )
584591 style = _substitute_whitespace ('' , style )
585592 style = style .lower ()
586-
587- for with_comments in True , False :
588- if not with_comments :
589- style = self ._substitute_comments ('' , style )
590-
591- style = style .replace ('\\ ' , '' )
592-
593- if _has_javascript_scheme (style ):
594- return True
595- if 'expression(' in style :
596- return True
597- if '@import' in style :
598- return True
599- if '</noscript' in style :
600- # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601- return True
602- if _looks_like_tag_content (style ):
603- # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604- return True
593+ if _has_javascript_scheme (style ):
594+ return True
595+ if 'expression(' in style :
596+ return True
597+ if '@import' in style :
598+ return True
599+ if '</noscript' in style :
600+ # e.g. '<noscript><style><a title="</noscript><img src=x onerror=alert(1)>">'
601+ return True
602+ if _looks_like_tag_content (style ):
603+ # e.g. '<math><style><img src=x onerror=alert(1)></style></math>'
604+ return True
605605 return False
606606
def _remove_sneaky_css_comments(self, style):
    """
    Look for suspicious code in CSS comments and, if found,
    replace the entire comment in the given style.

    Browsers might parse <style> as an ordinary HTML tag
    in some specific context and that might cause code in CSS
    comments to run.

    Each comment yielded by ``self._find_comments`` is passed to
    ``_has_javascript_scheme`` and ``_looks_like_tag_content``; when
    either check is truthy, every occurrence of that comment's text is
    replaced with the placeholder ``/* deleted */``.  The (possibly
    modified) style string is returned; a style without suspicious
    comments is returned unchanged.
    """
    # The match iterator keeps scanning the string it was created from,
    # so rebinding ``style`` inside the loop does not disturb the search.
    for match in self._find_comments(style):
        comment = match.group(0)
        if _has_javascript_scheme(comment) or _looks_like_tag_content(comment):
            style = style.replace(comment, "/* deleted */")

    return style
624+
607625 def clean_html (self , html ):
608626 result_type = type (html )
609627 if isinstance (html , (str , bytes )):
0 commit comments