11#!/usr/bin/env python3 
22from  argparse  import  ArgumentParser 
3+ from  typing  import  Generator 
34import  tempfile 
45import  re 
56import  urllib .request 
2324
2425CERT_WIKI  =  "https://wiki.sei.cmu.edu" 
2526RULES_LIST_C  =  "/confluence/display/c/2+Rules" 
27+ RECOMMENDED_LIST_C  =  "/confluence/display/c/3+Recommendations" 
2628RULES_LIST_CPP  =  "/confluence/display/cplusplus/2+Rules" 
2729
2830cache_path  =  script_path .parent  /  '.cache' 
@@ -47,16 +49,22 @@ def soupify(url: str) -> BeautifulSoup:
4749
4850    return  BeautifulSoup (content , 'html.parser' )
4951
50- 
51- def  get_rules ():
52-     rules  =  []
53-     for  soup  in  [soupify (f"{ CERT_WIKI } { RULES_LIST_C }  ), soupify (f"{ CERT_WIKI } { RULES_LIST_CPP }  )]:
def get_rule_listings() -> Generator[Tag, None, None]:
    """Yield the listing heading element of each CERT index page.

    The C rules page, the C++ rules page and the C recommendations page are
    fetched in that order through ``soupify``. For each page the ``<h1>``
    heading that introduces the listing is located and yielded, so that the
    caller can walk from it to the links that follow.

    A page is skipped when ``soupify`` returns ``None`` for it, or when the
    expected heading is not present in the page. Only real elements are ever
    yielded, so consumers can dereference the result without a ``None`` check.

    Yields:
        The ``<h1>`` element whose text is "Rule Listing" (rules pages) or
        "Recommendation Listing" (recommendations page).
    """
    # (page path relative to CERT_WIKI, text of the heading preceding the list)
    listings = (
        (RULES_LIST_C, "Rule Listing"),
        (RULES_LIST_CPP, "Rule Listing"),
        (RECOMMENDED_LIST_C, "Recommendation Listing"),
    )
    for list_path, heading_text in listings:
        soup = soupify(f"{CERT_WIKI}{list_path}")
        if soup is None:
            continue

        heading = soup.find("h1", string=heading_text)
        # find() returns None when nothing matches; yielding that would make
        # the caller fail on `.next_element`, so skip the page instead.
        if heading is not None:
            yield heading
64+ 
65+ def  get_rules ():
66+     rules  =  []
67+     for  rule_listing_start  in  get_rule_listings ():
6068        for  link  in  rule_listing_start .next_element .next_element .find_all ('a' ):
6169            if  '-C'  in  link .string :
6270                rule , title  =  map (str .strip , link .string .split ('.' , 1 ))
@@ -214,6 +222,8 @@ def helper(node):
214222                # Fix a broken url present in many CERT-C pages 
215223                if  node .name  ==  'a'  and  'href'  in  node .attrs  and  node ['href' ] ==  "http://BB. Definitions#vulnerability" :
216224                    node ['href' ] =  "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-vulnerability" 
225+                 elif  node .name  ==  'a'  and  'href'  in  node .attrs  and  node ['href' ] ==  "http://BB. Definitions#unexpected behavior" :
226+                     node ['href' ] =  "https://wiki.sei.cmu.edu/confluence/display/c/BB.+Definitions#BB.Definitions-unexpectedbehavior" 
217227                # Turn relative URLs into absolute URLS 
218228                elif  node .name  ==  'a'  and  'href'  in  node .attrs  and  node ['href' ].startswith ("/confluence" ):
219229                    node ['href' ] =  f"{ CERT_WIKI } { node ['href' ]}  
0 commit comments