@@ -44,6 +44,22 @@ def chomp(text):
4444 return (prefix , suffix , text )
4545
4646
def abstract_inline_conversion(markup_fn):
    """
    Build a converter method for simple inline tags (b, em, del, ...).

    markup_fn is called with the converter instance and must return the
    delimiter string; it is a callable rather than a plain string so that the
    delimiter can depend on instance options such as strong_em_symbol.

    The returned function has the usual converter signature
    (self, el, text, convert_as_inline). It chomps the text, returns '' when
    nothing is left, and otherwise places the delimiter on both sides of the
    chomped text, between the prefix and suffix that chomp() split off.
    """
    def implementation(self, el, text, convert_as_inline):
        delimiter = markup_fn(self)
        lead, trail, body = chomp(text)
        if body:
            return ''.join((lead, delimiter, body, delimiter, trail))
        # Nothing left after chomping: emit no markup at all.
        return ''
    return implementation
61+
62+
def _todict(obj):
    """Return a dict of every attribute of obj whose name does not start with '_'."""
    return {
        name: getattr(obj, name)
        for name in dir(obj)
        if not name.startswith('_')
    }
4965
@@ -124,12 +140,21 @@ def is_nested_node(el):
124140
125141 def process_text (self , el ):
126142 text = six .text_type (el )
143+
144+ # dont remove any whitespace when handling pre or code in pre
145+ if (el .parent .name == 'pre'
146+ or (el .parent .name == 'code' and el .parent .parent .name == 'pre' )):
147+ return escape (text or '' )
148+
149+ cleaned_text = escape (whitespace_re .sub (' ' , text or '' ))
150+
127151 # remove trailing whitespaces if any of the following condition is true:
128152 # - current text node is the last node in li
129153 # - current text node is followed by an embedded list
130154 if el .parent .name == 'li' and (not el .next_sibling or el .next_sibling .name in ['ul' , 'ol' ]):
131- return escape (all_whitespace_re .sub (' ' , text or '' )).rstrip ()
132- return escape (whitespace_re .sub (' ' , text or '' ))
155+ return cleaned_text .rstrip ()
156+
157+ return cleaned_text
133158
134159 def __getattr__ (self , attr ):
135160 # Handle headings
@@ -179,8 +204,7 @@ def convert_a(self, el, text, convert_as_inline):
179204 title_part = ' "%s"' % title .replace ('"' , r'\"' ) if title else ''
180205 return '%s[%s](%s%s)%s' % (prefix , text , href , title_part , suffix ) if href else text
181206
182- def convert_b (self , el , text , convert_as_inline ):
183- return self .convert_strong (el , text , convert_as_inline )
207+ convert_b = abstract_inline_conversion (lambda self : 2 * self .options ['strong_em_symbol' ])
184208
185209 def convert_blockquote (self , el , text , convert_as_inline ):
186210
@@ -198,12 +222,17 @@ def convert_br(self, el, text, convert_as_inline):
198222 else :
199223 return ' \n '
200224
201- def convert_em (self , el , text , convert_as_inline ):
202- em_tag = self .options ['strong_em_symbol' ]
203- prefix , suffix , text = chomp (text )
204- if not text :
205- return ''
206- return '%s%s%s%s%s' % (prefix , em_tag , text , em_tag , suffix )
225+ def convert_code (self , el , text , convert_as_inline ):
226+ if el .parent .name == 'pre' :
227+ return text
228+ converter = abstract_inline_conversion (lambda self : '`' )
229+ return converter (self , el , text , convert_as_inline )
230+
231+ convert_del = abstract_inline_conversion (lambda self : '~~' )
232+
233+ convert_em = abstract_inline_conversion (lambda self : self .options ['strong_em_symbol' ])
234+
235+ convert_kbd = convert_code
207236
208237 def convert_hn (self , n , el , text , convert_as_inline ):
209238 if convert_as_inline :
@@ -219,8 +248,20 @@ def convert_hn(self, n, el, text, convert_as_inline):
219248 return '%s %s %s\n \n ' % (hashes , text , hashes )
220249 return '%s %s\n \n ' % (hashes , text )
221250
222- def convert_i (self , el , text , convert_as_inline ):
223- return self .convert_em (el , text , convert_as_inline )
251+ def convert_hr (self , el , text , convert_as_inline ):
252+ return '\n \n ---\n \n '
253+
254+ convert_i = convert_em
255+
256+ def convert_img (self , el , text , convert_as_inline ):
257+ alt = el .attrs .get ('alt' , None ) or ''
258+ src = el .attrs .get ('src' , None ) or ''
259+ title = el .attrs .get ('title' , None ) or ''
260+ title_part = ' "%s"' % title .replace ('"' , r'\"' ) if title else ''
261+ if convert_as_inline :
262+ return alt
263+
264+ return '' % (alt , src , title_part )
224265
225266 def convert_list (self , el , text , convert_as_inline ):
226267
@@ -267,26 +308,26 @@ def convert_p(self, el, text, convert_as_inline):
267308 return text
268309 return '%s\n \n ' % text if text else ''
269310
270- def convert_strong (self , el , text , convert_as_inline ):
271- strong_tag = 2 * self .options ['strong_em_symbol' ]
272- prefix , suffix , text = chomp (text )
311+ def convert_pre (self , el , text , convert_as_inline ):
273312 if not text :
274313 return ''
275- return '%s%s%s%s%s ' % ( prefix , strong_tag , text , strong_tag , suffix )
314+ return '\n ``` \n %s \n ``` \n ' % text
276315
277- def convert_img (self , el , text , convert_as_inline ):
278- alt = el .attrs .get ('alt' , None ) or ''
279- src = el .attrs .get ('src' , None ) or ''
280- title = el .attrs .get ('title' , None ) or ''
281- title_part = ' "%s"' % title .replace ('"' , r'\"' ) if title else ''
282- if convert_as_inline :
283- return alt
316+ convert_s = convert_del
284317
285- return '' % (alt , src , title_part )
318+ convert_strong = convert_b
319+
320+ convert_samp = convert_code
286321
287322 def convert_table (self , el , text , convert_as_inline ):
288323 return '\n \n ' + text + '\n '
289324
325+ def convert_td (self , el , text , convert_as_inline ):
326+ return ' ' + text + ' |'
327+
328+ def convert_th (self , el , text , convert_as_inline ):
329+ return ' ' + text + ' |'
330+
290331 def convert_tr (self , el , text , convert_as_inline ):
291332 cells = el .find_all (['td' , 'th' ])
292333 is_headrow = all ([cell .name == 'th' for cell in cells ])
@@ -302,15 +343,6 @@ def convert_tr(self, el, text, convert_as_inline):
302343 overline += '| ' + ' | ' .join (['---' ] * len (cells )) + ' |' + '\n '
303344 return overline + '|' + text + '\n ' + underline
304345
305- def convert_th (self , el , text , convert_as_inline ):
306- return ' ' + text + ' |'
307-
308- def convert_td (self , el , text , convert_as_inline ):
309- return ' ' + text + ' |'
310-
311- def convert_hr (self , el , text , convert_as_inline ):
312- return '\n \n ---\n \n '
313-
314346
def markdownify(html, **options):
    """
    Convert html to Markdown.

    All keyword options are passed to the MarkdownConverter constructor; the
    result of its convert() call is returned.
    """
    converter = MarkdownConverter(**options)
    return converter.convert(html)
0 commit comments