4
4
import six
5
5
6
6
7
+ # General-purpose regex patterns
8
+ re_convert_heading = re .compile (r'convert_h(\d+)' )
7
9
re_line_with_content = re .compile (r'^(.*)' , flags = re .MULTILINE )
8
10
re_whitespace = re .compile (r'[\t ]+' )
9
11
re_all_whitespace = re .compile (r'[\t \r\n]+' )
10
12
re_newline_whitespace = re .compile (r'[\t \r\n]*[\r\n][\t \r\n]*' )
11
13
re_html_heading = re .compile (r'h(\d+)' )
12
14
13
- # extract (leading_nl, content, trailing_nl) from a string
15
+ # Pattern for creating convert_<tag> function names from tag names
16
+ re_make_convert_fn_name = re .compile (r'[\[\]:-]' )
17
+
18
+ # Extract (leading_nl, content, trailing_nl) from a string
14
19
# (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
15
20
re_extract_newlines = re .compile (r'^(\n*)((?:.*[^\n])?)(\n*)$' , flags = re .DOTALL )
16
21
22
+ # Escape miscellaneous special Markdown characters
23
+ re_escape_misc_chars = re .compile (r'([]\\&<`[>~=+|])' )
24
+
25
+ # Escape a sequence of one or more consecutive '-', preceded
26
+ # and followed by whitespace or start/end of fragment, as it
27
+ # might be confused with an underline of a header, or with a
28
+ # list marker
29
+ re_escape_misc_dash_sequences = re .compile (r'(\s|^)(-+(?:\s|$))' )
30
+
31
+ # Escape a sequence of up to six consecutive '#', preceded
32
+ # and followed by whitespace or start/end of fragment, as
33
+ # it might be confused with an ATX heading
34
+ re_escape_misc_hashes = re .compile (r'(\s|^)(#{1,6}(?:\s|$))' )
35
+
36
+ # Escape '.' or ')' preceded by up to nine digits, as it might be
37
+ # confused with a list item
38
+ re_escape_misc_list_items = re .compile (r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))' )
17
39
18
40
# Heading styles
19
41
ATX = 'atx'
@@ -346,7 +368,7 @@ def get_conv_fn(self, tag_name):
346
368
return lambda el , text , parent_tags : self ._convert_hn (n , el , text , parent_tags )
347
369
348
370
# For other tags, look up their conversion function by tag name
349
- convert_fn_name = "convert_%s" % re .sub (r"[\[\]:-]" , "_" , tag_name )
371
+ convert_fn_name = "convert_%s" % re_make_convert_fn_name .sub ('_' , tag_name )
350
372
convert_fn = getattr (self , convert_fn_name , None )
351
373
return convert_fn
352
374
@@ -365,20 +387,11 @@ def escape(self, text, parent_tags):
365
387
if not text :
366
388
return ''
367
389
if self .options ['escape_misc' ]:
368
- text = re .sub (r'([]\\&<`[>~=+|])' , r'\\\1' , text )
369
- # A sequence of one or more consecutive '-', preceded and
370
- # followed by whitespace or start/end of fragment, might
371
- # be confused with an underline of a header, or with a
372
- # list marker.
373
- text = re .sub (r'(\s|^)(-+(?:\s|$))' , r'\1\\\2' , text )
374
- # A sequence of up to six consecutive '#', preceded and
375
- # followed by whitespace or start/end of fragment, might
376
- # be confused with an ATX heading.
377
- text = re .sub (r'(\s|^)(#{1,6}(?:\s|$))' , r'\1\\\2' , text )
378
- # '.' or ')' preceded by up to nine digits might be
379
- # confused with a list item.
380
- text = re .sub (r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))' , r'\1\\\2' ,
381
- text )
390
+ text = re_escape_misc_chars .sub (r'\\\1' , text )
391
+ text = re_escape_misc_dash_sequences .sub (r'\1\\\2' , text )
392
+ text = re_escape_misc_hashes .sub (r'\1\\\2' , text )
393
+ text = re_escape_misc_list_items .sub (r'\1\\\2' , text )
394
+
382
395
if self .options ['escape_asterisks' ]:
383
396
text = text .replace ('*' , r'\*' )
384
397
if self .options ['escape_underscores' ]:
0 commit comments