Skip to content

Commit 6984dca

Browse files
authored
use a conversion function cache to improve runtime (#196)
Signed-off-by: chrispy <[email protected]>
1 parent 24977fd commit 6984dca

File tree

1 file changed

+31 -17 lines changed

1 file changed

+31 -17 lines changed

Diff for: markdownify/__init__.py

+31 -17
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,11 @@
44
import six
55

66

7-
re_convert_heading = re.compile(r'convert_h(\d+)')
87
re_line_with_content = re.compile(r'^(.*)', flags=re.MULTILINE)
98
re_whitespace = re.compile(r'[\t ]+')
109
re_all_whitespace = re.compile(r'[\t \r\n]+')
1110
re_newline_whitespace = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
12-
re_html_heading = re.compile(r'h[1-6]')
11+
re_html_heading = re.compile(r'h(\d+)')
1312

1413
# extract (leading_nl, content, trailing_nl) from a string
1514
# (functionally equivalent to r'^(\n*)(.*?)(\n*)$', but greedy is faster than reluctant here)
@@ -165,6 +164,9 @@ def __init__(self, **options):
165164
raise ValueError('You may specify either tags to strip or tags to'
166165
' convert, but not both.')
167166

167+
# Initialize the conversion function cache
168+
self.convert_fn_cache = {}
169+
168170
def convert(self, html):
169171
soup = BeautifulSoup(html, 'html.parser')
170172
return self.convert_soup(soup)
@@ -266,9 +268,8 @@ def _can_ignore(el):
266268
text = ''.join(child_strings)
267269

268270
# apply this tag's final conversion function
269-
convert_fn_name = "convert_%s" % re.sub(r"[\[\]:-]", "_", node.name)
270-
convert_fn = getattr(self, convert_fn_name, None)
271-
if convert_fn and self.should_convert_tag(node.name):
271+
convert_fn = self.get_conv_fn_cached(node.name)
272+
if convert_fn is not None:
272273
text = convert_fn(node, text, parent_tags=parent_tags)
273274

274275
return text
@@ -321,23 +322,36 @@ def process_text(self, el, parent_tags=None):
321322

322323
return text
323324

324-
def __getattr__(self, attr):
325-
# Handle headings
326-
m = re_convert_heading.match(attr)
327-
if m:
328-
n = int(m.group(1))
325+
def get_conv_fn_cached(self, tag_name):
326+
"""Given a tag name, return the conversion function using the cache."""
327+
# If conversion function is not in cache, add it
328+
if tag_name not in self.convert_fn_cache:
329+
self.convert_fn_cache[tag_name] = self.get_conv_fn(tag_name)
330+
331+
# Return the cached entry
332+
return self.convert_fn_cache[tag_name]
329333

330-
def convert_tag(el, text, parent_tags):
331-
return self._convert_hn(n, el, text, parent_tags)
334+
def get_conv_fn(self, tag_name):
335+
"""Given a tag name, find and return the conversion function."""
336+
tag_name = tag_name.lower()
332337

333-
convert_tag.__name__ = 'convert_h%s' % n
334-
setattr(self, convert_tag.__name__, convert_tag)
335-
return convert_tag
338+
# Handle strip/convert exclusion options
339+
if not self.should_convert_tag(tag_name):
340+
return None
336341

337-
raise AttributeError(attr)
342+
# Handle headings with _convert_hn() function
343+
match = re_html_heading.match(tag_name)
344+
if match:
345+
n = int(match.group(1))
346+
return lambda el, text, parent_tags: self._convert_hn(n, el, text, parent_tags)
347+
348+
# For other tags, look up their conversion function by tag name
349+
convert_fn_name = "convert_%s" % re.sub(r"[\[\]:-]", "_", tag_name)
350+
convert_fn = getattr(self, convert_fn_name, None)
351+
return convert_fn
338352

339353
def should_convert_tag(self, tag):
340-
tag = tag.lower()
354+
"""Given a tag name, return whether to convert based on strip/convert options."""
341355
strip = self.options['strip']
342356
convert = self.options['convert']
343357
if strip is not None:

0 commit comments

Comments (0)