Skip to content

Commit f73a435

Browse files
authored
Merge pull request #171 from chrispy-snps/chrispy/optimize-li-blockquote-empty-lines
optimize empty-line handling for li and blockquote content
2 parents 600f77d + 17c3678 commit f73a435

File tree

3 files changed

+34
-14
lines changed

3 files changed

+34
-14
lines changed

Diff for: markdownify/__init__.py

+31 -11
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66

77
convert_heading_re = re.compile(r'convert_h(\d+)')
8-
line_beginning_re = re.compile(r'^', re.MULTILINE)
8+
line_with_content_re = re.compile(r'^(.*)', flags=re.MULTILINE)
99
whitespace_re = re.compile(r'[\t ]+')
1010
all_whitespace_re = re.compile(r'[\t \r\n]+')
1111
newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
@@ -256,9 +256,6 @@ def escape(self, text):
256256
text = text.replace('_', r'\_')
257257
return text
258258

259-
def indent(self, text, columns):
260-
return line_beginning_re.sub(' ' * columns, text) if text else ''
261-
262259
def underline(self, text, pad_char):
263260
text = (text or '').rstrip()
264261
return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
@@ -286,11 +283,20 @@ def convert_a(self, el, text, convert_as_inline):
286283
convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol'])
287284

288285
def convert_blockquote(self, el, text, convert_as_inline):
289-
286+
# handle some early-exit scenarios
287+
text = (text or '').strip()
290288
if convert_as_inline:
291-
return ' ' + text.strip() + ' '
289+
return ' ' + text + ' '
290+
if not text:
291+
return "\n"
292+
293+
# indent lines with blockquote marker
294+
def _indent_for_blockquote(match):
295+
line_content = match.group(1)
296+
return '> ' + line_content if line_content else '>'
297+
text = line_with_content_re.sub(_indent_for_blockquote, text)
292298

293-
return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
299+
return '\n' + text + '\n\n'
294300

295301
def convert_br(self, el, text, convert_as_inline):
296302
if convert_as_inline:
@@ -371,6 +377,12 @@ def convert_list(self, el, text, convert_as_inline):
371377
convert_ol = convert_list
372378

373379
def convert_li(self, el, text, convert_as_inline):
380+
# handle some early-exit scenarios
381+
text = (text or '').strip()
382+
if not text:
383+
return "\n"
384+
385+
# determine list item bullet character to use
374386
parent = el.parent
375387
if parent is not None and parent.name == 'ol':
376388
if parent.get("start") and str(parent.get("start")).isnumeric():
@@ -387,10 +399,18 @@ def convert_li(self, el, text, convert_as_inline):
387399
bullets = self.options['bullets']
388400
bullet = bullets[depth % len(bullets)]
389401
bullet = bullet + ' '
390-
text = (text or '').strip()
391-
text = self.indent(text, len(bullet))
392-
if text:
393-
text = bullet + text[len(bullet):]
402+
bullet_width = len(bullet)
403+
bullet_indent = ' ' * bullet_width
404+
405+
# indent content lines by bullet width
406+
def _indent_for_li(match):
407+
line_content = match.group(1)
408+
return bullet_indent + line_content if line_content else ''
409+
text = line_with_content_re.sub(_indent_for_li, text)
410+
411+
# insert bullet into first-line indent whitespace
412+
text = bullet + text[bullet_width:]
413+
394414
return '%s\n' % text
395415

396416
def convert_p(self, el, text, convert_as_inline):

Diff for: tests/test_conversions.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def test_blockquote():
6262

6363
def test_blockquote_with_nested_paragraph():
6464
assert md('<blockquote><p>Hello</p></blockquote>') == '\n> Hello\n\n'
65-
assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n> \n> Hello again\n\n'
65+
assert md('<blockquote><p>Hello</p><p>Hello again</p></blockquote>') == '\n> Hello\n>\n> Hello again\n\n'
6666

6767

6868
def test_blockquote_with_paragraph():

Diff for: tests/test_lists.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def test_ol():
4747
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
4848
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
4949
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
50-
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n \n second para\n1235. third para\n \n fourth para\n'
50+
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n\n second para\n1235. third para\n\n fourth para\n'
5151

5252

5353
def test_nested_ols():
@@ -64,7 +64,7 @@ def test_ul():
6464
<li> c
6565
</li>
6666
</ul>""") == '\n\n* a\n* b\n* c\n'
67-
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n \n second para\n* third para\n \n fourth para\n'
67+
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n\n second para\n* third para\n\n fourth para\n'
6868

6969

7070
def test_inline_ul():

0 commit comments

Comments
 (0)