diff --git a/markdownify/__init__.py b/markdownify/__init__.py index b70a0e5..fd03569 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -5,7 +5,7 @@ convert_heading_re = re.compile(r'convert_h(\d+)') -line_beginning_re = re.compile(r'^', re.MULTILINE) +line_with_content_re = re.compile(r'^(.*)', flags=re.MULTILINE) whitespace_re = re.compile(r'[\t ]+') all_whitespace_re = re.compile(r'[\t \r\n]+') newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*') @@ -256,9 +256,6 @@ def escape(self, text): text = text.replace('_', r'\_') return text - def indent(self, text, columns): - return line_beginning_re.sub(' ' * columns, text) if text else '' - def underline(self, text, pad_char): text = (text or '').rstrip() return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else '' @@ -286,11 +283,20 @@ def convert_a(self, el, text, convert_as_inline): convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol']) def convert_blockquote(self, el, text, convert_as_inline): - + # handle some early-exit scenarios + text = (text or '').strip() if convert_as_inline: - return ' ' + text.strip() + ' ' + return ' ' + text + ' ' + if not text: + return "\n" + + # indent lines with blockquote marker + def _indent_for_blockquote(match): + line_content = match.group(1) + return '> ' + line_content if line_content else '>' + text = line_with_content_re.sub(_indent_for_blockquote, text) - return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else '' + return '\n' + text + '\n\n' def convert_br(self, el, text, convert_as_inline): if convert_as_inline: @@ -371,6 +377,12 @@ def convert_list(self, el, text, convert_as_inline): convert_ol = convert_list def convert_li(self, el, text, convert_as_inline): + # handle some early-exit scenarios + text = (text or '').strip() + if not text: + return "\n" + + # determine list item bullet character to use parent = el.parent if parent is not None and parent.name == 'ol': if parent.get("start") and str(parent.get("start")).isnumeric(): @@ -387,10 +399,18 @@ def convert_li(self, el, text, convert_as_inline): bullets = self.options['bullets'] bullet = bullets[depth % len(bullets)] bullet = bullet + ' ' - text = (text or '').strip() - text = self.indent(text, len(bullet)) - if text: - text = bullet + text[len(bullet):] + bullet_width = len(bullet) + bullet_indent = ' ' * bullet_width + + # indent content lines by bullet width + def _indent_for_li(match): + line_content = match.group(1) + return bullet_indent + line_content if line_content else '' + text = line_with_content_re.sub(_indent_for_li, text) + + # insert bullet into first-line indent whitespace + text = bullet + text[bullet_width:] + return '%s\n' % text def convert_p(self, el, text, convert_as_inline): diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 01f8b91..868db7c 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -62,7 +62,7 @@ def test_blockquote(): def test_blockquote_with_nested_paragraph(): assert md('
') == '\n> Hello\n\n' - assert md('Hello
') == '\n> Hello\n> \n> Hello again\n\n' + assert md('Hello
Hello again
') == '\n> Hello\n>\n> Hello again\n\n' def test_blockquote_with_paragraph(): diff --git a/tests/test_lists.py b/tests/test_lists.py index a660778..ce54a87 100644 --- a/tests/test_lists.py +++ b/tests/test_lists.py @@ -47,7 +47,7 @@ def test_ol(): assert md('Hello
Hello again
first para
second para
third para
fourth para
first para
second para
third para
fourth para
first para
second para
third para
fourth para
first para
second para
third para
fourth para