From 76f01cc8efaed5d8dc6f304ecb42be92cd9ca96f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=81lvaro=20Mond=C3=A9jar=20Rubio?= Date: Tue, 22 Oct 2024 04:30:27 +0200 Subject: [PATCH] Indented code blocks surrounded by newlines (#234) --- .github/workflows/ci.yml | 2 +- src/mkdocs_include_markdown_plugin/process.py | 78 ++++++++--- tests/test_unit/test_process.py | 124 ++++++++++++------ 3 files changed, 142 insertions(+), 62 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e524b30..ca494c7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: run: | import codecs, os, sys env = f"py=py3{sys.version_info[1]}\n" - print(f"Picked {env.split('=')[1].strip()} for {sys.version}") + sys.stdout.write(f"Picked {env.split('=')[1].strip()} for {sys.version}\n") with codecs.open(os.environ["GITHUB_OUTPUT"], "a", "utf-8") as file_handler: file_handler.write(env) - name: Install dependencies diff --git a/src/mkdocs_include_markdown_plugin/process.py b/src/mkdocs_include_markdown_plugin/process.py index 62eb9f9..57771e5 100644 --- a/src/mkdocs_include_markdown_plugin/process.py +++ b/src/mkdocs_include_markdown_plugin/process.py @@ -114,7 +114,7 @@ ) -def transform_p_by_p_skipping_codeblocks( +def transform_p_by_p_skipping_codeblocks( # noqa: PLR0912, PLR0915 markdown: str, func: Callable[[str], str], ) -> str: @@ -128,45 +128,80 @@ def transform_p_by_p_skipping_codeblocks( _current_fcodeblock_delimiter = '' # inside indented codeblock - _inside_icodeblock = False + _maybe_icodeblock_lines: list[str] = [] + _previous_line_was_empty = False lines, current_paragraph = ([], '') def process_current_paragraph() -> None: lines.extend(func(current_paragraph).splitlines(keepends=True)) + # The next implementation takes into account that indented code + # blocks must be surrounded by newlines as per the CommonMark + # specification. 
See https://spec.commonmark.org/0.28/#indented-code-blocks + # + # However, note that ambiguities with list items are not handled. + for line in io.StringIO(markdown): - if not _current_fcodeblock_delimiter and not _inside_icodeblock: + if not _current_fcodeblock_delimiter: lstripped_line = line.lstrip() if ( lstripped_line.startswith('```') or lstripped_line.startswith('~~~') ): _current_fcodeblock_delimiter = lstripped_line[:3] - if current_paragraph: - process_current_paragraph() - current_paragraph = '' + process_current_paragraph() + current_paragraph = '' lines.append(line) - elif ( - line.replace('\t', ' ').replace('\r\n', '\n') - == ' \n' - ): - _inside_icodeblock = True - if current_paragraph: + elif line.startswith(' '): + if not lstripped_line or _maybe_icodeblock_lines: + # maybe enter indented codeblock + _maybe_icodeblock_lines.append(line) + else: + current_paragraph += line + elif _maybe_icodeblock_lines: + process_current_paragraph() + current_paragraph = '' + if not _previous_line_was_empty: + # wasn't an indented code block + for line_ in _maybe_icodeblock_lines: + current_paragraph += line_ + _maybe_icodeblock_lines = [] + current_paragraph += line process_current_paragraph() current_paragraph = '' - lines.append(line) + else: + # exit indented codeblock + for line_ in _maybe_icodeblock_lines: + lines.append(line_) + _maybe_icodeblock_lines = [] + lines.append(line) else: current_paragraph += line + _previous_line_was_empty = not lstripped_line else: lines.append(line) - if _current_fcodeblock_delimiter: - if line.lstrip().startswith(_current_fcodeblock_delimiter): - _current_fcodeblock_delimiter = '' - elif not line.startswith(' ') and not line.startswith('\t'): - _inside_icodeblock = False - - process_current_paragraph() + lstripped_line = line.lstrip() + if lstripped_line.startswith(_current_fcodeblock_delimiter): + _current_fcodeblock_delimiter = '' + _previous_line_was_empty = not lstripped_line + + if _maybe_icodeblock_lines: + if not 
_previous_line_was_empty: + # at EOF + process_current_paragraph() + current_paragraph = '' + for line_ in _maybe_icodeblock_lines: + current_paragraph += line_ + process_current_paragraph() + current_paragraph = '' + else: + process_current_paragraph() + current_paragraph = '' + for line_ in _maybe_icodeblock_lines: + lines.append(line_) + else: + process_current_paragraph() return ''.join(lines) @@ -180,7 +215,7 @@ def transform_line_by_line_skipping_codeblocks( Skip fenced codeblock lines, where the transformation never is applied. Indented codeblocks are not taken into account because in the practice - this function is never used for transformations on indented lines. See + this function is only used for transformations of heading prefixes. See the PR https://github.com/mondeja/mkdocs-include-markdown-plugin/pull/95 to recover the implementation handling indented codeblocks. """ @@ -269,6 +304,7 @@ def transform(paragraph: str) -> str: functools.partial(found_href, url_group_index=2), paragraph, ) + return transform_p_by_p_skipping_codeblocks( markdown, transform, diff --git a/tests/test_unit/test_process.py b/tests/test_unit/test_process.py index a85113c..62d8655 100644 --- a/tests/test_unit/test_process.py +++ b/tests/test_unit/test_process.py @@ -14,14 +14,10 @@ ('markdown', 'source_path', 'destination_path', 'expected_result'), ( pytest.param( - ''' - Here's a [link](CHANGELOG.md) to the changelog. -''', + "Here's a [link](CHANGELOG.md) to the changelog.", 'README', 'docs/nav.md', - ''' - Here's a [link](../CHANGELOG.md) to the changelog. 
-''', + "Here's a [link](../CHANGELOG.md) to the changelog.", id='relative-link', ), pytest.param( @@ -69,17 +65,17 @@ id='link-reference', ), pytest.param( - '''Here's a diagram: ![diagram](assets/diagram.png)''', + "Here's a diagram: ![diagram](assets/diagram.png)", 'README', 'docs/home.md', - '''Here's a diagram: ![diagram](../assets/diagram.png)''', + "Here's a diagram: ![diagram](../assets/diagram.png)", id='image', ), pytest.param( - '''Build status: [![Build Status](badge.png)](build/)''', + 'Build status: [![Build Status](badge.png)](build/)', 'README', 'docs/home.md', - '''Build status: [![Build Status](../badge.png)](../build/)''', + 'Build status: [![Build Status](../badge.png)](../build/)', id='image-inside-link', ), pytest.param( @@ -92,10 +88,10 @@ id='absolute-urls', ), pytest.param( - '''[contact us](mailto:hello@example.com)''', + '[contact us](mailto:hello@example.com)', 'README', 'docs/nav.md', - '''[contact us](mailto:hello@example.com)''', + '[contact us](mailto:hello@example.com)', id='mailto-urls', ), pytest.param( @@ -120,35 +116,33 @@ id='cpp-likelink-fenced-codeblock', ), pytest.param( - '''Some text before -\t -\tconst auto lambda = []() { .... }; - -Some text after -''', + ( + 'Text before\n' + ' \n ' + 'const auto lambda = []() { .... };\n \nText after\n' + ), 'README', 'examples/lambda.md', - '''Some text before -\t -\tconst auto lambda = []() { .... }; - -Some text after -''', + ( + 'Text before\n' + ' \n ' + 'const auto lambda = []() { .... };\n \nText after\n' + ), id='cpp-likelink-indented-codeblock', ), pytest.param( - '''Some text before -\t -\tconst auto lambda = []() { .... };\r\n -Some text after -''', + ( + 'Text before\r\n' + ' \r\n ' + 'const auto lambda = []() { .... };\r\n \r\nText after\r\n' + ), 'README', 'examples/lambda.md', - '''Some text before -\t -\tconst auto lambda = []() { .... };\r\n -Some text after -''', + ( + 'Text before\r\n' + ' \r\n ' + 'const auto lambda = []() { .... 
};\r\n \r\nText after\r\n' + ), id='cpp-likelink-indented-codeblock-windows-newlines', ), pytest.param( @@ -165,16 +159,66 @@ id='exclude-fenced-code-blocks', ), pytest.param( - ' ' * 4 + ''' - [link](CHANGELOG.md) -''' + ' ' * 4 + '\n', + ( + ' \n' + ' [link](CHANGELOG.md)\n' + ' \n' + ), 'README', 'docs/nav.md', - ' ' * 4 + ''' - [link](CHANGELOG.md) -''' + ' ' * 4 + '\n', + ( + ' \n' + ' [link](CHANGELOG.md)\n' + ' \n' + ), id='exclude-indented-code-blocks', ), + pytest.param( + ( + ' \n' + ' [link](CHANGELOG.md)\n' + ), 'README', 'docs/nav.md', + # is rewritten because there is no newline at the end of the code block + ( + ' \n' + ' [link](../CHANGELOG.md)\n' + ), id='exclude-indented-code-blocks-eof', ), + pytest.param( + ( + ' [link](CHANGELOG.md)\n' + ' \n' + ), 'README', 'docs/nav.md', + ( + ' [link](../CHANGELOG.md)\n' + ' \n' + ), + # No newline before, so this is not an indented code block, see: + # https://spec.commonmark.org/0.28/#indented-code-blocks id='no-exclude-indented-code-blocks-missing-newline-before', ), + pytest.param( + ( + ' \n' + ' [link](CHANGELOG.md)\n' + 'Foo\n' + ), 'README', 'docs/nav.md', + ( + ' \n' + ' [link](../CHANGELOG.md)\n' + 'Foo\n' + ), + # No newline after, so this is not an indented code block, see: + # https://spec.commonmark.org/0.28/#indented-code-blocks id='no-exclude-indented-code-blocks-missing-newline-after', ), ), ) def test_rewrite_relative_urls(