From 8d6f88b67cbbd5672d6d6a6ac5733c9c6871562f Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Wed, 1 Feb 2023 20:45:03 +0200 Subject: [PATCH 1/6] fix: Need to linkify during Markdown rendering --- ietf/utils/markdown.py | 38 +++++++++++++++++++++++++++++++------- requirements.txt | 1 + 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/ietf/utils/markdown.py b/ietf/utils/markdown.py index 3b7c60cae0..80b2f9185a 100644 --- a/ietf/utils/markdown.py +++ b/ietf/utils/markdown.py @@ -10,17 +10,41 @@ from django.utils.safestring import mark_safe from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs -from ietf.utils.text import bleach_cleaner, bleach_linker +from ietf.utils.text import ( + bleach_cleaner, + check_url_validity, + bleach, + tlds_sorted, + protocols, +) +from mdx_linkify.mdx_linkify import LinkifyExtension # type: ignore def markdown(text): return mark_safe( - bleach_linker.linkify( - urlize_ietf_docs( - bleach_cleaner.clean( - python_markdown.markdown( - text, extensions=["extra", "nl2br", "sane_lists", "toc"] - ) + urlize_ietf_docs( + bleach_cleaner.clean( + python_markdown.markdown( + text, + extensions=[ + "extra", + "nl2br", + "sane_lists", + "toc", + LinkifyExtension( + # keep these in sync with the bleach_linker initialization + linker_options={ + "callbacks": [check_url_validity], + "url_re": bleach.linkifier.build_url_re( + tlds=tlds_sorted, protocols=protocols + ), + "email_re": bleach.linkifier.build_email_re( + tlds=tlds_sorted + ), + "parse_email": True, + } + ), + ], ) ) ) diff --git a/requirements.txt b/requirements.txt index 7a5662ffa7..aaddfef901 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,6 +40,7 @@ jwcrypto>=1.2 # for signed notifications - this is aspirational, and is not r logging_tree>=1.9 # Used only by the showloggers management command lxml>=4.8.0,<5 markdown>=3.3.6 +mdx_linkify>=2.1 mock>=4.0.3 # Used only by tests, of course mypy>=0.782,<0.790 # Version requirements determined by django-stubs. mysqlclient>=2.1.0 From 1b4e9c6326bfd7935a5eeac655be3554a48d7863 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Thu, 2 Feb 2023 11:41:44 +0200 Subject: [PATCH 2/6] Don't depend on mdx_linkify --- ietf/utils/markdown.py | 43 +++++++++++++++++++++--------------------- requirements.txt | 1 - 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/ietf/utils/markdown.py b/ietf/utils/markdown.py index 80b2f9185a..d2b00fd357 100644 --- a/ietf/utils/markdown.py +++ b/ietf/utils/markdown.py @@ -6,18 +6,28 @@ the datatracker. """ import markdown as python_markdown +from markdown.extensions import Extension +from markdown.postprocessors import Postprocessor from django.utils.safestring import mark_safe from ietf.doc.templatetags.ietf_filters import urlize_ietf_docs -from ietf.utils.text import ( - bleach_cleaner, - check_url_validity, - bleach, - tlds_sorted, - protocols, -) -from mdx_linkify.mdx_linkify import LinkifyExtension # type: ignore +from ietf.utils.text import bleach_cleaner, bleach_linker + + +class LinkifyExtension(Extension): + """ + Simple Markdown extension inspired by https://github.com/daGrevis/mdx_linkify, + but using our bleach_linker directly. + """ + + def extendMarkdown(self, md): + md.postprocessors.register(LinkifyPostprocessor(md), "linkify", 50) + + +class LinkifyPostprocessor(Postprocessor): + def run(self, text): + return bleach_linker.linkify(text) def markdown(text): @@ -27,23 +37,14 @@ def markdown(text): python_markdown.markdown( text, extensions=[ + # TODO: discuss which extensions we want to enable, see + # https://python-markdown.github.io/extensions/ and + # https://github.com/Python-Markdown/markdown/wiki/Third-Party-Extensions "extra", "nl2br", "sane_lists", "toc", - LinkifyExtension( - # keep these in sync with the bleach_linker initialization - linker_options={ - "callbacks": [check_url_validity], - "url_re": bleach.linkifier.build_url_re( - tlds=tlds_sorted, protocols=protocols - ), - "email_re": bleach.linkifier.build_email_re( - tlds=tlds_sorted - ), - "parse_email": True, - } - ), + LinkifyExtension(), ], ) ) diff --git a/requirements.txt b/requirements.txt index aaddfef901..7a5662ffa7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,7 +40,6 @@ jwcrypto>=1.2 # for signed notifications - this is aspirational, and is not r logging_tree>=1.9 # Used only by the showloggers management command lxml>=4.8.0,<5 markdown>=3.3.6 -mdx_linkify>=2.1 mock>=4.0.3 # Used only by tests, of course mypy>=0.782,<0.790 # Version requirements determined by django-stubs. mysqlclient>=2.1.0 From 26b68b52708345930b8a9254faffe613a02624b9 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Thu, 2 Feb 2023 13:28:55 +0200 Subject: [PATCH 3/6] Also linkify IETF docs as part of the Markdown conversion --- ietf/utils/markdown.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/ietf/utils/markdown.py b/ietf/utils/markdown.py index d2b00fd357..b0d71b8b49 100644 --- a/ietf/utils/markdown.py +++ b/ietf/utils/markdown.py @@ -18,7 +18,8 @@ class LinkifyExtension(Extension): """ Simple Markdown extension inspired by https://github.com/daGrevis/mdx_linkify, - but using our bleach_linker directly. + but using our bleach_linker directly. Doing the linkification on the converted + Markdown output introduces artifacts. """ def extendMarkdown(self, md): @@ -27,26 +28,24 @@ def extendMarkdown(self, md): class LinkifyPostprocessor(Postprocessor): def run(self, text): - return bleach_linker.linkify(text) + return urlize_ietf_docs(bleach_linker.linkify(text)) def markdown(text): return mark_safe( - urlize_ietf_docs( - bleach_cleaner.clean( - python_markdown.markdown( - text, - extensions=[ - # TODO: discuss which extensions we want to enable, see - # https://python-markdown.github.io/extensions/ and - # https://github.com/Python-Markdown/markdown/wiki/Third-Party-Extensions - "extra", - "nl2br", - "sane_lists", - "toc", - LinkifyExtension(), - ], - ) + bleach_cleaner.clean( + python_markdown.markdown( + text, + extensions=[ + # TODO: discuss which extensions we want to enable, see + # https://python-markdown.github.io/extensions/ and + # https://github.com/Python-Markdown/markdown/wiki/Third-Party-Extensions + "extra", + "nl2br", + "sane_lists", + "toc", + LinkifyExtension(), + ], ) ) ) From 905110c84e1abe5f47ec491bf2a78d90f2b8576e Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Fri, 3 Feb 2023 14:50:49 +0200 Subject: [PATCH 4/6] Add test case --- ietf/utils/markdown-test.html | 18 ++++++++++++++++++ ietf/utils/markdown-test.md | 19 +++++++++++++++++++ ietf/utils/markdown.py | 3 --- ietf/utils/tests_markdown.py | 19 +++++++++++++++++++ 4 files changed, 56 insertions(+), 3 deletions(-) create mode 100644 ietf/utils/markdown-test.html create mode 100644 ietf/utils/markdown-test.md create mode 100644 ietf/utils/tests_markdown.py diff --git a/ietf/utils/markdown-test.html b/ietf/utils/markdown-test.html new file mode 100644 index 0000000000..5df9e2f9b1 --- /dev/null +++ b/ietf/utils/markdown-test.html @@ -0,0 +1,18 @@ +

IETF Markdown Test File

+

This file contains a bunch of constructs to test our markdown converter in
+ietf/utils/markdown.py.

+ + \ No newline at end of file diff --git a/ietf/utils/markdown-test.md b/ietf/utils/markdown-test.md new file mode 100644 index 0000000000..ba02b6fdf1 --- /dev/null +++ b/ietf/utils/markdown-test.md @@ -0,0 +1,19 @@ +# IETF Markdown Test File + +This file contains a bunch of constructs to test our markdown converter in +`ietf/utils/markdown.py`. + +## Links + +* https://example.com +* +* [Example](https://example.com) +* user@example.com +* +* [User](mailto:user@example.com) +* RFC2119 +* BCP 3 +* STD 1 +* FYI2 +* draft-ietf-opsec-indicators-of-compromise +* draft-ietf-opsec-indicators-of-compromise-01 diff --git a/ietf/utils/markdown.py b/ietf/utils/markdown.py index b0d71b8b49..35b89745f6 100644 --- a/ietf/utils/markdown.py +++ b/ietf/utils/markdown.py @@ -37,9 +37,6 @@ def markdown(text): python_markdown.markdown( text, extensions=[ - # TODO: discuss which extensions we want to enable, see - # https://python-markdown.github.io/extensions/ and - # https://github.com/Python-Markdown/markdown/wiki/Third-Party-Extensions "extra", "nl2br", "sane_lists", diff --git a/ietf/utils/tests_markdown.py b/ietf/utils/tests_markdown.py new file mode 100644 index 0000000000..2eeb25166e --- /dev/null +++ b/ietf/utils/tests_markdown.py @@ -0,0 +1,19 @@ +# Copyright The IETF Trust 2023, All Rights Reserved +"""Markdown API utilities tests""" + +from pathlib import Path + +from django.conf import settings + +from ietf.utils.tests import TestCase +from ietf.utils.markdown import markdown + + +class MarkdownTests(TestCase): + SAMPLE_DIR = Path(settings.BASE_DIR) / "utils" + SAMPLE_MARKDOWN = (SAMPLE_DIR / "markdown-test.md").read_text() + SAMPLE_MARKDOWN_OUTPUT = (SAMPLE_DIR / "markdown-test.html").read_text() + + def test_markdown(self): + result = markdown(self.SAMPLE_MARKDOWN) + self.assertEqual(result, self.SAMPLE_MARKDOWN_OUTPUT) From 01c6ef4c8c9050e338213ccd0e07bef49277e303 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 7 Feb 2023 16:33:30 +0200 Subject: [PATCH 5/6] Disable automatic links via angle brackets for email addresses --- ietf/utils/markdown-test.html | 2 +- ietf/utils/markdown.py | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ietf/utils/markdown-test.html b/ietf/utils/markdown-test.html index 5df9e2f9b1..7713abc329 100644 --- a/ietf/utils/markdown-test.html +++ b/ietf/utils/markdown-test.html @@ -7,7 +7,7 @@
  • https://example.com
  • Example
  • user@example.com
  • -
  • user@example.com
  • +
  • <user@example.com>
  • User
  • RFC2119
  • BCP 3
  • diff --git a/ietf/utils/markdown.py b/ietf/utils/markdown.py index 35b89745f6..63d1c7a70f 100644 --- a/ietf/utils/markdown.py +++ b/ietf/utils/markdown.py @@ -24,6 +24,9 @@ class LinkifyExtension(Extension): def extendMarkdown(self, md): md.postprocessors.register(LinkifyPostprocessor(md), "linkify", 50) + # disable automatic links via angle brackets for email addresses + md.inlinePatterns.deregister("automail") + # "autolink" for URLs does not seem to cause issues, so leave it on class LinkifyPostprocessor(Postprocessor): From 46c83a4140de25cc9bc8b821c124e4502847a240 Mon Sep 17 00:00:00 2001 From: Lars Eggert Date: Tue, 7 Feb 2023 16:56:03 +0200 Subject: [PATCH 6/6] Inline the markdown test files --- ietf/utils/markdown-test.html | 18 ------------ ietf/utils/markdown-test.md | 19 ------------- ietf/utils/tests_markdown.py | 53 +++++++++++++++++++++++++++++++---- 3 files changed, 47 insertions(+), 43 deletions(-) delete mode 100644 ietf/utils/markdown-test.html delete mode 100644 ietf/utils/markdown-test.md diff --git a/ietf/utils/markdown-test.html b/ietf/utils/markdown-test.html deleted file mode 100644 index 7713abc329..0000000000 --- a/ietf/utils/markdown-test.html +++ /dev/null @@ -1,18 +0,0 @@ -

    IETF Markdown Test File

    -

    This file contains a bunch of constructs to test our markdown converter in
    -ietf/utils/markdown.py.

    - - \ No newline at end of file diff --git a/ietf/utils/markdown-test.md b/ietf/utils/markdown-test.md deleted file mode 100644 index ba02b6fdf1..0000000000 --- a/ietf/utils/markdown-test.md +++ /dev/null @@ -1,19 +0,0 @@ -# IETF Markdown Test File - -This file contains a bunch of constructs to test our markdown converter in -`ietf/utils/markdown.py`. - -## Links - -* https://example.com -* -* [Example](https://example.com) -* user@example.com -* -* [User](mailto:user@example.com) -* RFC2119 -* BCP 3 -* STD 1 -* FYI2 -* draft-ietf-opsec-indicators-of-compromise -* draft-ietf-opsec-indicators-of-compromise-01 diff --git a/ietf/utils/tests_markdown.py b/ietf/utils/tests_markdown.py index 2eeb25166e..c8c07b50c7 100644 --- a/ietf/utils/tests_markdown.py +++ b/ietf/utils/tests_markdown.py @@ -1,18 +1,59 @@ # Copyright The IETF Trust 2023, All Rights Reserved """Markdown API utilities tests""" -from pathlib import Path - -from django.conf import settings +from textwrap import dedent from ietf.utils.tests import TestCase from ietf.utils.markdown import markdown class MarkdownTests(TestCase): - SAMPLE_DIR = Path(settings.BASE_DIR) / "utils" - SAMPLE_MARKDOWN = (SAMPLE_DIR / "markdown-test.md").read_text() - SAMPLE_MARKDOWN_OUTPUT = (SAMPLE_DIR / "markdown-test.html").read_text() + SAMPLE_MARKDOWN = dedent( + """ + # IETF Markdown Test File + + This file contains a bunch of constructs to test our markdown converter in + `ietf/utils/markdown.py`. + + ## Links + + * https://example.com + * + * [Example](https://example.com) + * user@example.com + * + * [User](mailto:user@example.com) + * RFC2119 + * BCP 3 + * STD 1 + * FYI2 + * draft-ietf-opsec-indicators-of-compromise + * draft-ietf-opsec-indicators-of-compromise-01 + """ + ) + + SAMPLE_MARKDOWN_OUTPUT = dedent( + """ +

    IETF Markdown Test File

    +

    This file contains a bunch of constructs to test our markdown converter in
    + ietf/utils/markdown.py.

    + + + """ + ).strip() def test_markdown(self): result = markdown(self.SAMPLE_MARKDOWN)