From 10d5b4d05eb667b55695ddb132414d9b8f7ba760 Mon Sep 17 00:00:00 2001 From: joker21663 Date: Mon, 9 Sep 2024 22:57:28 +0300 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Add=20`allowed`=20option=20for=20in?= =?UTF-8?q?line/block=20attributes=20(#115)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `allowed` option accepts a list of allowed attribute names. If not ``None``, any attributes not in this list will be removed and placed in the token's meta under the key `"insecure_attrs"`. Co-authored-by: maximkurbatov Co-authored-by: Chris Sewell --- mdit_py_plugins/attrs/index.py | 104 ++++++++++++++------ tests/test_attrs.py | 17 ++++ tests/test_attrs/test_attrs_allowed.yml | 121 ++++++++++++++++++++++++ 3 files changed, 213 insertions(+), 29 deletions(-) create mode 100644 tests/test_attrs/test_attrs_allowed.yml diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py index 11455f8..3efaab6 100644 --- a/mdit_py_plugins/attrs/index.py +++ b/mdit_py_plugins/attrs/index.py @@ -1,4 +1,7 @@ -from typing import List, Optional, Sequence +from __future__ import annotations + +from functools import partial +from typing import Any, Sequence from markdown_it import MarkdownIt from markdown_it.rules_block import StateBlock @@ -17,6 +20,7 @@ def attrs_plugin( after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"), spans: bool = False, span_after: str = "link", + allowed: Sequence[str] | None = None, ) -> None: """Parse inline attributes that immediately follow certain inline elements:: @@ -48,36 +52,25 @@ def attrs_plugin( :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`. Note Markdown link references take precedence over this syntax. :param span_after: The name of an inline rule after which spans may be specified. + :param allowed: A list of allowed attribute names. + If not ``None``, any attributes not in this list will be removed + and placed in the token's meta under the key "insecure_attrs". """ - def _attr_inline_rule(state: StateInline, silent: bool) -> bool: - if state.pending or not state.tokens: - return False - token = state.tokens[-1] - if token.type not in after: - return False - try: - new_pos, attrs = parse(state.src[state.pos :]) - except ParseError: - return False - token_index = _find_opening(state.tokens, len(state.tokens) - 1) - if token_index is None: - return False - state.pos += new_pos + 1 - if not silent: - attr_token = state.tokens[token_index] - if "class" in attrs and "class" in token.attrs: - attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}" - attr_token.attrs.update(attrs) - return True - if spans: md.inline.ruler.after(span_after, "span", _span_rule) if after: - md.inline.ruler.push("attr", _attr_inline_rule) + md.inline.ruler.push( + "attr", + partial( + _attr_inline_rule, + after=after, + allowed=None if allowed is None else set(allowed), + ), + ) -def attrs_block_plugin(md: MarkdownIt) -> None: +def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None: """Parse block attributes. Block attributes are attributes on a single line, with no other content. @@ -93,12 +86,22 @@ def attrs_block_plugin(md: MarkdownIt) -> None: A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``. This syntax is inspired by Djot block attributes. + + :param allowed: A list of allowed attribute names. + If not ``None``, any attributes not in this list will be removed + and placed in the token's meta under the key "insecure_attrs". """ md.block.ruler.before("fence", "attr", _attr_block_rule) - md.core.ruler.after("block", "attr", _attr_resolve_block_rule) + md.core.ruler.after( + "block", + "attr", + partial( + _attr_resolve_block_rule, allowed=None if allowed is None else set(allowed) + ), + ) -def _find_opening(tokens: List[Token], index: int) -> Optional[int]: +def _find_opening(tokens: Sequence[Token], index: int) -> int | None: """Find the opening token index, if the token is closing.""" if tokens[index].nesting != -1: return index @@ -149,6 +152,34 @@ def _span_rule(state: StateInline, silent: bool) -> bool: return True +def _attr_inline_rule( + state: StateInline, + silent: bool, + after: Sequence[str], + *, + allowed: set[str] | None = None, +) -> bool: + if state.pending or not state.tokens: + return False + token = state.tokens[-1] + if token.type not in after: + return False + try: + new_pos, attrs = parse(state.src[state.pos :]) + except ParseError: + return False + token_index = _find_opening(state.tokens, len(state.tokens) - 1) + if token_index is None: + return False + state.pos += new_pos + 1 + if not silent: + attr_token = state.tokens[token_index] + if "class" in attrs and "class" in token.attrs: + attrs["class"] = f"{token.attrs['class']} {attrs['class']}" + _add_attrs(attr_token, attrs, allowed) + return True + + def _attr_block_rule( state: StateBlock, startLine: int, endLine: int, silent: bool ) -> bool: @@ -197,7 +228,7 @@ def _attr_block_rule( return True -def _attr_resolve_block_rule(state: StateCore) -> None: +def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None: """Find attribute block then move its attributes to the next block.""" i = 0 len_tokens = len(state.tokens) @@ -221,8 +252,23 @@ def _attr_resolve_block_rule(state: StateCore) -> None: if key == "class" or key not in next_token.attrs: next_token.attrs[key] = value else: - # attribute block takes precedence over attributes in other blocks - next_token.attrs.update(state.tokens[i].attrs) + _add_attrs(next_token, state.tokens[i].attrs, allowed) state.tokens.pop(i) len_tokens -= 1 + + +def _add_attrs( + token: Token, + attrs: dict[str, Any], + allowed: set[str] | None, +) -> None: + """Add attributes to a token, skipping any disallowed attributes.""" + if allowed is not None and ( + disallowed := {k: v for k, v in attrs.items() if k not in allowed} + ): + token.meta["insecure_attrs"] = disallowed + attrs = {k: v for k, v in attrs.items() if k in allowed} + + # attributes takes precedence over existing attributes + token.attrs.update(attrs) diff --git a/tests/test_attrs.py b/tests/test_attrs.py index 2e0bd58..cec33d1 100644 --- a/tests/test_attrs.py +++ b/tests/test_attrs.py @@ -20,3 +20,20 @@ def test_attrs(line, title, input, expected): text = md.render(input) print(text) assert text.rstrip() == expected.rstrip() + + +def test_attrs_allowed(data_regression): + allowed = ["safe"] + md = ( + MarkdownIt("commonmark") + .use(attrs_plugin, allowed=allowed) + .use(attrs_block_plugin, allowed=allowed) + ) + tokens = md.parse(""" +{danger1=a safe=b} +{danger2=c safe=d} +# header + +`inline`{safe=a danger=b} + """) + data_regression.check([t.as_dict() for t in tokens]) diff --git a/tests/test_attrs/test_attrs_allowed.yml b/tests/test_attrs/test_attrs_allowed.yml new file mode 100644 index 0000000..64e7cd8 --- /dev/null +++ b/tests/test_attrs/test_attrs_allowed.yml @@ -0,0 +1,121 @@ +- attrs: + - - safe + - d + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: + - 3 + - 4 + markup: '#' + meta: + insecure_attrs: + danger1: a + danger2: c + nesting: 1 + tag: h1 + type: heading_open +- attrs: null + block: true + children: + - attrs: null + block: false + children: null + content: header + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: 0 + tag: '' + type: text + content: header + hidden: false + info: '' + level: 1 + map: + - 3 + - 4 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: null + markup: '#' + meta: {} + nesting: -1 + tag: h1 + type: heading_close +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: + - 5 + - 6 + markup: '' + meta: {} + nesting: 1 + tag: p + type: paragraph_open +- attrs: null + block: true + children: + - attrs: + - - safe + - a + block: false + children: null + content: inline + hidden: false + info: '' + level: 0 + map: null + markup: '`' + meta: + insecure_attrs: + danger: b + nesting: 0 + tag: code + type: code_inline + content: '`inline`{safe=a danger=b}' + hidden: false + info: '' + level: 1 + map: + - 5 + - 6 + markup: '' + meta: {} + nesting: 0 + tag: '' + type: inline +- attrs: null + block: true + children: null + content: '' + hidden: false + info: '' + level: 0 + map: null + markup: '' + meta: {} + nesting: -1 + tag: p + type: paragraph_close