✨ Add allowed option for inline/block attributes (#115)

The `allowed` option accepts a list of allowed attribute names. If not `None`, any attributes not in this list will be removed and placed in the token's meta under the key `"insecure_attrs"`.

Co-authored-by: maximkurbatov <[email protected]>
Co-authored-by: Chris Sewell <[email protected]>
executablebooks · Sep 9, 2024 · 10d5b4d · 10d5b4d
1 parent 3f7fcc6
commit 10d5b4d
Show file tree

Hide file tree

Showing 3 changed files with 213 additions and 29 deletions.
diff --git a/mdit_py_plugins/attrs/index.py b/mdit_py_plugins/attrs/index.py
@@ -1,4 +1,7 @@
-from typing import List, Optional, Sequence
+from __future__ import annotations
+
+from functools import partial
+from typing import Any, Sequence
 
 from markdown_it import MarkdownIt
 from markdown_it.rules_block import StateBlock
@@ -17,6 +20,7 @@ def attrs_plugin(
     after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
     spans: bool = False,
     span_after: str = "link",
+    allowed: Sequence[str] | None = None,
 ) -> None:
     """Parse inline attributes that immediately follow certain inline elements::
 
@@ -48,36 +52,25 @@ def attrs_plugin(
     :param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
         Note Markdown link references take precedence over this syntax.
     :param span_after: The name of an inline rule after which spans may be specified.
+    :param allowed: A list of allowed attribute names.
+        If not ``None``, any attributes not in this list will be removed
+        and placed in the token's meta under the key "insecure_attrs".
     """
 
-    def _attr_inline_rule(state: StateInline, silent: bool) -> bool:
-        if state.pending or not state.tokens:
-            return False
-        token = state.tokens[-1]
-        if token.type not in after:
-            return False
-        try:
-            new_pos, attrs = parse(state.src[state.pos :])
-        except ParseError:
-            return False
-        token_index = _find_opening(state.tokens, len(state.tokens) - 1)
-        if token_index is None:
-            return False
-        state.pos += new_pos + 1
-        if not silent:
-            attr_token = state.tokens[token_index]
-            if "class" in attrs and "class" in token.attrs:
-                attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
-            attr_token.attrs.update(attrs)
-        return True
-
     if spans:
         md.inline.ruler.after(span_after, "span", _span_rule)
     if after:
-        md.inline.ruler.push("attr", _attr_inline_rule)
+        md.inline.ruler.push(
+            "attr",
+            partial(
+                _attr_inline_rule,
+                after=after,
+                allowed=None if allowed is None else set(allowed),
+            ),
+        )
 
 
-def attrs_block_plugin(md: MarkdownIt) -> None:
+def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
     """Parse block attributes.
 
     Block attributes are attributes on a single line, with no other content.
@@ -93,12 +86,22 @@ def attrs_block_plugin(md: MarkdownIt) -> None:
         A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``.
 
     This syntax is inspired by Djot block attributes.
+
+    :param allowed: A list of allowed attribute names.
+        If not ``None``, any attributes not in this list will be removed
+        and placed in the token's meta under the key "insecure_attrs".
     """
     md.block.ruler.before("fence", "attr", _attr_block_rule)
-    md.core.ruler.after("block", "attr", _attr_resolve_block_rule)
+    md.core.ruler.after(
+        "block",
+        "attr",
+        partial(
+            _attr_resolve_block_rule, allowed=None if allowed is None else set(allowed)
+        ),
+    )
 
 
-def _find_opening(tokens: List[Token], index: int) -> Optional[int]:
+def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
     """Find the opening token index, if the token is closing."""
     if tokens[index].nesting != -1:
         return index
@@ -149,6 +152,34 @@ def _span_rule(state: StateInline, silent: bool) -> bool:
     return True
 
 
+def _attr_inline_rule(
+    state: StateInline,
+    silent: bool,
+    after: Sequence[str],
+    *,
+    allowed: set[str] | None = None,
+) -> bool:
+    """Apply an attribute block found at ``state.pos`` to the preceding inline token.
+
+    :param state: The inline parsing state.
+    :param silent: If True, only advance the position; do not modify any token.
+    :param after: Token types that an attribute block may directly follow.
+    :param allowed: If not ``None``, attribute names outside this set are not
+        added to the token's attributes (see ``_add_attrs``).
+    :return: True if an attribute block was consumed, otherwise False.
+    """
+    if state.pending or not state.tokens:
+        return False
+    token = state.tokens[-1]
+    if token.type not in after:
+        return False
+    try:
+        new_pos, attrs = parse(state.src[state.pos :])
+    except ParseError:
+        return False
+    token_index = _find_opening(state.tokens, len(state.tokens) - 1)
+    if token_index is None:
+        return False
+    state.pos += new_pos + 1
+    if not silent:
+        # When the last token is a closing token, the attributes live on its
+        # opening counterpart, so existing classes must be read from there.
+        attr_token = state.tokens[token_index]
+        if "class" in attrs and "class" in attr_token.attrs:
+            attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
+        _add_attrs(attr_token, attrs, allowed)
+    return True
+
+
 def _attr_block_rule(
     state: StateBlock, startLine: int, endLine: int, silent: bool
 ) -> bool:
@@ -197,7 +228,7 @@ def _attr_block_rule(
     return True
 
 
-def _attr_resolve_block_rule(state: StateCore) -> None:
+def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
     """Find attribute block then move its attributes to the next block."""
     i = 0
     len_tokens = len(state.tokens)
@@ -221,8 +252,23 @@ def _attr_resolve_block_rule(state: StateCore) -> None:
                     if key == "class" or key not in next_token.attrs:
                         next_token.attrs[key] = value
             else:
-                # attribute block takes precedence over attributes in other blocks
-                next_token.attrs.update(state.tokens[i].attrs)
+                _add_attrs(next_token, state.tokens[i].attrs, allowed)
 
         state.tokens.pop(i)
         len_tokens -= 1
+
+
+def _add_attrs(
+    token: Token,
+    attrs: dict[str, Any],
+    allowed: set[str] | None,
+) -> None:
+    """Add attributes to a token, skipping any disallowed attributes.
+
+    :param token: The token to update; its ``attrs`` and ``meta`` are modified.
+    :param attrs: The attributes to add; this mapping itself is not modified.
+    :param allowed: If not ``None``, only names in this set are added to
+        ``token.attrs``; the rest are recorded under ``token.meta["insecure_attrs"]``.
+    """
+    if allowed is not None:
+        disallowed = {k: v for k, v in attrs.items() if k not in allowed}
+        if disallowed:
+            # Merge rather than assign, so disallowed attributes recorded by an
+            # earlier attribute block on the same token are not lost.
+            token.meta.setdefault("insecure_attrs", {}).update(disallowed)
+            attrs = {k: v for k, v in attrs.items() if k in allowed}
+
+    # new attributes take precedence over existing attributes
+    token.attrs.update(attrs)
diff --git a/tests/test_attrs.py b/tests/test_attrs.py
@@ -20,3 +20,20 @@ def test_attrs(line, title, input, expected):
     text = md.render(input)
     print(text)
     assert text.rstrip() == expected.rstrip()
+
+
+def test_attrs_allowed(data_regression):
+    """Regression-check the tokens parsed when both attrs plugins only allow ``safe``."""
+    permitted = ["safe"]
+    parser = MarkdownIt("commonmark")
+    parser.use(attrs_plugin, allowed=permitted)
+    parser.use(attrs_block_plugin, allowed=permitted)
+    source = """
+{danger1=a safe=b}
+{danger2=c safe=d}
+# header
+
+`inline`{safe=a danger=b}
+    """
+    token_dicts = [tok.as_dict() for tok in parser.parse(source)]
+    data_regression.check(token_dicts)
diff --git a/tests/test_attrs/test_attrs_allowed.yml b/tests/test_attrs/test_attrs_allowed.yml
@@ -0,0 +1,121 @@
+- attrs:
+  - - safe
+    - d
+  block: true
+  children: null
+  content: ''
+  hidden: false
+  info: ''
+  level: 0
+  map:
+  - 3
+  - 4
+  markup: '#'
+  meta:
+    insecure_attrs:
+      danger1: a
+      danger2: c
+  nesting: 1
+  tag: h1
+  type: heading_open
+- attrs: null
+  block: true
+  children:
+  - attrs: null
+    block: false
+    children: null
+    content: header
+    hidden: false
+    info: ''
+    level: 0
+    map: null
+    markup: ''
+    meta: {}
+    nesting: 0
+    tag: ''
+    type: text
+  content: header
+  hidden: false
+  info: ''
+  level: 1
+  map:
+  - 3
+  - 4
+  markup: ''
+  meta: {}
+  nesting: 0
+  tag: ''
+  type: inline
+- attrs: null
+  block: true
+  children: null
+  content: ''
+  hidden: false
+  info: ''
+  level: 0
+  map: null
+  markup: '#'
+  meta: {}
+  nesting: -1
+  tag: h1
+  type: heading_close
+- attrs: null
+  block: true
+  children: null
+  content: ''
+  hidden: false
+  info: ''
+  level: 0
+  map:
+  - 5
+  - 6
+  markup: ''
+  meta: {}
+  nesting: 1
+  tag: p
+  type: paragraph_open
+- attrs: null
+  block: true
+  children:
+  - attrs:
+    - - safe
+      - a
+    block: false
+    children: null
+    content: inline
+    hidden: false
+    info: ''
+    level: 0
+    map: null
+    markup: '`'
+    meta:
+      insecure_attrs:
+        danger: b
+    nesting: 0
+    tag: code
+    type: code_inline
+  content: '`inline`{safe=a danger=b}'
+  hidden: false
+  info: ''
+  level: 1
+  map:
+  - 5
+  - 6
+  markup: ''
+  meta: {}
+  nesting: 0
+  tag: ''
+  type: inline
+- attrs: null
+  block: true
+  children: null
+  content: ''
+  hidden: false
+  info: ''
+  level: 0
+  map: null
+  markup: ''
+  meta: {}
+  nesting: -1
+  tag: p
+  type: paragraph_close