Skip to content

Commit

Permalink
✨ Add allowed option for inline/block attributes (#115)
Browse files Browse the repository at this point in the history
The `allowed` option accepts a list of allowed attribute names.
If not ``None``, any attributes not in this list will be removed
and placed in the token's meta under the key `"insecure_attrs"`.

Co-authored-by: maximkurbatov <[email protected]>
Co-authored-by: Chris Sewell <[email protected]>
  • Loading branch information
3 people authored Sep 9, 2024
1 parent 3f7fcc6 commit 10d5b4d
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 29 deletions.
104 changes: 75 additions & 29 deletions mdit_py_plugins/attrs/index.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from typing import List, Optional, Sequence
from __future__ import annotations

from functools import partial
from typing import Any, Sequence

from markdown_it import MarkdownIt
from markdown_it.rules_block import StateBlock
Expand All @@ -17,6 +20,7 @@ def attrs_plugin(
after: Sequence[str] = ("image", "code_inline", "link_close", "span_close"),
spans: bool = False,
span_after: str = "link",
allowed: Sequence[str] | None = None,
) -> None:
"""Parse inline attributes that immediately follow certain inline elements::
Expand Down Expand Up @@ -48,36 +52,25 @@ def attrs_plugin(
:param spans: If True, also parse attributes after spans of text, encapsulated by `[]`.
Note Markdown link references take precedence over this syntax.
:param span_after: The name of an inline rule after which spans may be specified.
:param allowed: A list of allowed attribute names.
If not ``None``, any attributes not in this list will be removed
and placed in the token's meta under the key "insecure_attrs".
"""

def _attr_inline_rule(state: StateInline, silent: bool) -> bool:
if state.pending or not state.tokens:
return False
token = state.tokens[-1]
if token.type not in after:
return False
try:
new_pos, attrs = parse(state.src[state.pos :])
except ParseError:
return False
token_index = _find_opening(state.tokens, len(state.tokens) - 1)
if token_index is None:
return False
state.pos += new_pos + 1
if not silent:
attr_token = state.tokens[token_index]
if "class" in attrs and "class" in token.attrs:
attrs["class"] = f"{attr_token.attrs['class']} {attrs['class']}"
attr_token.attrs.update(attrs)
return True

if spans:
md.inline.ruler.after(span_after, "span", _span_rule)
if after:
md.inline.ruler.push("attr", _attr_inline_rule)
md.inline.ruler.push(
"attr",
partial(
_attr_inline_rule,
after=after,
allowed=None if allowed is None else set(allowed),
),
)


def attrs_block_plugin(md: MarkdownIt) -> None:
def attrs_block_plugin(md: MarkdownIt, *, allowed: Sequence[str] | None = None) -> None:
"""Parse block attributes.
Block attributes are attributes on a single line, with no other content.
Expand All @@ -93,12 +86,22 @@ def attrs_block_plugin(md: MarkdownIt) -> None:
A paragraph, that will be assigned the class ``a b c``, and the identifier ``b``.
This syntax is inspired by Djot block attributes.
:param allowed: A list of allowed attribute names.
If not ``None``, any attributes not in this list will be removed
and placed in the token's meta under the key "insecure_attrs".
"""
md.block.ruler.before("fence", "attr", _attr_block_rule)
md.core.ruler.after("block", "attr", _attr_resolve_block_rule)
md.core.ruler.after(
"block",
"attr",
partial(
_attr_resolve_block_rule, allowed=None if allowed is None else set(allowed)
),
)


def _find_opening(tokens: List[Token], index: int) -> Optional[int]:
def _find_opening(tokens: Sequence[Token], index: int) -> int | None:
"""Find the opening token index, if the token is closing."""
if tokens[index].nesting != -1:
return index
Expand Down Expand Up @@ -149,6 +152,34 @@ def _span_rule(state: StateInline, silent: bool) -> bool:
return True


def _attr_inline_rule(
state: StateInline,
silent: bool,
after: Sequence[str],
*,
allowed: set[str] | None = None,
) -> bool:
if state.pending or not state.tokens:
return False
token = state.tokens[-1]
if token.type not in after:
return False
try:
new_pos, attrs = parse(state.src[state.pos :])
except ParseError:
return False
token_index = _find_opening(state.tokens, len(state.tokens) - 1)
if token_index is None:
return False
state.pos += new_pos + 1
if not silent:
attr_token = state.tokens[token_index]
if "class" in attrs and "class" in token.attrs:
attrs["class"] = f"{token.attrs['class']} {attrs['class']}"
_add_attrs(attr_token, attrs, allowed)
return True


def _attr_block_rule(
state: StateBlock, startLine: int, endLine: int, silent: bool
) -> bool:
Expand Down Expand Up @@ -197,7 +228,7 @@ def _attr_block_rule(
return True


def _attr_resolve_block_rule(state: StateCore) -> None:
def _attr_resolve_block_rule(state: StateCore, *, allowed: set[str] | None) -> None:
"""Find attribute block then move its attributes to the next block."""
i = 0
len_tokens = len(state.tokens)
Expand All @@ -221,8 +252,23 @@ def _attr_resolve_block_rule(state: StateCore) -> None:
if key == "class" or key not in next_token.attrs:
next_token.attrs[key] = value
else:
# attribute block takes precedence over attributes in other blocks
next_token.attrs.update(state.tokens[i].attrs)
_add_attrs(next_token, state.tokens[i].attrs, allowed)

state.tokens.pop(i)
len_tokens -= 1


def _add_attrs(
token: Token,
attrs: dict[str, Any],
allowed: set[str] | None,
) -> None:
"""Add attributes to a token, skipping any disallowed attributes."""
if allowed is not None and (
disallowed := {k: v for k, v in attrs.items() if k not in allowed}
):
token.meta["insecure_attrs"] = disallowed
attrs = {k: v for k, v in attrs.items() if k in allowed}

# attributes takes precedence over existing attributes
token.attrs.update(attrs)
17 changes: 17 additions & 0 deletions tests/test_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,20 @@ def test_attrs(line, title, input, expected):
text = md.render(input)
print(text)
assert text.rstrip() == expected.rstrip()


def test_attrs_allowed(data_regression):
allowed = ["safe"]
md = (
MarkdownIt("commonmark")
.use(attrs_plugin, allowed=allowed)
.use(attrs_block_plugin, allowed=allowed)
)
tokens = md.parse("""
{danger1=a safe=b}
{danger2=c safe=d}
# header
`inline`{safe=a danger=b}
""")
data_regression.check([t.as_dict() for t in tokens])
121 changes: 121 additions & 0 deletions tests/test_attrs/test_attrs_allowed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
- attrs:
- - safe
- d
block: true
children: null
content: ''
hidden: false
info: ''
level: 0
map:
- 3
- 4
markup: '#'
meta:
insecure_attrs:
danger1: a
danger2: c
nesting: 1
tag: h1
type: heading_open
- attrs: null
block: true
children:
- attrs: null
block: false
children: null
content: header
hidden: false
info: ''
level: 0
map: null
markup: ''
meta: {}
nesting: 0
tag: ''
type: text
content: header
hidden: false
info: ''
level: 1
map:
- 3
- 4
markup: ''
meta: {}
nesting: 0
tag: ''
type: inline
- attrs: null
block: true
children: null
content: ''
hidden: false
info: ''
level: 0
map: null
markup: '#'
meta: {}
nesting: -1
tag: h1
type: heading_close
- attrs: null
block: true
children: null
content: ''
hidden: false
info: ''
level: 0
map:
- 5
- 6
markup: ''
meta: {}
nesting: 1
tag: p
type: paragraph_open
- attrs: null
block: true
children:
- attrs:
- - safe
- a
block: false
children: null
content: inline
hidden: false
info: ''
level: 0
map: null
markup: '`'
meta:
insecure_attrs:
danger: b
nesting: 0
tag: code
type: code_inline
content: '`inline`{safe=a danger=b}'
hidden: false
info: ''
level: 1
map:
- 5
- 6
markup: ''
meta: {}
nesting: 0
tag: ''
type: inline
- attrs: null
block: true
children: null
content: ''
hidden: false
info: ''
level: 0
map: null
markup: ''
meta: {}
nesting: -1
tag: p
type: paragraph_close

0 comments on commit 10d5b4d

Please sign in to comment.