Skip to content

Commit

Permalink
fix: Fallback to slugified title as id for non-exact, non-code refere…
Browse files Browse the repository at this point in the history
…nces (`[Hello World][]` -> `[hello-world][]`)

With a heading like `## Welcome`, we should be able to cross-reference it with `[Welcome][]`, without having to specify the actual, slugified identifier: `[Welcome][welcome]`. This is compliant with the original Markdown spec.

How does it work?

When the base Markdown converter doesn't convert a reference, autorefs kicks in. It converts the yet-unresolved reference to an `autoref` HTML element. If an identifier was explicitly given, it creates a regular `autoref` element like before. If only a title was provided, then there are two scenarios:

- the title converts to a `code` HTML element, in which case we create a regular `autoref` again (important for API docs)
- the title does not convert to a `code` HTML element, in which case we add a slug to the `autoref` element

`autoref` elements without a slug are handled like before. `autoref` elements with a slug will first try to find a URL for the initial identifier (which is the title), and if that fails, will try again with the slugified title. Slugification is done with the `toc` extension's `slugify` function.

Issue-58: #58
  • Loading branch information
pawamoy committed Jan 10, 2025
1 parent 418e770 commit 13428f1
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 18 deletions.
53 changes: 38 additions & 15 deletions src/mkdocs_autorefs/references.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@
import markupsafe
from markdown.core import Markdown
from markdown.extensions import Extension
from markdown.extensions.toc import slugify
from markdown.inlinepatterns import REFERENCE_RE, ReferenceInlineProcessor
from markdown.treeprocessors import Treeprocessor
from markdown.util import HTML_PLACEHOLDER_RE, INLINE_PLACEHOLDER_RE

if TYPE_CHECKING:
from collections.abc import Iterable
from pathlib import Path
from re import Match

Expand Down Expand Up @@ -120,7 +122,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: D107

# Code based on
# https://github.com/Python-Markdown/markdown/blob/8e7528fa5c98bf4652deb13206d6e6241d61630b/markdown/inlinepatterns.py#L780

def handleMatch(self, m: Match[str], data: str) -> tuple[Element | None, int | None, int | None]: # type: ignore[override] # noqa: N802
"""Handle an element that matched.
Expand All @@ -135,19 +136,19 @@ def handleMatch(self, m: Match[str], data: str) -> tuple[Element | None, int | N
if not handled:
return None, None, None

identifier, end, handled = self.evalId(data, index, text)
identifier, slug, end, handled = self._eval_id(data, index, text)
if not handled or identifier is None:
return None, None, None

if re.search(r"[\x00-\x1f]", identifier):
if slug is None and re.search(r"[\x00-\x1f]", identifier):
# Do nothing if the matched reference contains control characters (from 0 to 31 included).
# Specifically `\x01` is used by Python-Markdown HTML stash when there's inline formatting,
# but references with Markdown formatting are not possible anyway.
return None, m.start(0), end

return self._make_tag(identifier, text), m.start(0), end
return self._make_tag(identifier, text, slug=slug), m.start(0), end

def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]: # noqa: N802 (parent's casing)
def _eval_id(self, data: str, index: int, text: str) -> tuple[str | None, str | None, int, bool]:
"""Evaluate the id portion of `[ref][id]`.
If `[ref][]` use `[ref]`.
Expand All @@ -158,23 +159,28 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
text: The text to use when no identifier.
Returns:
A tuple containing the identifier, its end position, and whether it matched.
A tuple containing the identifier, its optional slug, its end position, and whether it matched.
"""
m = self.RE_LINK.match(data, pos=index)
if not m:
return None, index, False
return None, None, index, False

identifier = m.group(1)
if not identifier:
if identifier := m.group(1):
# An identifier was provided, match it exactly (later).
slug = None
else:
# Only a title was provided, use it as identifier.
identifier = text
# Allow the entire content to be one placeholder, with the intent of catching things like [`Foo`][].
# It doesn't catch [*Foo*][] though, just due to the priority order.
# https://github.com/Python-Markdown/markdown/blob/1858c1b601ead62ed49646ae0d99298f41b1a271/markdown/inlinepatterns.py#L78

# Catch single stash entries, like the result of [`Foo`][].
if match := INLINE_PLACEHOLDER_RE.fullmatch(identifier):
stashed_nodes: dict[str, Element | str] = self.md.treeprocessors["inline"].stashed_nodes # type: ignore[attr-defined]
el = stashed_nodes.get(match[1])
if isinstance(el, Element) and el.tag == "code":
# The title was wrapped in backticks, we only keep the content,
# and tell autorefs to match the identifier exactly.
identifier = "".join(el.itertext())
slug = None
# Special case: allow pymdownx.inlinehilite raw <code> snippets but strip them back to unhighlighted.
if match := HTML_PLACEHOLDER_RE.fullmatch(identifier):
stash_index = int(match.group(1))
Expand All @@ -183,9 +189,9 @@ def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, boo
self.md.htmlStash.rawHtmlBlocks[stash_index] = escape(identifier)

end = m.end(0)
return identifier, end, True
return identifier, slug, end, True

def _make_tag(self, identifier: str, text: str) -> Element:
def _make_tag(self, identifier: str, text: str, *, slug: str | None = None) -> Element:
"""Create a tag that can be matched by `AUTO_REF_RE`.
Arguments:
Expand All @@ -201,6 +207,8 @@ def _make_tag(self, identifier: str, text: str) -> Element:
el.attrib.update(self.hook.get_context().as_dict())
el.set("identifier", identifier)
el.text = text
if slug:
el.attrib["slug"] = slug
return el


Expand Down Expand Up @@ -300,6 +308,7 @@ class _AutorefsAttrs(dict):
"origin",
"filepath",
"lineno",
"slug",
}

@property
Expand Down Expand Up @@ -337,6 +346,15 @@ def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None
_html_attrs_parser = _HTMLAttrsParser()


def _find_url(identifiers: Iterable[str], url_mapper: Callable[[str], str]) -> str:
    """Return the URL of the first identifier that the mapper can resolve.

    Identifiers are tried lazily, in order; a `KeyError` raised by the mapper
    means "unknown identifier" and moves on to the next candidate.

    Arguments:
        identifiers: The candidate identifiers, in order of preference.
        url_mapper: A callable that returns the URL for an identifier,
            or raises `KeyError` when the identifier is unknown.

    Returns:
        The URL returned by the mapper for the first resolvable identifier.

    Raises:
        KeyError: When none of the identifiers could be resolved.
    """
    # Remember what was tried so the error message stays meaningful even when
    # `identifiers` is a one-shot iterator or generator (whose repr is useless).
    tried: list[str] = []
    for identifier in identifiers:
        try:
            return url_mapper(identifier)
        except KeyError:
            tried.append(identifier)
    raise KeyError(f"None of the identifiers {tuple(tried)} were found")


def fix_ref(
url_mapper: Callable[[str], str],
unmapped: list[tuple[str, AutorefsHookInterface.Context | None]],
Expand All @@ -363,11 +381,14 @@ def inner(match: Match) -> str:
title = match["title"]
attrs = _html_attrs_parser.parse(f"<a {match['attrs']}>")
identifier: str = attrs["identifier"]
slug = attrs.get("slug", None)
optional = "optional" in attrs
hover = "hover" in attrs

identifiers = (identifier, slug) if slug else (identifier,)

try:
url = url_mapper(unescape(identifier))
url = _find_url(identifiers, url_mapper)
except KeyError:
if optional:
if hover:
Expand All @@ -376,6 +397,8 @@ def inner(match: Match) -> str:
unmapped.append((identifier, attrs.context))
if title == identifier:
return f"[{identifier}][]"
if title == f"<code>{identifier}</code>" and not slug:
return f"[<code>{identifier}</code>][]"
return f"[{title}][{identifier}]"

parsed = urlsplit(url)
Expand Down
84 changes: 81 additions & 3 deletions tests/test_references.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ def test_missing_reference_with_markdown_text() -> None:
run_references_test(
url_map={"NotFoo": "foo.html#NotFoo"},
source="[`Foo`][Foo]",
output="<p>[<code>Foo</code>][Foo]</p>",
output="<p>[<code>Foo</code>][]</p>",
unmapped=[("Foo", None)],
)

Expand All @@ -201,8 +201,8 @@ def test_missing_reference_with_markdown_implicit() -> None:
run_references_test(
url_map={"Foo-bar": "foo.html#Foo-bar"},
source="[*Foo-bar*][] and [`Foo`-bar][]",
output="<p>[<em>Foo-bar</em>][*Foo-bar*] and [<code>Foo</code>-bar][]</p>",
unmapped=[("*Foo-bar*", None)],
output="<p>[<em>Foo-bar</em>][*Foo-bar*] and [<code>Foo</code>-bar][`Foo`-bar]</p>",
unmapped=[("*Foo-bar*", None), ("`Foo`-bar", None)],
)


Expand Down Expand Up @@ -405,3 +405,81 @@ def test_keep_data_attributes() -> None:
source = '<autoref optional identifier="example" class="hi ho" data-foo data-bar="0">e</autoref>'
output, _ = fix_refs(source, url_map.__getitem__)
assert output == '<a class="autorefs autorefs-external hi ho" href="https://e.com" data-foo data-bar="0">e</a>'


@pytest.mark.parametrize(
    ("markdown_ref", "exact_expected"),
    [
        # Plain titles and titles with stray/escaped backticks: a slug is expected.
        ("[Foo][]", False),
        ("[\\`Foo][]", False),
        ("[\\`\\`Foo][]", False),
        ("[\\`\\`Foo\\`][]", False),
        ("[Foo\\`][]", False),
        ("[Foo\\`\\`][]", False),
        ("[\\`Foo\\`\\`][]", False),
        ("[`Foo` `Bar`][]", False),
        # Explicit identifiers and titles that are a single code span: no slug is expected.
        ("[Foo][Foo]", True),
        ("[`Foo`][]", True),
        ("[`Foo``Bar`][]", True),
        ("[`Foo```Bar`][]", True),
        ("[``Foo```Bar``][]", True),
        ("[``Foo`Bar``][]", True),
        ("[```Foo``Bar```][]", True),
    ],
)
def test_mark_identifiers_as_exact(markdown_ref: str, exact_expected: bool) -> None:
    """Check that exact references (code spans, explicit identifiers) carry no `slug` attribute.

    Parameters:
        markdown_ref: The Markdown reference to convert.
        exact_expected: Whether the resulting autoref element must be free of a `slug` attribute.
    """
    autorefs = AutorefsPlugin()
    converter = markdown.Markdown(extensions=["attr_list", "toc", AutorefsExtension(autorefs)])
    autorefs.current_page = "page"
    html = converter.convert(markdown_ref)
    # A slug must be present exactly when the reference is not expected to be exact.
    has_slug = "slug=" in html
    assert has_slug != exact_expected


def test_slugified_identifier_fallback() -> None:
    """Resolve a reference through its slug when the identifier itself has no URL."""
    # Each case is (url_map, source, expected output).
    cases = (
        (
            {"hello-world": "https://e.com#a"},
            '<autoref identifier="Hello World" slug="hello-world">Hello World</autoref>',
            '<p><a class="autorefs autorefs-external" href="https://e.com#a">Hello World</a></p>',
        ),
        (
            {"foo-bar": "https://e.com#a"},
            "[*Foo*-bar][]",
            '<p><a class="autorefs autorefs-external" href="https://e.com#a"><em>Foo</em>-bar</a></p>',
        ),
        (
            {"foo-bar": "https://e.com#a"},
            "[`Foo`-bar][]",
            '<p><a class="autorefs autorefs-external" href="https://e.com#a"><code>Foo</code>-bar</a></p>',
        ),
    )
    for url_map, source, expected in cases:
        run_references_test(url_map=url_map, source=source, output=expected)


def test_no_fallback_for_exact_identifiers() -> None:
    """Leave slug-less autoref elements unresolved even if a slugified URL exists."""
    # Each case is (source, expected output); neither element carries a `slug` attribute.
    cases = (
        (
            '<autoref identifier="Hello World"><code>Hello World</code></autoref>',
            "<p>[<code>Hello World</code>][]</p>",
        ),
        (
            '<autoref identifier="Hello World">Hello World</autoref>',
            "<p>[Hello World][]</p>",
        ),
    )
    for source, expected in cases:
        # Fresh map and list per call, exactly as separate literal calls would provide.
        run_references_test(
            url_map={"hello-world": "https://e.com"},
            source=source,
            output=expected,
            unmapped=[("Hello World", None)],
        )


def test_no_fallback_for_provided_identifiers() -> None:
    """Keep explicitly provided identifiers exact: they are never slugified."""
    known_urls = {"hello-world": "foo.html#hello-world"}
    markdown_source = "[Hello][Hello world]"
    # The reference stays untouched and its identifier is reported as unmapped.
    expected_html = "<p>[Hello][Hello world]</p>"
    expected_unmapped = [("Hello world", None)]
    run_references_test(
        url_map=known_urls,
        source=markdown_source,
        output=expected_html,
        unmapped=expected_unmapped,
    )

0 comments on commit 13428f1

Please sign in to comment.