Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions server/utils/readme.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,46 @@ function slugify(text: string): string {
.replace(/^-|-$/g, '') // Trim leading/trailing hyphens
}

/**
 * Lazy ATX heading extension for marked: allows headings without a space after `#`.
 *
 * Reimplements the behavior of markdown-it-lazy-headers
 * (https://npmx.dev/package/markdown-it-lazy-headers), which is used by npm's own markdown renderer
 * marky-markdown (https://npmx.dev/package/marky-markdown).
 *
 * CommonMark requires a space after # for ATX headings, but many READMEs in the npm registry omit
 * this space. This extension allows marked to parse these headings the same way npm does.
 *
 * The tokenizer only claims lines where 1-6 `#` characters (indented by at most three spaces) are
 * immediately followed by a character that is neither whitespace nor `#`. For every other input it
 * returns `false`, so marked's built-in heading tokenizer keeps handling regular headings.
 */
marked.use({
  tokenizer: {
    heading(src: string) {
      // Only match headings where `#` is immediately followed by non-whitespace, non-`#` content.
      // Normal headings (with space) return false to fall through to marked's default tokenizer.
      const match = /^ {0,3}(#{1,6})([^\s#][^\n]*)(?:\n+|$)/.exec(src)
      if (!match) return false

      let text = match[2]!.trim()

      // Strip an optional closing `#` sequence, but only when it is preceded by a space or a tab
      // (CommonMark behavior). e.g., "#heading ##" → "heading", but "#heading#" stays "heading#".
      // `text` always starts with a non-`#` character (guaranteed by the regex above), so the
      // backward scan below stops before index 0 and never yields an empty heading.
      let contentEnd = text.length
      while (contentEnd > 0 && text.charAt(contentEnd - 1) === '#') {
        contentEnd--
      }
      if (contentEnd < text.length) {
        const beforeClosing = text.charAt(contentEnd - 1)
        if (beforeClosing === ' ' || beforeClosing === '\t') {
          text = text.slice(0, contentEnd).trim()
        }
      }

      return {
        type: 'heading' as const,
        // `raw` must cover everything consumed, including trailing newlines, so the lexer advances.
        raw: match[0]!,
        depth: match[1]!.length,
        text,
        tokens: this.lexer.inline(text),
      }
    },
  },
})

/** These paths on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev URLs */
const reservedPathsNpmJs = [
'products',
Expand Down
86 changes: 86 additions & 0 deletions test/unit/server/utils/readme.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,92 @@ describe('ReadmeResponse shape (HTML route contract)', () => {
})
})

// Tests for the lazy ATX heading extension, matching the behavior of
// markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers).
// Headings are observed through the `toc` entries returned by `renderReadmeHtml`.
describe('Lazy ATX headings (no space after #)', () => {
  it('parses #foo through ######foo as headings', async () => {
    const markdown = '#foo\n\n##foo\n\n###foo\n\n####foo\n\n#####foo\n\n######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(6)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[3]).toMatchObject({ text: 'foo', depth: 4 })
    expect(result.toc[4]).toMatchObject({ text: 'foo', depth: 5 })
    expect(result.toc[5]).toMatchObject({ text: 'foo', depth: 6 })
  })

  it('rejects 7+ # characters as not a heading', async () => {
    const markdown = '#######foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    // The line must survive as plain text rather than being swallowed or turned into a heading.
    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#######foo')
  })

  it('does not affect headings that already have spaces', async () => {
    const markdown = '# Title\n\n## Subtitle'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(2)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
  })

  it('strips optional trailing # sequence preceded by space', async () => {
    const markdown = '##foo ##'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 2 })
  })

  it('keeps trailing # not preceded by space as part of content', async () => {
    const markdown = '#foo#'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'foo#', depth: 1 })
  })

  it('does not modify lines inside fenced code blocks', async () => {
    const markdown = '```\n#not-a-heading\n```'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(0)
    expect(result.html).toContain('#not-a-heading')
  })

  it('handles mixed headings with and without spaces', async () => {
    const markdown = '#Title\n\nSome text\n\n## Subtitle\n\n###Another'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'Another', depth: 3 })
  })

  it('allows 1-3 spaces indentation', async () => {
    // One, two and three leading spaces respectively, so every allowed indent width is covered.
    const markdown = ' ###foo\n\n  ##foo\n\n   #foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(3)
    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 3 })
    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 1 })
  })

  it('works after paragraphs separated by blank lines', async () => {
    const markdown = 'Foo bar\n\n#baz\n\nBar foo'
    const result = await renderReadmeHtml(markdown, 'test-pkg')

    expect(result.toc).toHaveLength(1)
    expect(result.toc[0]).toMatchObject({ text: 'baz', depth: 1 })
  })
})

describe('HTML output', () => {
it('returns sanitized html', async () => {
const markdown = `# Title\n\nSome **bold** text and a [link](https://example.com).`
Expand Down
Loading