From 8d87c8cc2e7e4f69a0412889a9cbcb89d2940fa8 Mon Sep 17 00:00:00 2001
From: Philippe Serhal
Date: Fri, 27 Feb 2026 15:34:21 -0500
Subject: [PATCH] fix(readme): parse headings without space after # to match npm

Many READMEs in the npm registry use `#Heading` instead of `# Heading`.
CommonMark (and marked) requires the space, so these render as plain text
instead of headings on npmx.dev.

npm's own renderer (https://npmx.dev/package/marky-markdown) handles this via
https://npmx.dev/package/markdown-it-lazy-headers, a markdown-it plugin that
relaxes the space requirement.

This commit reimplements that behavior as a marked tokenizer extension, since
we use marked rather than markdown-it. The extension only handles the no-space
case and falls through to marked's default tokenizer for standard headings.

Closes #1697
---
 server/utils/readme.ts                | 40 +++++++++++++
 test/unit/server/utils/readme.spec.ts | 86 +++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

diff --git a/server/utils/readme.ts b/server/utils/readme.ts
index b7c8fb266b..151bb1fdda 100644
--- a/server/utils/readme.ts
+++ b/server/utils/readme.ts
@@ -228,6 +228,46 @@ function slugify(text: string): string {
     .replace(/^-|-$/g, '') // Trim leading/trailing hyphens
 }
 
+/**
+ * Lazy ATX heading extension for marked: allows headings without a space after `#`.
+ *
+ * Reimplements the behavior of markdown-it-lazy-headers
+ * (https://npmx.dev/package/markdown-it-lazy-headers), which is used by npm's own markdown renderer
+ * marky-markdown (https://npmx.dev/package/marky-markdown).
+ *
+ * CommonMark requires a space after # for ATX headings, but many READMEs in the npm registry omit
+ * this space. This extension allows marked to parse these headings the same way npm does.
+ */
+marked.use({
+  tokenizer: {
+    heading(src: string) {
+      // Only match headings where `#` is immediately followed by non-whitespace, non-`#` content.
+      // Normal headings (with space) return false to fall through to marked's default tokenizer.
+      const match = /^ {0,3}(#{1,6})([^\s#][^\n]*)(?:\n+|$)/.exec(src)
+      if (!match) return false
+
+      let text = match[2]!.trim()
+
+      // Strip trailing # characters only if preceded by a space (CommonMark behavior).
+      // e.g., "#heading ##" → "heading", but "#heading#" stays as "heading#"
+      if (text.endsWith('#')) {
+        const stripped = text.replace(/#+$/, '')
+        if (!stripped || stripped.endsWith(' ')) {
+          text = stripped.trim()
+        }
+      }
+
+      return {
+        type: 'heading' as const,
+        raw: match[0]!,
+        depth: match[1]!.length,
+        text,
+        tokens: this.lexer.inline(text),
+      }
+    },
+  },
+})
+
 /** These path on npmjs.com don't belong to packages or search, so we shouldn't try to replace them with npmx.dev urls */
 const reservedPathsNpmJs = [
   'products',
diff --git a/test/unit/server/utils/readme.spec.ts b/test/unit/server/utils/readme.spec.ts
index d06be1806d..479bfae4a1 100644
--- a/test/unit/server/utils/readme.spec.ts
+++ b/test/unit/server/utils/readme.spec.ts
@@ -465,6 +465,92 @@ describe('ReadmeResponse shape (HTML route contract)', () => {
   })
 })
 
+// Tests for the lazy ATX heading extension, matching the behavior of
+// markdown-it-lazy-headers (https://npmx.dev/package/markdown-it-lazy-headers).
+describe('Lazy ATX headings (no space after #)', () => {
+  it('parses #foo through ######foo as headings', async () => {
+    const markdown = '#foo\n\n##foo\n\n###foo\n\n####foo\n\n#####foo\n\n######foo'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(6)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 3 })
+    expect(result.toc[3]).toMatchObject({ text: 'foo', depth: 4 })
+    expect(result.toc[4]).toMatchObject({ text: 'foo', depth: 5 })
+    expect(result.toc[5]).toMatchObject({ text: 'foo', depth: 6 })
+  })
+
+  it('rejects 7+ # characters as not a heading', async () => {
+    const markdown = '#######foo'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(0)
+    expect(result.html).toContain('#######foo')
+  })
+
+  it('does not affect headings that already have spaces', async () => {
+    const markdown = '# Title\n\n## Subtitle'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(2)
+    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
+  })
+
+  it('strips optional trailing # sequence preceded by space', async () => {
+    const markdown = '##foo ##'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 2 })
+  })
+
+  it('keeps trailing # not preceded by space as part of content', async () => {
+    const markdown = '#foo#'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'foo#', depth: 1 })
+  })
+
+  it('does not modify lines inside fenced code blocks', async () => {
+    const markdown = '```\n#not-a-heading\n```'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(0)
+    expect(result.html).toContain('#not-a-heading')
+  })
+
+  it('handles mixed headings with and without spaces', async () => {
+    const markdown = '#Title\n\nSome text\n\n## Subtitle\n\n###Another'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(3)
+    expect(result.toc[0]).toMatchObject({ text: 'Title', depth: 1 })
+    expect(result.toc[1]).toMatchObject({ text: 'Subtitle', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'Another', depth: 3 })
+  })
+
+  it('allows 1-3 spaces indentation', async () => {
+    const markdown = '   ###foo\n\n  ##foo\n\n #foo'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(3)
+    expect(result.toc[0]).toMatchObject({ text: 'foo', depth: 3 })
+    expect(result.toc[1]).toMatchObject({ text: 'foo', depth: 2 })
+    expect(result.toc[2]).toMatchObject({ text: 'foo', depth: 1 })
+  })
+
+  it('works after paragraphs separated by blank lines', async () => {
+    const markdown = 'Foo bar\n\n#baz\n\nBar foo'
+    const result = await renderReadmeHtml(markdown, 'test-pkg')
+
+    expect(result.toc).toHaveLength(1)
+    expect(result.toc[0]).toMatchObject({ text: 'baz', depth: 1 })
+  })
+})
+
 describe('HTML output', () => {
   it('returns sanitized html', async () => {
     const markdown = `# Title\n\nSome **bold** text and a [link](https://example.com).`