Skip to content

Commit

Permalink
Use lexer for extraction of markdown links (#2261)
Browse files Browse the repository at this point in the history
Changes the implementation of the Markdown link extraction to use the
lexer from `marked`.

For the actual rendering we use
https://github.com/syntax-tree/mdast-util-from-markdown, but since that
package is ESM-only, I have chosen to use `marked` for link extraction in this PR.
  • Loading branch information
FrederikBolding authored Mar 12, 2024
1 parent 3dabbf1 commit 17fae5a
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 15 deletions.
6 changes: 3 additions & 3 deletions packages/snaps-utils/coverage.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"branches": 96.47,
"functions": 98.62,
"branches": 96.48,
"functions": 98.64,
"lines": 98.74,
"statements": 94.48
"statements": 94.51
}
1 change: 1 addition & 0 deletions packages/snaps-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"cron-parser": "^4.5.0",
"fast-deep-equal": "^3.1.3",
"fast-json-stable-stringify": "^2.1.0",
"marked": "^12.0.1",
"rfdc": "^1.3.0",
"semver": "^7.5.4",
"ses": "^1.1.0",
Expand Down
60 changes: 60 additions & 0 deletions packages/snaps-utils/src/ui.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,73 @@ describe('validateTextLinks', () => {
expect(() =>
validateTextLinks('[](https://foo.bar)', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('[[test]](https://foo.bar)', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('[test](https://foo.bar "foo bar baz")', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('<https://foo.bar>', () => false),
).not.toThrow();

expect(() =>
validateTextLinks(
`[foo][1]
[1]: https://foo.bar`,
() => false,
),
).not.toThrow();

expect(() =>
validateTextLinks(
`[foo][1]
[1]: https://foo.bar "foo bar baz"`,
() => false,
),
).not.toThrow();
});

it('throws an error if an invalid link is found in text', () => {
expect(() =>
validateTextLinks('[test](http://foo.bar)', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks('[[test]](http://foo.bar)', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() => validateTextLinks('<http://foo.bar>', () => false)).toThrow(
'Invalid URL: Protocol must be one of: https:, mailto:.',
);

expect(() =>
validateTextLinks('[test](http://foo.bar "foo bar baz")', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks(
`[foo][1]
[1]: http://foo.bar`,
() => false,
),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks(
`[foo][1]
[1]: http://foo.bar "foo bar baz"`,
() => false,
),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() => validateTextLinks('[test](#code)', () => false)).toThrow(
'Invalid URL: Unable to parse URL.',
);

expect(() => validateTextLinks('[test](foo.bar)', () => false)).toThrow(
'Invalid URL: Unable to parse URL.',
);
Expand Down
36 changes: 24 additions & 12 deletions packages/snaps-utils/src/ui.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
import type { Component } from '@metamask/snaps-sdk';
import { NodeType } from '@metamask/snaps-sdk';
import { assert, AssertionError } from '@metamask/utils';

const MARKDOWN_LINK_REGEX = /\[(?<name>[^\]]*)\]\((?<url>[^)]+)\)/giu;
import type { Tokens } from 'marked';
import { lexer, walkTokens } from 'marked';

const ALLOWED_PROTOCOLS = ['https:', 'mailto:'];

/**
 * Extract all links from a Markdown text string using the `marked` lexer.
 *
 * The text is lexed into tokens, the tokens are visited with `walkTokens`,
 * and the `href` of every token whose `type` is `'link'` is collected.
 *
 * @param text - The markdown text string.
 * @returns The URLs linked to in the string, in the order their tokens are
 * visited. Link tokens without a non-empty string `href` are omitted.
 */
function getMarkdownLinks(text: string): string[] {
  const hrefs: string[] = [];

  // Walk the lexed tokens and collect the target of every link token.
  walkTokens(lexer(text), (token) => {
    if (token.type !== 'link') {
      return;
    }

    // A token with `type === 'link'` may be a `Tokens.Link` or a
    // `Tokens.Generic`, so `href` is checked at runtime instead of trusting
    // the static type. This keeps the return value a genuine `string[]`.
    const linkToken: Tokens.Link | Tokens.Generic = token;
    const href: unknown = linkToken.href;

    if (typeof href === 'string' && href.length > 0) {
      hrefs.push(href);
    }
  });

  return hrefs;
}

/**
* Searches for markdown links in a string and checks them against the phishing list.
*
Expand All @@ -18,17 +38,9 @@ export function validateTextLinks(
text: string,
isOnPhishingList: (url: string) => boolean,
) {
const matches = String.prototype.matchAll.call(text, MARKDOWN_LINK_REGEX);

for (const { groups } of matches) {
const link = groups?.url;

/* This case should never happen with the regex but the TS type allows for undefined */
/* istanbul ignore next */
if (!link) {
continue;
}
const links = getMarkdownLinks(text);

for (const link of links) {
try {
const url = new URL(link);
assert(
Expand Down
10 changes: 10 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6008,6 +6008,7 @@ __metadata:
istanbul-lib-report: ^3.0.0
istanbul-reports: ^3.1.5
jest: ^29.0.2
marked: ^12.0.1
memfs: ^3.4.13
prettier: ^2.7.1
prettier-plugin-packagejson: ^2.2.11
Expand Down Expand Up @@ -17209,6 +17210,15 @@ __metadata:
languageName: node
linkType: hard

"marked@npm:^12.0.1":
version: 12.0.1
resolution: "marked@npm:12.0.1"
bin:
marked: bin/marked.js
checksum: 35ebc6c4612fcc028a1cd6419321e336be5b29d3feb68dfd5aaa7fcddb399c7873cd3291d60daf342db3eede747757e4e18515f349f0ee7b84ec24254f3a4190
languageName: node
linkType: hard

"md5.js@npm:^1.3.4":
version: 1.3.5
resolution: "md5.js@npm:1.3.5"
Expand Down

0 comments on commit 17fae5a

Please sign in to comment.