Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use lexer for extraction of markdown links #2261

Merged
merged 3 commits into from
Mar 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions packages/snaps-utils/coverage.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"branches": 96.47,
"functions": 98.62,
"branches": 96.48,
"functions": 98.64,
"lines": 98.74,
"statements": 94.48
"statements": 94.51
}
1 change: 1 addition & 0 deletions packages/snaps-utils/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
"cron-parser": "^4.5.0",
"fast-deep-equal": "^3.1.3",
"fast-json-stable-stringify": "^2.1.0",
"marked": "^12.0.1",
"rfdc": "^1.3.0",
"semver": "^7.5.4",
"ses": "^1.1.0",
Expand Down
60 changes: 60 additions & 0 deletions packages/snaps-utils/src/ui.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,73 @@ describe('validateTextLinks', () => {
expect(() =>
validateTextLinks('[](https://foo.bar)', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('[[test]](https://foo.bar)', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('[test](https://foo.bar "foo bar baz")', () => false),
).not.toThrow();

expect(() =>
validateTextLinks('<https://foo.bar>', () => false),
).not.toThrow();

expect(() =>
validateTextLinks(
`[foo][1]
[1]: https://foo.bar`,
() => false,
),
).not.toThrow();

expect(() =>
validateTextLinks(
`[foo][1]
[1]: https://foo.bar "foo bar baz"`,
() => false,
),
).not.toThrow();
});

it('throws an error if an invalid link is found in text', () => {
expect(() =>
validateTextLinks('[test](http://foo.bar)', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks('[[test]](http://foo.bar)', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() => validateTextLinks('<http://foo.bar>', () => false)).toThrow(
'Invalid URL: Protocol must be one of: https:, mailto:.',
);

expect(() =>
validateTextLinks('[test](http://foo.bar "foo bar baz")', () => false),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks(
`[foo][1]
[1]: http://foo.bar`,
() => false,
),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() =>
validateTextLinks(
`[foo][1]
[1]: http://foo.bar "foo bar baz"`,
() => false,
),
).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');

expect(() => validateTextLinks('[test](#code)', () => false)).toThrow(
'Invalid URL: Unable to parse URL.',
);

expect(() => validateTextLinks('[test](foo.bar)', () => false)).toThrow(
'Invalid URL: Unable to parse URL.',
);
Expand Down
36 changes: 24 additions & 12 deletions packages/snaps-utils/src/ui.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,31 @@
import type { Component } from '@metamask/snaps-sdk';
import { NodeType } from '@metamask/snaps-sdk';
import { assert, AssertionError } from '@metamask/utils';

const MARKDOWN_LINK_REGEX = /\[(?<name>[^\]]*)\]\((?<url>[^)]+)\)/giu;
import type { Tokens } from 'marked';
import { lexer, walkTokens } from 'marked';

const ALLOWED_PROTOCOLS = ['https:', 'mailto:'];

/**
 * Extract the target URLs of all links in a Markdown string.
 *
 * The text is tokenized with the `marked` lexer and the resulting token tree
 * is traversed with `walkTokens`, so link detection follows the Markdown
 * parser rather than a hand-written pattern.
 *
 * @param text - The markdown text string.
 * @returns The `href` of every link token found, in traversal order. Link
 * tokens with an empty or missing `href` are omitted.
 */
function getMarkdownLinks(text: string): string[] {
  const hrefs: string[] = [];

  // Visit every lexed token and keep the target of each link token.
  walkTokens(lexer(text), (token) => {
    if (token.type !== 'link') {
      return;
    }

    // After narrowing on `type`, the token may still be a `Tokens.Generic`
    // whose properties are untyped, so pin `href` to the shape a link token
    // declares instead of letting `any` leak into the result.
    const href: Tokens.Link['href'] | undefined = token.href;

    // Drop empty or missing targets; there is nothing to validate in them.
    if (href) {
      hrefs.push(href);
    }
  });

  return hrefs;
}

/**
* Searches for markdown links in a string and checks them against the phishing list.
*
Expand All @@ -18,17 +38,9 @@ export function validateTextLinks(
text: string,
isOnPhishingList: (url: string) => boolean,
) {
const matches = String.prototype.matchAll.call(text, MARKDOWN_LINK_REGEX);

for (const { groups } of matches) {
const link = groups?.url;

/* This case should never happen with the regex but the TS type allows for undefined */
/* istanbul ignore next */
if (!link) {
continue;
}
const links = getMarkdownLinks(text);

for (const link of links) {
try {
const url = new URL(link);
assert(
Expand Down
10 changes: 10 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -6008,6 +6008,7 @@ __metadata:
istanbul-lib-report: ^3.0.0
istanbul-reports: ^3.1.5
jest: ^29.0.2
marked: ^12.0.1
memfs: ^3.4.13
prettier: ^2.7.1
prettier-plugin-packagejson: ^2.2.11
Expand Down Expand Up @@ -17209,6 +17210,15 @@ __metadata:
languageName: node
linkType: hard

"marked@npm:^12.0.1":
version: 12.0.1
resolution: "marked@npm:12.0.1"
bin:
marked: bin/marked.js
checksum: 35ebc6c4612fcc028a1cd6419321e336be5b29d3feb68dfd5aaa7fcddb399c7873cd3291d60daf342db3eede747757e4e18515f349f0ee7b84ec24254f3a4190
languageName: node
linkType: hard

"md5.js@npm:^1.3.4":
version: 1.3.5
resolution: "md5.js@npm:1.3.5"
Expand Down
Loading