From 17fae5adcc3b4a0fbeeccc7e3f6e71b807269345 Mon Sep 17 00:00:00 2001
From: Frederik Bolding <frederik.bolding@gmail.com>
Date: Tue, 12 Mar 2024 11:54:54 +0100
Subject: [PATCH] Use lexer for extraction of markdown links (#2261)

Changes the implementation of the Markdown link extraction to use the
lexer from `marked`.

For the actual rendering we use
https://github.com/syntax-tree/mdast-util-from-markdown but since that
is ESM-only I have chosen to use `marked` in this PR.
---
 packages/snaps-utils/coverage.json  |  6 +--
 packages/snaps-utils/package.json   |  1 +
 packages/snaps-utils/src/ui.test.ts | 60 +++++++++++++++++++++++++++++
 packages/snaps-utils/src/ui.ts      | 36 +++++++++++------
 yarn.lock                           | 10 +++++
 5 files changed, 98 insertions(+), 15 deletions(-)

diff --git a/packages/snaps-utils/coverage.json b/packages/snaps-utils/coverage.json
index ed3cb454e0..648e009c22 100644
--- a/packages/snaps-utils/coverage.json
+++ b/packages/snaps-utils/coverage.json
@@ -1,6 +1,6 @@
 {
-  "branches": 96.47,
-  "functions": 98.62,
+  "branches": 96.48,
+  "functions": 98.64,
   "lines": 98.74,
-  "statements": 94.48
+  "statements": 94.51
 }
diff --git a/packages/snaps-utils/package.json b/packages/snaps-utils/package.json
index d92bc49352..12a07afd99 100644
--- a/packages/snaps-utils/package.json
+++ b/packages/snaps-utils/package.json
@@ -66,6 +66,7 @@
     "cron-parser": "^4.5.0",
     "fast-deep-equal": "^3.1.3",
     "fast-json-stable-stringify": "^2.1.0",
+    "marked": "^12.0.1",
     "rfdc": "^1.3.0",
     "semver": "^7.5.4",
     "ses": "^1.1.0",
diff --git a/packages/snaps-utils/src/ui.test.ts b/packages/snaps-utils/src/ui.test.ts
index 4c9a8c9ffd..5402601db3 100644
--- a/packages/snaps-utils/src/ui.test.ts
+++ b/packages/snaps-utils/src/ui.test.ts
@@ -19,6 +19,34 @@ describe('validateTextLinks', () => {
     expect(() =>
       validateTextLinks('[](https://foo.bar)', () => false),
     ).not.toThrow();
+
+    expect(() =>
+      validateTextLinks('[[test]](https://foo.bar)', () => false),
+    ).not.toThrow();
+
+    expect(() =>
+      validateTextLinks('[test](https://foo.bar "foo bar baz")', () => false),
+    ).not.toThrow();
+
+    expect(() =>
+      validateTextLinks('<https://foo.bar>', () => false),
+    ).not.toThrow();
+
+    expect(() =>
+      validateTextLinks(
+        `[foo][1]
+         [1]: https://foo.bar`,
+        () => false,
+      ),
+    ).not.toThrow();
+
+    expect(() =>
+      validateTextLinks(
+        `[foo][1]
+         [1]: https://foo.bar "foo bar baz"`,
+        () => false,
+      ),
+    ).not.toThrow();
   });
 
   it('throws an error if an invalid link is found in text', () => {
@@ -26,6 +54,38 @@ describe('validateTextLinks', () => {
       validateTextLinks('[test](http://foo.bar)', () => false),
     ).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');
 
+    expect(() =>
+      validateTextLinks('[[test]](http://foo.bar)', () => false),
+    ).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');
+
+    expect(() => validateTextLinks('<http://foo.bar>', () => false)).toThrow(
+      'Invalid URL: Protocol must be one of: https:, mailto:.',
+    );
+
+    expect(() =>
+      validateTextLinks('[test](http://foo.bar "foo bar baz")', () => false),
+    ).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');
+
+    expect(() =>
+      validateTextLinks(
+        `[foo][1]
+         [1]: http://foo.bar`,
+        () => false,
+      ),
+    ).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');
+
+    expect(() =>
+      validateTextLinks(
+        `[foo][1]
+         [1]: http://foo.bar "foo bar baz"`,
+        () => false,
+      ),
+    ).toThrow('Invalid URL: Protocol must be one of: https:, mailto:.');
+
+    expect(() => validateTextLinks('[test](#code)', () => false)).toThrow(
+      'Invalid URL: Unable to parse URL.',
+    );
+
     expect(() => validateTextLinks('[test](foo.bar)', () => false)).toThrow(
       'Invalid URL: Unable to parse URL.',
     );
diff --git a/packages/snaps-utils/src/ui.ts b/packages/snaps-utils/src/ui.ts
index c1bfe3982d..0b757a0959 100644
--- a/packages/snaps-utils/src/ui.ts
+++ b/packages/snaps-utils/src/ui.ts
@@ -1,11 +1,31 @@
 import type { Component } from '@metamask/snaps-sdk';
 import { NodeType } from '@metamask/snaps-sdk';
 import { assert, AssertionError } from '@metamask/utils';
-
-const MARKDOWN_LINK_REGEX = /\[(?<name>[^\]]*)\]\((?<url>[^)]+)\)/giu;
+import type { Tokens } from 'marked';
+import { lexer, walkTokens } from 'marked';
 
 const ALLOWED_PROTOCOLS = ['https:', 'mailto:'];
 
+/**
+ * Extract all links from a Markdown text string using the `marked` lexer.
+ *
+ * @param text - The markdown text string.
+ * @returns The non-empty `href` of every link token found in the text.
+ */
+function getMarkdownLinks(text: string): string[] {
+  const tokens = lexer(text);
+  const links: (Tokens.Link | Tokens.Generic)[] = [];
+
+  // Walk the lexed tokens and collect all link tokens
+  walkTokens(tokens, (token) => {
+    if (token.type === 'link') {
+      links.push(token);
+    }
+  });
+
+  return links.map((link) => link.href).filter(Boolean);
+}
+
 /**
  * Searches for markdown links in a string and checks them against the phishing list.
  *
@@ -18,17 +38,9 @@ export function validateTextLinks(
   text: string,
   isOnPhishingList: (url: string) => boolean,
 ) {
-  const matches = String.prototype.matchAll.call(text, MARKDOWN_LINK_REGEX);
-
-  for (const { groups } of matches) {
-    const link = groups?.url;
-
-    /* This case should never happen with the regex but the TS type allows for undefined */
-    /* istanbul ignore next */
-    if (!link) {
-      continue;
-    }
+  const links = getMarkdownLinks(text);
 
+  for (const link of links) {
     try {
       const url = new URL(link);
       assert(
diff --git a/yarn.lock b/yarn.lock
index 9f0e31ee0e..75d4b0f7ca 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -6008,6 +6008,7 @@ __metadata:
     istanbul-lib-report: ^3.0.0
     istanbul-reports: ^3.1.5
     jest: ^29.0.2
+    marked: ^12.0.1
     memfs: ^3.4.13
     prettier: ^2.7.1
     prettier-plugin-packagejson: ^2.2.11
@@ -17209,6 +17210,15 @@ __metadata:
   languageName: node
   linkType: hard
 
+"marked@npm:^12.0.1":
+  version: 12.0.1
+  resolution: "marked@npm:12.0.1"
+  bin:
+    marked: bin/marked.js
+  checksum: 35ebc6c4612fcc028a1cd6419321e336be5b29d3feb68dfd5aaa7fcddb399c7873cd3291d60daf342db3eede747757e4e18515f349f0ee7b84ec24254f3a4190
+  languageName: node
+  linkType: hard
+
 "md5.js@npm:^1.3.4":
   version: 1.3.5
   resolution: "md5.js@npm:1.3.5"