lycheeverse · mre · Sep 3, 2025 · Aug 29, 2025 · Aug 29, 2025
diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs
@@ -23,6 +23,7 @@ pub(crate) fn extract_markdown(
     // which is why we keep track of entries and exits while traversing the input.
     let mut inside_code_block = false;
     let mut inside_link_block = false;
+    let mut inside_wikilink_block = false;
 
     let parser = TextMergeStream::new(Parser::new_ext(input, md_extensions()));
     parser
@@ -40,6 +41,7 @@ pub(crate) fn extract_markdown(
                     // Inline link like `[foo](bar)`
                     // This is the most common link type
                     LinkType::Inline => {
+                        inside_link_block = true;
                         Some(vec![RawUri {
                             text: dest_url.to_string(),
                             // Emulate `<a href="...">` tag here to be compatible with
@@ -60,7 +62,10 @@ pub(crate) fn extract_markdown(
                     // Shortcut link like `[foo]`
                     LinkType::Shortcut |
                     // Shortcut without destination in the document, but resolved by the `broken_link_callback`
-                    LinkType::ShortcutUnknown |
+                    LinkType::ShortcutUnknown => {
+                        inside_link_block = true;
+                        Some(extract_raw_uri_from_plaintext(&dest_url))
+                    },
                     // Autolink like `<http://foo.bar/baz>`
                     LinkType::Autolink |
                     // Email address in autolink like `<john@example.org>`
@@ -72,7 +77,7 @@ pub(crate) fn extract_markdown(
                         if !include_wikilinks {
                             return None;
                         }
-                        inside_link_block = true;
+                        inside_wikilink_block = true;
                         //Ignore gitlab toc notation: https://docs.gitlab.com/user/markdown/#table-of-contents
                         if ["_TOC_".to_string(), "TOC".to_string()].contains(&dest_url.to_string()) {
                             return None;
@@ -111,7 +116,9 @@ pub(crate) fn extract_markdown(
 
             // A text node.
             Event::Text(txt) => {
-                if (inside_code_block && !include_verbatim) || inside_link_block {
+                if inside_wikilink_block
+                    || (inside_link_block && !include_verbatim)
+                    || (inside_code_block && !include_verbatim) {
                     None
                 } else {
                     Some(extract_raw_uri_from_plaintext(&txt))
@@ -137,6 +144,7 @@ pub(crate) fn extract_markdown(
             // A detected link block.
             Event::End(TagEnd::Link) => {
                 inside_link_block = false;
+                inside_wikilink_block = false;
                 None
             }
 
@@ -439,4 +447,61 @@ $$
         let uris = extract_markdown(markdown, true, true);
         assert!(uris.is_empty());
     }
+
+    #[test]
+    fn test_link_text_not_checked() {
+        // Test that link text is not extracted as a separate link by default
+        let markdown =
+            r"[https://lycheerepublic.gov/notexist (archive.org link)](https://example.com)";
+        let uris = extract_markdown(markdown, false, false);
+
+        // Should only extract the destination URL, not the link text
+        let expected = vec![RawUri {
+            text: "https://example.com".to_string(),
+            element: Some("a".to_string()),
+            attribute: Some("href".to_string()),
+        }];
+
+        assert_eq!(uris, expected);
+        assert_eq!(
+            uris.len(),
+            1,
+            "Should only find destination URL, not link text"
+        );
+    }
+
+    #[test]
+    fn test_link_text_checked_with_include_verbatim() {
+        // Test that link text IS extracted when include_verbatim is true
+        let markdown =
+            r"[https://lycheerepublic.gov/notexist (archive.org link)](https://example.com)";
+        let uris = extract_markdown(markdown, true, false);
+
+        // Should extract both the link text AND the destination URL
+        let expected = vec![
+            RawUri {
+                text: "https://example.com".to_string(),
+                element: Some("a".to_string()),
+                attribute: Some("href".to_string()),
+            },
+            RawUri {
+                text: "https://lycheerepublic.gov/notexist".to_string(),
+                element: None,
+                attribute: None,
+            },
+        ];
+
+        assert_eq!(
+            uris.len(),
+            2,
+            "Should find both destination URL and link text"
+        );
+        // Check that both expected URLs are present (order might vary)
+        for expected_uri in expected {
+            assert!(
+                uris.contains(&expected_uri),
+                "Missing expected URI: {expected_uri:?}"
+            );
+        }
+    }
 }