From a54355901e8653c922351427f66e08e41e50de84 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Wed, 13 Aug 2025 21:29:54 +0200 Subject: [PATCH 01/27] create fixture for whitespace wikilinks Feat: strip Potholes and Headings from wikilinks adjust fixture to contain Headers and Potholes add integration test for fixture split fixture into obsidian and mediawiki --- fixtures/wiki/Dash-Usage.md | 1 + fixtures/wiki/Space Usage.md | 1 + fixtures/wiki/Underscore_Usage.md | 1 + fixtures/wiki/Usage.md | 1 + fixtures/wiki/obsidian-style.md | 13 ++++ .../subdirectory/Different-Directory-Dash.md | 1 + .../wiki/subdirectory/DifferentDirectory.md | 1 + .../Different_Directory_Underscore.md | 1 + .../Space Usage DifferentDirectory.md | 1 + fixtures/wiki/wikilink-style.md | 19 ++++++ lychee-bin/tests/cli.rs | 62 +++++++++++++++++++ lychee-lib/src/extract/markdown.rs | 50 ++++++++++++++- 12 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 fixtures/wiki/Dash-Usage.md create mode 100644 fixtures/wiki/Space Usage.md create mode 100644 fixtures/wiki/Underscore_Usage.md create mode 100644 fixtures/wiki/Usage.md create mode 100644 fixtures/wiki/obsidian-style.md create mode 100644 fixtures/wiki/subdirectory/Different-Directory-Dash.md create mode 100644 fixtures/wiki/subdirectory/DifferentDirectory.md create mode 100644 fixtures/wiki/subdirectory/Different_Directory_Underscore.md create mode 100644 fixtures/wiki/subdirectory/Space Usage DifferentDirectory.md create mode 100644 fixtures/wiki/wikilink-style.md diff --git a/fixtures/wiki/Dash-Usage.md b/fixtures/wiki/Dash-Usage.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/Dash-Usage.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/Space Usage.md b/fixtures/wiki/Space Usage.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/Space Usage.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/Underscore_Usage.md b/fixtures/wiki/Underscore_Usage.md 
new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/Underscore_Usage.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/Usage.md b/fixtures/wiki/Usage.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/Usage.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/obsidian-style.md b/fixtures/wiki/obsidian-style.md new file mode 100644 index 0000000000..4fb6f4a4c9 --- /dev/null +++ b/fixtures/wiki/obsidian-style.md @@ -0,0 +1,13 @@ +[[#LocalHeader]] + +[[Usage]] +[[Space Usage]] +[[Space Usage DifferentDirectory]] +[[DifferentDirectory]] + +[[Usage#Header|HeaderRenaming]] +[[Space Usage#Header|HeaderRenaming]] +[[Space Usage DifferentDirectory#Header|HeaderRenaming]] +[[DifferentDirectory#Header|HeaderRenaming]] + +# LocalHeader diff --git a/fixtures/wiki/subdirectory/Different-Directory-Dash.md b/fixtures/wiki/subdirectory/Different-Directory-Dash.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/subdirectory/Different-Directory-Dash.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/subdirectory/DifferentDirectory.md b/fixtures/wiki/subdirectory/DifferentDirectory.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/subdirectory/DifferentDirectory.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/subdirectory/Different_Directory_Underscore.md b/fixtures/wiki/subdirectory/Different_Directory_Underscore.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/subdirectory/Different_Directory_Underscore.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/subdirectory/Space Usage DifferentDirectory.md b/fixtures/wiki/subdirectory/Space Usage DifferentDirectory.md new file mode 100644 index 0000000000..6c67b6a977 --- /dev/null +++ b/fixtures/wiki/subdirectory/Space Usage DifferentDirectory.md @@ -0,0 +1 @@ +# Header diff --git a/fixtures/wiki/wikilink-style.md b/fixtures/wiki/wikilink-style.md 
new file mode 100644 index 0000000000..61479263a2 --- /dev/null +++ b/fixtures/wiki/wikilink-style.md @@ -0,0 +1,19 @@ +[[#LocalHeader]] + +[[Usage]] +[[Space Usage]] +[[Dash Usage]] +[[Underscore Usage]] +[[DifferentDirectory]] +[[Different Directory Dash]] +[[Different Directory Underscore]] + +[[Usage#Header|HeaderRenaming]] +[[Space Usage#Header|HeaderRenaming]] +[[Dash Usage#Header|HeaderRenaming]] +[[Underscore Usage#Header|HeaderRenaming]] +[[DifferentDirectory#Header|HeaderRenaming]] +[[Different Directory Dash#Header|HeaderRenaming]] +[[Different Directory Underscore#Header|HeaderRenaming]] + +# LocalHeader diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index c0cae571d0..b977b4d491 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3044,6 +3044,68 @@ The config file should contain every possible key for documentation purposes." .assert() .success() .stdout(contains("https://example.org")); // Should extract the link as plaintext + + fn test_wikilink_fixture_obsidian_style() { + let input = fixtures_path().join("wiki/obsidian-style.md"); + + // testing without fragments should not yield failures + main_command() + .arg(&input) + .arg("--include-wikilinks") + .arg("--fallback-extensions") + .arg("md") + .assert() + .success(); + } + + #[test] + fn test_wikilink_fixture_with_fragments_obsidian_style() { + let input = fixtures_path().join("wiki/obsidian-style.md"); + + //fragments should resolve all headers + let dir_links_with_fragment = 2; + main_command() + .arg(&input) + .arg("--include-wikilinks") + .arg("--include-fragments") + .arg("--fallback-extensions") + .arg("md") + .assert() + .failure() + .stdout(contains("Cannot find fragment").count(dir_links_with_fragment)) + .stdout(contains("#").count(dir_links_with_fragment)); + } + + #[test] + fn test_wikilink_fixture_wikilink_style() { + let input = fixtures_path().join("wiki/wikilink-style.md"); + + // testing without fragments should not yield failures + 
main_command() + .arg(&input) + .arg("--include-wikilinks") + .arg("--fallback-extensions") + .arg("md") + .assert() + .success(); + } + + #[test] + fn test_wikilink_fixture_with_fragments_wikilink_style() { + let input = fixtures_path().join("wiki/wikilink-style.md"); + + //fragments should resolve all headers + let dir_links_with_fragment = 2; + main_command() + .arg(&input) + .arg("--include-wikilinks") + .arg("--include-fragments") + .arg("--fallback-extensions") + .arg("md") + .assert() + .failure() + .stdout(contains("Cannot find fragment").count(dir_links_with_fragment)) + .stdout(contains("#").count(dir_links_with_fragment)); } /// An input which matches nothing should print a warning and continue. diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 4b3720206f..57731620cb 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -86,7 +86,7 @@ pub(crate) fn extract_markdown( Some(extract_raw_uri_from_plaintext(&dest_url, &span_provider)) } // Wiki URL (`[[http://example.com]]`) - LinkType::WikiLink { has_pothole: _ } => { + LinkType::WikiLink { has_pothole } => { // Exclude WikiLinks if not explicitly enabled if !include_wikilinks { return None; @@ -97,8 +97,20 @@ pub(crate) fn extract_markdown( return None; } + //Strip potholes (|) from wikilinks + let stripped_dest_url = if has_pothole { + pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) + }else { + dest_url.clone() + }; + + Some(vec![RawUri { + text: stripped_dest_url.to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), // wiki links start with `[[`, so offset the span by `2` - Some(raw_uri(&dest_url, span_provider.span(span.start + 2))) + span: span.start + 2 + }]) } } } @@ -677,6 +689,40 @@ Shortcut link: [link4] "Missing expected URI: {expected_uri:?}. 
Found: {uris:?}" ); } + + fn test_remove_wikilink_pothole() { + let markdown = r"[[foo|bar]]"; + let uris = extract_markdown(markdown, true, true); + let expected = vec![RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), + }]; + assert_eq!(uris, expected); + } + + #[test] + fn test_remove_wikilink_title() { + let markdown = r"[[foo#bar]]"; + let uris = extract_markdown(markdown, true, true); + let expected = vec![RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), + }]; + assert_eq!(uris, expected); + } + + #[test] + fn test_remove_wikilink_pothole_and_title() { + let markdown = r"[[foo#bar|baz]]"; + let uris = extract_markdown(markdown, true, true); + let expected = vec![RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), + }]; + assert_eq!(uris, expected); } #[test] From 6311db0001e62cc8c2995528d273b4411c36a809 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Thu, 28 Aug 2025 09:15:36 +0200 Subject: [PATCH 02/27] implement directory walking for base-url --- Cargo.lock | 1 + lychee-lib/Cargo.toml | 1 + lychee-lib/src/checker/file.rs | 24 +++++++++--- lychee-lib/src/client.rs | 4 ++ lychee-lib/src/extract/markdown.rs | 34 +++------------- lychee-lib/src/utils/mod.rs | 1 + lychee-lib/src/utils/wikilink_checker.rs | 50 ++++++++++++++++++++++++ 7 files changed, 80 insertions(+), 35 deletions(-) create mode 100644 lychee-lib/src/utils/wikilink_checker.rs diff --git a/Cargo.lock b/Cargo.lock index 53bdcf6024..bc30b08c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2865,6 +2865,7 @@ dependencies = [ "toml", "typed-builder", "url", + "walkdir", "wiremock", ] diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 0c94b62ca5..cddd684970 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -64,6 +64,7 @@ tokio = { version = "1.48.0", features = ["full"] } toml = "0.9.10" typed-builder 
= "0.23.2" url = { version = "2.5.7", features = ["serde"] } +walkdir = "2.5.0" [dependencies.par-stream] version = "0.10.2" diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 122eadf7f8..8205c117dd 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -3,6 +3,7 @@ use log::warn; use std::borrow::Cow; use std::path::{Path, PathBuf}; +use crate::utils::wikilink_checker::WikilinkChecker; use crate::{ Base, ErrorKind, Status, Uri, utils::fragment_checker::{FragmentChecker, FragmentInput}, @@ -32,8 +33,12 @@ pub(crate) struct FileChecker { index_files: Option>, /// Whether to check for the existence of fragments (e.g., `#section-id`) in HTML files. include_fragments: bool, + /// Whether to check for the existence of files linked to by Wikilinks + include_wikilinks: bool, /// Utility for performing fragment checks in HTML files. fragment_checker: FragmentChecker, + /// Utility for checking wikilinks, indexes files in a given directory + wikilink_checker: WikilinkChecker, } impl FileChecker { @@ -50,13 +55,16 @@ impl FileChecker { fallback_extensions: Vec, index_files: Option>, include_fragments: bool, + include_wikilinks: bool, ) -> Self { Self { - base, + base: base.clone(), fallback_extensions, index_files, include_fragments, + include_wikilinks, fragment_checker: FragmentChecker::new(), + wikilink_checker: WikilinkChecker::new(base), } } @@ -372,7 +380,7 @@ mod tests { #[tokio::test] async fn test_default() { // default behaviour accepts dir links as long as the directory exists. 
- let checker = FileChecker::new(None, vec![], None, true); + let checker = FileChecker::new(None, vec![], None, true, false); assert_filecheck!(&checker, "filechecker/index_dir", Status::Ok(_)); @@ -430,6 +438,7 @@ mod tests { vec![], Some(vec!["index.html".to_owned(), "index.md".to_owned()]), true, + false, ); assert_resolves!( @@ -468,6 +477,7 @@ mod tests { vec!["html".to_owned()], Some(vec!["index".to_owned()]), false, + false, ); // this test case has a subdir 'same_name' and a file 'same_name.html'. @@ -492,7 +502,7 @@ mod tests { #[tokio::test] async fn test_empty_index_list_corner() { // empty index_files list will reject all directory links - let checker_no_indexes = FileChecker::new(None, vec![], Some(vec![]), false); + let checker_no_indexes = FileChecker::new(None, vec![], Some(vec![]), false, false); assert_resolves!( &checker_no_indexes, "filechecker/index_dir", @@ -516,7 +526,7 @@ mod tests { "..".to_owned(), "/".to_owned(), ]; - let checker_dir_indexes = FileChecker::new(None, vec![], Some(dir_names), false); + let checker_dir_indexes = FileChecker::new(None, vec![], Some(dir_names), false, false); assert_resolves!( &checker_dir_indexes, "filechecker/index_dir", @@ -537,6 +547,7 @@ mod tests { vec![], Some(vec!["../index_dir/index.html".to_owned()]), true, + false, ); assert_resolves!( &checker_dotdot, @@ -550,7 +561,8 @@ mod tests { .to_str() .expect("expected utf-8 fixtures path") .to_owned(); - let checker_absolute = FileChecker::new(None, vec![], Some(vec![absolute_html]), true); + let checker_absolute = + FileChecker::new(None, vec![], Some(vec![absolute_html]), true, false); assert_resolves!( &checker_absolute, "filechecker/empty_dir#fragment", @@ -560,7 +572,7 @@ mod tests { #[tokio::test] async fn test_fallback_extensions_on_directories() { - let checker = FileChecker::new(None, vec!["html".to_owned()], None, true); + let checker = FileChecker::new(None, vec!["html".to_owned()], None, true, false); // fallback extensions should be applied 
when directory links are resolved // to directories (i.e., the default index_files behavior or if `.` diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index e0ec4e6c5c..4ace6b9569 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -300,6 +300,9 @@ pub struct ClientBuilder { /// Enable the checking of fragments in links. include_fragments: bool, + /// Enable the checking of wikilinks in markdown files + include_wikilinks: bool, + /// Requests run through this chain where each item in the chain /// can modify the request. A chained item can also decide to exit /// early and return a status, so that subsequent chain items are @@ -398,6 +401,7 @@ impl ClientBuilder { self.fallback_extensions, self.index_files, self.include_fragments, + self.include_wikilinks, ), }) } diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 57731620cb..e9570889f0 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -107,7 +107,7 @@ pub(crate) fn extract_markdown( Some(vec![RawUri { text: stripped_dest_url.to_string(), element: Some("a".to_string()), - attribute: Some("href".to_string()), + attribute: Some("wikilink".to_string()), // wiki links start with `[[`, so offset the span by `2` span: span.start + 2 }]) @@ -542,7 +542,7 @@ $$ let expected = vec![RawUri { text: "https://example.com/destination".to_string(), element: Some("a".to_string()), - attribute: Some("href".to_string()), + attribute: Some("wikilink".to_string()), span: span(1, 3), }]; let uris = extract_markdown(markdown, true, true); @@ -556,13 +556,13 @@ $$ RawUri { text: "https://example.com/destination".to_string(), element: Some("a".to_string()), - attribute: Some("href".to_string()), + attribute: Some("wikilink".to_string()), span: span(1, 3), }, RawUri { text: "https://example.com/source".to_string(), element: Some("a".to_string()), - attribute: Some("href".to_string()), + attribute: 
Some("wikilink".to_string()), span: span(1, 38), }, ]; @@ -696,31 +696,7 @@ Shortcut link: [link4] let expected = vec![RawUri { text: "foo".to_string(), element: Some("a".to_string()), - attribute: Some("href".to_string()), - }]; - assert_eq!(uris, expected); - } - - #[test] - fn test_remove_wikilink_title() { - let markdown = r"[[foo#bar]]"; - let uris = extract_markdown(markdown, true, true); - let expected = vec![RawUri { - text: "foo".to_string(), - element: Some("a".to_string()), - attribute: Some("href".to_string()), - }]; - assert_eq!(uris, expected); - } - - #[test] - fn test_remove_wikilink_pothole_and_title() { - let markdown = r"[[foo#bar|baz]]"; - let uris = extract_markdown(markdown, true, true); - let expected = vec![RawUri { - text: "foo".to_string(), - element: Some("a".to_string()), - attribute: Some("href".to_string()), + attribute: Some("wikilink".to_string()), }]; assert_eq!(uris, expected); } diff --git a/lychee-lib/src/utils/mod.rs b/lychee-lib/src/utils/mod.rs index d75d20c064..0236b9de32 100644 --- a/lychee-lib/src/utils/mod.rs +++ b/lychee-lib/src/utils/mod.rs @@ -3,3 +3,4 @@ pub(crate) mod path; pub(crate) mod request; pub(crate) mod reqwest; pub(crate) mod url; +pub(crate) mod wikilink_checker; diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs new file mode 100644 index 0000000000..1339284f64 --- /dev/null +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -0,0 +1,50 @@ +use std::{collections::HashSet, path::PathBuf, sync::Arc}; + +use std::sync::Mutex; +use walkdir::WalkDir; + +use crate::Base; + +#[derive(Clone, Debug, Default)] +/// Indexes a given directory for filenames +pub(crate) struct WikilinkChecker { + filesnames: Arc>>, + basedir: Option, +} + +impl WikilinkChecker { + pub(crate) fn new(base: Option) -> Self { + Self { + filesnames: Arc::new(Mutex::new(HashSet::with_capacity(100000000))), + basedir: base, + } + } + + pub(crate) fn index_files(&self) { + match self.basedir { + None 
=> {} + Some(ref basetype) => match basetype { + Base::Local(localbasename) => { + //Start file indexing only if the Base is valid and local + + let mut filenameslock = self.filesnames.lock().unwrap(); + for entry in WalkDir::new::(localbasename.into()) + //actively ignore symlinks + .follow_links(false) + .into_iter() + .filter_map(|e| e.ok()) + { + match entry.path().file_name() { + Some(filename) => { + filenameslock.insert(filename.to_string_lossy().to_string()); + } + None => {} + } + } + } + // A remote base is of no use for the wikilink checker + Base::Remote(_remotebasename) => {} + }, + } + } +} From bf27508d719c9966a4394e3c70eb0f1d17f7a0a1 Mon Sep 17 00:00:00 2001 From: Jakob <144204108+JayJayArr@users.noreply.github.com> Date: Sun, 31 Aug 2025 10:58:20 +0200 Subject: [PATCH 03/27] implement indexing and lookup switch to tokio mutex --- lychee-bin/src/client.rs | 1 + lychee-lib/src/checker/file.rs | 13 ++++++ lychee-lib/src/utils/wikilink_checker.rs | 50 +++++++++++++++++++----- 3 files changed, 54 insertions(+), 10 deletions(-) diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index 3efefffe6f..d1b8c578ac 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -55,6 +55,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - .include_fragments(cfg.include_fragments) .fallback_extensions(cfg.fallback_extensions.clone()) .index_files(cfg.index_files.clone()) + .include_wikilinks(cfg.include_wikilinks) .rate_limit_config(RateLimitConfig::from_options( cfg.host_concurrency, cfg.host_request_interval, diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 8205c117dd..2b6b22b140 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -81,6 +81,9 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. 
pub(crate) async fn check(&self, uri: &Uri) -> Status { + if self.include_wikilinks { + self.setup_wikilinks().await; + } let Ok(path) = uri.url.to_file_path() else { return ErrorKind::InvalidFilePath(uri.clone()).into(); }; @@ -321,6 +324,16 @@ impl FileChecker { } } } + + // Initializes the Index of the wikilink checker + async fn setup_wikilinks(&self) { + self.wikilink_checker.index_files().await; + } + // Tries to resolve a link by looking up the filename in the wikilink index + // The + async fn check_wikilink(&self, path: &Path, uri: &Uri) -> Status { + self.wikilink_checker.check(path, uri).await + } } #[cfg(test)] diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 1339284f64..1439507245 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -1,33 +1,44 @@ +use crate::{Base, Status, Uri}; +use http::StatusCode; +use log::info; +use std::path::Path; use std::{collections::HashSet, path::PathBuf, sync::Arc}; - -use std::sync::Mutex; +use tokio::sync::Mutex; use walkdir::WalkDir; -use crate::Base; - #[derive(Clone, Debug, Default)] /// Indexes a given directory for filenames pub(crate) struct WikilinkChecker { - filesnames: Arc>>, + filenames: Arc>>, basedir: Option, } impl WikilinkChecker { pub(crate) fn new(base: Option) -> Self { Self { - filesnames: Arc::new(Mutex::new(HashSet::with_capacity(100000000))), + filenames: Arc::new(Mutex::new(HashSet::new())), basedir: base, } } - pub(crate) fn index_files(&self) { + pub(crate) async fn index_files(&self) { + //Skip the indexing step in case the filenames are already populated + if !self.filenames.lock().await.is_empty() { + return; + } match self.basedir { - None => {} + None => { + info!("File indexing for Wikilinks aborted as no base directory is specified"); + } Some(ref basetype) => match basetype { Base::Local(localbasename) => { //Start file indexing only if the Base is valid and local + info!( + 
"Starting file indexing for wikilinks in {}", + localbasename.display() + ); - let mut filenameslock = self.filesnames.lock().unwrap(); + let mut filenameslock = self.filenames.lock().await; for entry in WalkDir::new::(localbasename.into()) //actively ignore symlinks .follow_links(false) @@ -42,9 +53,28 @@ impl WikilinkChecker { } } } - // A remote base is of no use for the wikilink checker + // A remote base is of no use for the wikilink checker, silently skip over it Base::Remote(_remotebasename) => {} }, } } + + pub(crate) async fn check(&self, path: &Path, uri: &Uri) -> Status { + match path.file_name() { + None => Status::Error(crate::ErrorKind::InvalidFilePath(uri.clone())), + Some(filename) => { + if self + .filenames + .lock() + .await + .get(filename.to_str().unwrap()) + .is_some() + { + Status::Ok(StatusCode::OK) + } else { + Status::Error(crate::ErrorKind::InvalidFilePath(uri.clone())) + } + } + } + } } From 0f1228c382dfb7589053a9244f74fbcf5e171baa Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Tue, 2 Sep 2025 07:28:36 +0200 Subject: [PATCH 04/27] switch to Hashmap to resolve file names to pathes --- lychee-lib/src/checker/file.rs | 2 +- lychee-lib/src/utils/wikilink_checker.rs | 35 ++++++++++++------------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 2b6b22b140..9c101cc951 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -331,7 +331,7 @@ impl FileChecker { } // Tries to resolve a link by looking up the filename in the wikilink index // The - async fn check_wikilink(&self, path: &Path, uri: &Uri) -> Status { + async fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { self.wikilink_checker.check(path, uri).await } } diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 1439507245..0314de8be0 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ 
b/lychee-lib/src/utils/wikilink_checker.rs @@ -1,22 +1,23 @@ -use crate::{Base, Status, Uri}; -use http::StatusCode; +use crate::{Base, ErrorKind, Uri}; use log::info; +use std::collections::HashMap; +use std::ffi::OsString; use std::path::Path; -use std::{collections::HashSet, path::PathBuf, sync::Arc}; +use std::{path::PathBuf, sync::Arc}; use tokio::sync::Mutex; use walkdir::WalkDir; #[derive(Clone, Debug, Default)] -/// Indexes a given directory for filenames +// Indexes a given directory for filenames and the corresponding path pub(crate) struct WikilinkChecker { - filenames: Arc>>, + filenames: Arc>>, basedir: Option, } impl WikilinkChecker { pub(crate) fn new(base: Option) -> Self { Self { - filenames: Arc::new(Mutex::new(HashSet::new())), + filenames: Arc::new(Mutex::new(HashMap::new())), basedir: base, } } @@ -47,7 +48,7 @@ impl WikilinkChecker { { match entry.path().file_name() { Some(filename) => { - filenameslock.insert(filename.to_string_lossy().to_string()); + filenameslock.insert(filename.into(), entry.path().to_path_buf()); } None => {} } @@ -59,20 +60,18 @@ impl WikilinkChecker { } } - pub(crate) async fn check(&self, path: &Path, uri: &Uri) -> Status { + pub(crate) async fn check(&self, path: &Path, uri: &Uri) -> Result { match path.file_name() { - None => Status::Error(crate::ErrorKind::InvalidFilePath(uri.clone())), + None => Err(ErrorKind::InvalidFilePath(uri.clone())), Some(filename) => { - if self - .filenames - .lock() - .await - .get(filename.to_str().unwrap()) - .is_some() - { - Status::Ok(StatusCode::OK) + let filenamelock = self.filenames.lock().await; + if filenamelock.contains_key(filename.into()) { + Ok(filenamelock + .get(filename.into()) + .expect("Could not retrieve inserted Path for discovered Wikilink-Path")) + .cloned() } else { - Status::Error(crate::ErrorKind::InvalidFilePath(uri.clone())) + Err(ErrorKind::InvalidFilePath(uri.clone())) } } } From 9762e071d6177fa516f0978a137ae13df31171be Mon Sep 17 00:00:00 2001 From: JayJayArr 
Date: Wed, 17 Sep 2025 14:14:45 +0200 Subject: [PATCH 05/27] feat: resolve Filenames through wikilink checker --- fixtures/wiki/obsidian-style-plus-headers.md | 8 ++++++++ fixtures/wiki/obsidian-style.md | 9 --------- lychee-bin/tests/cli.rs | 6 +++++- lychee-lib/src/checker/file.rs | 20 +++++++++++++++++--- lychee-lib/src/utils/wikilink_checker.rs | 14 ++++++-------- 5 files changed, 36 insertions(+), 21 deletions(-) create mode 100644 fixtures/wiki/obsidian-style-plus-headers.md diff --git a/fixtures/wiki/obsidian-style-plus-headers.md b/fixtures/wiki/obsidian-style-plus-headers.md new file mode 100644 index 0000000000..0b892aafc7 --- /dev/null +++ b/fixtures/wiki/obsidian-style-plus-headers.md @@ -0,0 +1,8 @@ +[[#LocalHeader]] + +# LocalHeader + +[[Usage#Header|HeaderRenaming]] +[[Space Usage#Header|HeaderRenaming]] +[[Space Usage DifferentDirectory#Header|HeaderRenaming]] +[[DifferentDirectory#Header|HeaderRenaming]] diff --git a/fixtures/wiki/obsidian-style.md b/fixtures/wiki/obsidian-style.md index 4fb6f4a4c9..4911206397 100644 --- a/fixtures/wiki/obsidian-style.md +++ b/fixtures/wiki/obsidian-style.md @@ -1,13 +1,4 @@ -[[#LocalHeader]] - [[Usage]] [[Space Usage]] [[Space Usage DifferentDirectory]] [[DifferentDirectory]] - -[[Usage#Header|HeaderRenaming]] -[[Space Usage#Header|HeaderRenaming]] -[[Space Usage DifferentDirectory#Header|HeaderRenaming]] -[[DifferentDirectory#Header|HeaderRenaming]] - -# LocalHeader diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index b977b4d491..f0f433e209 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3054,13 +3054,17 @@ The config file should contain every possible key for documentation purposes." 
.arg("--include-wikilinks") .arg("--fallback-extensions") .arg("md") + .arg("--base-url") + .arg(fixtures_path()) + .arg("--root-dir") + .arg(fixtures_path()) .assert() .success(); } #[test] fn test_wikilink_fixture_with_fragments_obsidian_style() { - let input = fixtures_path().join("wiki/obsidian-style.md"); + let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); //fragments should resolve all headers let dir_links_with_fragment = 2; diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 9c101cc951..44f92a5b17 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -81,6 +81,7 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. pub(crate) async fn check(&self, uri: &Uri) -> Status { + //only populate the wikilink filenames if it is enabled if self.include_wikilinks { self.setup_wikilinks().await; } @@ -145,8 +146,12 @@ impl FileChecker { ) -> Result, ErrorKind> { let path = match path.metadata() { // for non-existing paths, attempt fallback extensions + // if fallback extensions don't help, try wikilinks Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - self.apply_fallback_extensions(path, uri).map(Cow::Owned) + match self.apply_fallback_extensions(path, uri).map(Cow::Owned) { + Ok(val) => Ok(val), + Err(_) => self.apply_wikilink_check(path, uri).await.map(Cow::Owned), + } } // other IO errors are unexpected and should fail the check @@ -266,7 +271,7 @@ impl FileChecker { } /// Checks a resolved file, optionally verifying fragments for HTML files. - /// + ///u /// # Arguments /// /// * `path` - The resolved path to check. 
@@ -332,7 +337,16 @@ impl FileChecker { // Tries to resolve a link by looking up the filename in the wikilink index // The async fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { - self.wikilink_checker.check(path, uri).await + let mut path_buf = path.to_path_buf(); + for ext in &self.fallback_extensions { + path_buf.set_extension(ext); + match self.wikilink_checker.check(&path_buf, uri).await { + Err(_) => {} + Ok(resolved_path) => return Ok(resolved_path), + } + } + + Err(ErrorKind::InvalidFilePath(uri.clone())) } } diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 0314de8be0..b03dbd92e6 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -44,13 +44,11 @@ impl WikilinkChecker { //actively ignore symlinks .follow_links(false) .into_iter() - .filter_map(|e| e.ok()) + .filter_map(std::result::Result::ok) { - match entry.path().file_name() { - Some(filename) => { - filenameslock.insert(filename.into(), entry.path().to_path_buf()); - } - None => {} + if let Some(filename) = entry.path().file_name() { + filenameslock + .insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); } } } @@ -65,9 +63,9 @@ impl WikilinkChecker { None => Err(ErrorKind::InvalidFilePath(uri.clone())), Some(filename) => { let filenamelock = self.filenames.lock().await; - if filenamelock.contains_key(filename.into()) { + if filenamelock.contains_key(&filename.to_ascii_lowercase()) { Ok(filenamelock - .get(filename.into()) + .get(&filename.to_ascii_lowercase()) .expect("Could not retrieve inserted Path for discovered Wikilink-Path")) .cloned() } else { From cc4630d7ed706082538b93e99f842c2ad52b41a1 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Tue, 30 Sep 2025 11:31:50 +0200 Subject: [PATCH 06/27] fix: exclude fragments cleanup fix: merge conflicts --- lychee-bin/tests/cli.rs | 40 +++++++------------- lychee-lib/src/checker/file.rs | 12 +++--- 
lychee-lib/src/extract/markdown.rs | 48 ++++++++++++++++++++---- lychee-lib/src/utils/wikilink_checker.rs | 12 +++--- 4 files changed, 66 insertions(+), 46 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index f0f433e209..14123e36d1 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3044,7 +3044,8 @@ The config file should contain every possible key for documentation purposes." .assert() .success() .stdout(contains("https://example.org")); // Should extract the link as plaintext - + } + #[test] fn test_wikilink_fixture_obsidian_style() { let input = fixtures_path().join("wiki/obsidian-style.md"); @@ -3063,53 +3064,40 @@ The config file should contain every possible key for documentation purposes." } #[test] - fn test_wikilink_fixture_with_fragments_obsidian_style() { + fn test_wikilink_fixture_with_fragments_obsidian_style_fixtures_excluded() { let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); //fragments should resolve all headers - let dir_links_with_fragment = 2; - main_command() - .arg(&input) - .arg("--include-wikilinks") - .arg("--include-fragments") - .arg("--fallback-extensions") - .arg("md") - .assert() - .failure() - .stdout(contains("Cannot find fragment").count(dir_links_with_fragment)) - .stdout(contains("#").count(dir_links_with_fragment)); - } - - #[test] - fn test_wikilink_fixture_wikilink_style() { - let input = fixtures_path().join("wiki/wikilink-style.md"); - - // testing without fragments should not yield failures main_command() .arg(&input) .arg("--include-wikilinks") .arg("--fallback-extensions") .arg("md") + .arg("--base-url") + .arg(fixtures_path()) + .arg("--root-dir") + .arg(fixtures_path()) .assert() .success(); } #[test] - fn test_wikilink_fixture_with_fragments_wikilink_style() { - let input = fixtures_path().join("wiki/wikilink-style.md"); + fn test_wikilink_fixture_with_fragments_obsidian_style() { + let input = 
fixtures_path().join("wiki/obsidian-style-plus-headers.md"); //fragments should resolve all headers - let dir_links_with_fragment = 2; main_command() .arg(&input) .arg("--include-wikilinks") .arg("--include-fragments") .arg("--fallback-extensions") .arg("md") + .arg("--base-url") + .arg(fixtures_path()) + .arg("--root-dir") + .arg(fixtures_path()) .assert() - .failure() - .stdout(contains("Cannot find fragment").count(dir_links_with_fragment)) - .stdout(contains("#").count(dir_links_with_fragment)); + .success(); } /// An input which matches nothing should print a warning and continue. diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 44f92a5b17..9232388984 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -83,7 +83,7 @@ impl FileChecker { pub(crate) async fn check(&self, uri: &Uri) -> Status { //only populate the wikilink filenames if it is enabled if self.include_wikilinks { - self.setup_wikilinks().await; + self.setup_wikilinks(); } let Ok(path) = uri.url.to_file_path() else { return ErrorKind::InvalidFilePath(uri.clone()).into(); @@ -150,7 +150,7 @@ impl FileChecker { Err(e) if e.kind() == std::io::ErrorKind::NotFound => { match self.apply_fallback_extensions(path, uri).map(Cow::Owned) { Ok(val) => Ok(val), - Err(_) => self.apply_wikilink_check(path, uri).await.map(Cow::Owned), + Err(_) => self.apply_wikilink_check(path, uri).map(Cow::Owned), } } @@ -331,16 +331,16 @@ impl FileChecker { } // Initializes the Index of the wikilink checker - async fn setup_wikilinks(&self) { - self.wikilink_checker.index_files().await; + fn setup_wikilinks(&self) { + self.wikilink_checker.index_files(); } // Tries to resolve a link by looking up the filename in the wikilink index // The - async fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { + fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { let mut path_buf = path.to_path_buf(); for ext in &self.fallback_extensions { 
path_buf.set_extension(ext); - match self.wikilink_checker.check(&path_buf, uri).await { + match self.wikilink_checker.check(&path_buf, uri) { Err(_) => {} Ok(resolved_path) => return Ok(resolved_path), } diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index e9570889f0..f229deb57a 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -98,19 +98,29 @@ pub(crate) fn extract_markdown( } //Strip potholes (|) from wikilinks - let stripped_dest_url = if has_pothole { + let mut stripped_dest_url = if has_pothole { pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) }else { dest_url.clone() }; - Some(vec![RawUri { - text: stripped_dest_url.to_string(), - element: Some("a".to_string()), - attribute: Some("wikilink".to_string()), + //Strip fragments (#) from wikilinks, according to the obsidian spec + //fragments come before potholes + if stripped_dest_url.contains('#') { + stripped_dest_url = pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('#').unwrap_or(dest_url.len())]); + } + + if stripped_dest_url.is_empty() { + None + } else { + Some(vec![RawUri { + text: stripped_dest_url.to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), // wiki links start with `[[`, so offset the span by `2` span: span.start + 2 - }]) + }]) + } } } } @@ -689,7 +699,8 @@ Shortcut link: [link4] "Missing expected URI: {expected_uri:?}. 
Found: {uris:?}" ); } - + } + #[test] fn test_remove_wikilink_pothole() { let markdown = r"[[foo|bar]]"; let uris = extract_markdown(markdown, true, true); @@ -717,7 +728,16 @@ Shortcut link: [link4] }]; let uris = extract_markdown(input, false, false); - + } + #[test] + fn test_remove_wikilink_fragment() { + let markdown = r"[[foo#bar]]"; + let uris = extract_markdown(markdown, true, true); + let expected = vec![RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), + }]; assert_eq!(uris, expected); } @@ -780,4 +800,16 @@ Shortcut link: [link4] assert_eq!(uri.attribute, Some("href".to_string())); } } + + #[test] + fn test_remove_wikilink_potholes_and_fragments() { + let markdown = r"[[foo#bar|baz]]"; + let uris = extract_markdown(markdown, true, true); + let expected = vec![RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), + }]; + assert_eq!(uris, expected); + } } diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index b03dbd92e6..4349ef505e 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -3,8 +3,8 @@ use log::info; use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; +use std::sync::Mutex; use std::{path::PathBuf, sync::Arc}; -use tokio::sync::Mutex; use walkdir::WalkDir; #[derive(Clone, Debug, Default)] @@ -22,9 +22,9 @@ impl WikilinkChecker { } } - pub(crate) async fn index_files(&self) { + pub(crate) fn index_files(&self) { //Skip the indexing step in case the filenames are already populated - if !self.filenames.lock().await.is_empty() { + if !self.filenames.lock().unwrap().is_empty() { return; } match self.basedir { @@ -39,7 +39,7 @@ impl WikilinkChecker { localbasename.display() ); - let mut filenameslock = self.filenames.lock().await; + let mut filenameslock = self.filenames.lock().unwrap(); for entry in 
WalkDir::new::(localbasename.into()) //actively ignore symlinks .follow_links(false) @@ -58,11 +58,11 @@ impl WikilinkChecker { } } - pub(crate) async fn check(&self, path: &Path, uri: &Uri) -> Result { + pub(crate) fn check(&self, path: &Path, uri: &Uri) -> Result { match path.file_name() { None => Err(ErrorKind::InvalidFilePath(uri.clone())), Some(filename) => { - let filenamelock = self.filenames.lock().await; + let filenamelock = self.filenames.lock().unwrap(); if filenamelock.contains_key(&filename.to_ascii_lowercase()) { Ok(filenamelock .get(&filename.to_ascii_lowercase()) From 91fa1ed5072956ca3ddca3ac1791874972922df1 Mon Sep 17 00:00:00 2001 From: Jakob <144204108+JayJayArr@users.noreply.github.com> Date: Fri, 3 Oct 2025 16:00:28 +0200 Subject: [PATCH 07/27] Apply suggestions from code review Co-authored-by: Matthias Endler --- lychee-lib/src/checker/file.rs | 9 ++++----- lychee-lib/src/extract/markdown.rs | 2 +- lychee-lib/src/utils/wikilink_checker.rs | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 9232388984..afcdd28a9e 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -81,7 +81,7 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. pub(crate) async fn check(&self, uri: &Uri) -> Status { - //only populate the wikilink filenames if it is enabled + //only populate the wikilink filenames if the feature is enabled if self.include_wikilinks { self.setup_wikilinks(); } @@ -271,7 +271,7 @@ impl FileChecker { } /// Checks a resolved file, optionally verifying fragments for HTML files. - ///u + /// /// # Arguments /// /// * `path` - The resolved path to check. 
@@ -330,18 +330,17 @@ impl FileChecker { } } - // Initializes the Index of the wikilink checker + // Initializes the index of the wikilink checker fn setup_wikilinks(&self) { self.wikilink_checker.index_files(); } // Tries to resolve a link by looking up the filename in the wikilink index - // The fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { let mut path_buf = path.to_path_buf(); for ext in &self.fallback_extensions { path_buf.set_extension(ext); match self.wikilink_checker.check(&path_buf, uri) { - Err(_) => {} + Err(_) => { trace!("Tried to find wikilink at {path_buf}") } Ok(resolved_path) => return Ok(resolved_path), } } diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index f229deb57a..77b1bee667 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -100,7 +100,7 @@ pub(crate) fn extract_markdown( //Strip potholes (|) from wikilinks let mut stripped_dest_url = if has_pothole { pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) - }else { + } else { dest_url.clone() }; diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 4349ef505e..4f48937972 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -17,8 +17,8 @@ pub(crate) struct WikilinkChecker { impl WikilinkChecker { pub(crate) fn new(base: Option) -> Self { Self { - filenames: Arc::new(Mutex::new(HashMap::new())), basedir: base, + ..default::Default() } } From da50cb896fc28df1bc7ae1132bb3ee039072bc48 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Fri, 3 Oct 2025 18:59:47 +0200 Subject: [PATCH 08/27] tie --include-wikilinks to --base-url --- README.md | 31 +++++++++++++++++++++++++++---- lychee-bin/src/options.rs | 3 ++- lychee-bin/tests/cli.rs | 22 +++++++++++----------- 3 files changed, 40 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 
924b3eac4d..d51b882344 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Available as a command-line utility, a library and a [GitHub Action](https://git + ## Table of Contents - [Development](#development) @@ -717,6 +718,28 @@ Options: User agent [default: lychee/0.20.1] + [default: color] + [possible values: plain, color, emoji, task] + + -f, --format + Output format of final status report + + [default: compact] + [possible values: compact, detailed, json, markdown, raw] + + --require-https + When HTTPS is available, treat HTTP links as errors + + --cookie-jar + Tell lychee to read cookies from the given file. Cookies will be stored in the + cookie jar and sent with requests. New cookies will be stored in the cookie jar + and existing cookies will be updated. + + --include-wikilinks + Check WikiLinks in Markdown files, this requires specifying --base-url + + -h, --help + Print help (see a summary with '-h') -v, --verbose... Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`) @@ -732,13 +755,13 @@ Options: ### Exit codes -0 Success. The operation was completed successfully as instructed. +0 Success. The operation was completed successfully as instructed. -1 Missing inputs or any unexpected runtime failures or configuration errors +1 Missing inputs or any unexpected runtime failures or configuration errors -2 Link check failures. At least one non-excluded link failed the check. +2 Link check failures. At least one non-excluded link failed the check. -3 Encountered errors in the config file. +3 Encountered errors in the config file. ### Ignoring links diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index b9be8fe521..1f4677e918 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -891,7 +891,8 @@ and existing cookies will be updated." 
pub(crate) cookie_jar: Option, #[allow(clippy::doc_markdown)] - /// Check WikiLinks in Markdown files + /// Check WikiLinks in Markdown files, this requires specifying --base-url + #[clap(requires = "base_url")] #[arg(long)] #[serde(default)] pub(crate) include_wikilinks: bool, diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 14123e36d1..10e61f60c6 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -2586,6 +2586,8 @@ The config file should contain every possible key for documentation purposes." cargo_bin_cmd!() .arg("--dump") .arg("--include-wikilinks") + .arg("--base-url") + .arg(fixtures_path()) .arg(test_path) .assert() .success() @@ -3045,6 +3047,7 @@ The config file should contain every possible key for documentation purposes." .success() .stdout(contains("https://example.org")); // Should extract the link as plaintext } + #[test] fn test_wikilink_fixture_obsidian_style() { let input = fixtures_path().join("wiki/obsidian-style.md"); @@ -3057,17 +3060,16 @@ The config file should contain every possible key for documentation purposes." .arg("md") .arg("--base-url") .arg(fixtures_path()) - .arg("--root-dir") - .arg(fixtures_path()) .assert() - .success(); + .success() + .stdout(contains("4 OK")); } #[test] fn test_wikilink_fixture_with_fragments_obsidian_style_fixtures_excluded() { let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); - //fragments should resolve all headers + // fragments should resolve all headers main_command() .arg(&input) .arg("--include-wikilinks") @@ -3075,17 +3077,16 @@ The config file should contain every possible key for documentation purposes." 
.arg("md") .arg("--base-url") .arg(fixtures_path()) - .arg("--root-dir") - .arg(fixtures_path()) .assert() - .success(); + .success() + .stdout(contains("4 OK")); } #[test] fn test_wikilink_fixture_with_fragments_obsidian_style() { let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); - //fragments should resolve all headers + // fragments should resolve all headers main_command() .arg(&input) .arg("--include-wikilinks") @@ -3094,10 +3095,9 @@ The config file should contain every possible key for documentation purposes." .arg("md") .arg("--base-url") .arg(fixtures_path()) - .arg("--root-dir") - .arg(fixtures_path()) .assert() - .success(); + .success() + .stdout(contains("4 OK")); } /// An input which matches nothing should print a warning and continue. From 62a69c1b1ede7180c2cde2165e894177fa9c4f9f Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Fri, 3 Oct 2025 19:05:48 +0200 Subject: [PATCH 09/27] update return values for Wikilink checker --- lychee-lib/src/checker/file.rs | 22 ++++--- lychee-lib/src/extract/markdown.rs | 6 +- lychee-lib/src/types/error.rs | 14 +++++ lychee-lib/src/utils/wikilink_checker.rs | 73 +++++++++++++++--------- 4 files changed, 77 insertions(+), 38 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index afcdd28a9e..ddbff75bd6 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -1,5 +1,5 @@ use http::StatusCode; -use log::warn; +use log::{trace, warn}; use std::borrow::Cow; use std::path::{Path, PathBuf}; @@ -81,9 +81,12 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. 
pub(crate) async fn check(&self, uri: &Uri) -> Status { - //only populate the wikilink filenames if the feature is enabled + // only populate the wikilink filenames if the feature is enabled if self.include_wikilinks { - self.setup_wikilinks(); + match self.setup_wikilinks() { + Ok(()) => (), + Err(e) => return Status::Error(e), + } } let Ok(path) = uri.url.to_file_path() else { return ErrorKind::InvalidFilePath(uri.clone()).into(); @@ -331,17 +334,20 @@ impl FileChecker { } // Initializes the index of the wikilink checker - fn setup_wikilinks(&self) { - self.wikilink_checker.index_files(); + fn setup_wikilinks(&self) -> Result<(), ErrorKind> { + self.wikilink_checker.setup_wikilinks_index() } + // Tries to resolve a link by looking up the filename in the wikilink index fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { let mut path_buf = path.to_path_buf(); for ext in &self.fallback_extensions { path_buf.set_extension(ext); - match self.wikilink_checker.check(&path_buf, uri) { - Err(_) => { trace!("Tried to find wikilink at {path_buf}") } - Ok(resolved_path) => return Ok(resolved_path), + match self.wikilink_checker.contains_path(&path_buf) { + None => { + trace!("Tried to find wikilink {} at {}", uri, path_buf.display()); + } + Some(resolved_path) => return Ok(resolved_path), } } diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 77b1bee667..41d1840c5d 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -97,15 +97,15 @@ pub(crate) fn extract_markdown( return None; } - //Strip potholes (|) from wikilinks + // Strip potholes (|) from wikilinks let mut stripped_dest_url = if has_pothole { pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) } else { dest_url.clone() }; - //Strip fragments (#) from wikilinks, according to the obsidian spec - //fragments come before potholes + // Strip fragments (#) from wikilinks, according to the 
obsidian spec + // fragments come before potholes if stripped_dest_url.contains('#') { stripped_dest_url = pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('#').unwrap_or(dest_url.len())]); } diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index a351c4dbbf..45cf15c2b3 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -178,6 +178,16 @@ pub enum ErrorKind { /// The reason the command failed reason: String, }, + + /// Error locking a Mutex + #[error("Failed to lock a Mutex")] + MutexPoisoned, + + /// Test-only error variant for formatter tests + /// Available in both test and debug builds to support cross-crate testing + #[cfg(any(test, debug_assertions))] + #[error("Generic test error")] + TestError, } impl ErrorKind { @@ -336,6 +346,9 @@ impl ErrorKind { [init @ .., tail] => format!("An index file ({}, or {}) is required", init.join(", "), tail), }.into(), ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. 
Check value of the preprocessor option")), + ErrorKind::MutexPoisoned => Some ( + "One or more threads failed and poisoned a Mutex".to_string() + ) } } @@ -466,6 +479,7 @@ impl Hash for ErrorKind { Self::Cookies(e) => e.hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), + Self::MutexPoisoned => "Mutex Poisoned".to_string().hash(state), } } } diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 4f48937972..348d0561c4 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -1,5 +1,5 @@ -use crate::{Base, ErrorKind, Uri}; -use log::info; +use crate::{Base, ErrorKind, Result}; +use log::{info, warn}; use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; @@ -7,8 +7,12 @@ use std::sync::Mutex; use std::{path::PathBuf, sync::Arc}; use walkdir::WalkDir; +/// Indexes a given directory mapping filenames to their corresponding path. +/// +/// The `WikilinkChecker` Recursively checks all subdirectories of the given +/// base directory mapping any found files to the path where they can be found. +/// Symlinks are ignored to prevent it from infinite loops. #[derive(Clone, Debug, Default)] -// Indexes a given directory for filenames and the corresponding path pub(crate) struct WikilinkChecker { filenames: Arc>>, basedir: Option, @@ -18,58 +22,73 @@ impl WikilinkChecker { pub(crate) fn new(base: Option) -> Self { Self { basedir: base, - ..default::Default() + ..Default::default() } } - pub(crate) fn index_files(&self) { - //Skip the indexing step in case the filenames are already populated + /// Populates the index of the `WikilinkChecker` unless it is already populated. + /// + /// Recursively walks the base directory mapping each filename to an absolute filepath. 
+ /// Errors if no base directory is given or if it is recognized as remote + pub(crate) fn setup_wikilinks_index(&self) -> Result<()> { + // Skip the indexing step in case the filenames are already populated if !self.filenames.lock().unwrap().is_empty() { - return; + return Ok(()); } match self.basedir { None => { - info!("File indexing for Wikilinks aborted as no base directory is specified"); + warn!("File indexing for Wikilinks aborted as no base directory is specified"); + Ok(()) } - Some(ref basetype) => match basetype { - Base::Local(localbasename) => { - //Start file indexing only if the Base is valid and local + Some(ref base_type) => match base_type { + Base::Local(local_base_name) => { + // Start file indexing only if the Base is valid and local info!( "Starting file indexing for wikilinks in {}", - localbasename.display() + local_base_name.display() ); - let mut filenameslock = self.filenames.lock().unwrap(); - for entry in WalkDir::new::(localbasename.into()) - //actively ignore symlinks + let mut lock = self + .filenames + .lock() + .map_err(|_| ErrorKind::MutexPoisoned)?; + for entry in WalkDir::new::(local_base_name.into()) + // actively ignore symlinks .follow_links(false) .into_iter() .filter_map(std::result::Result::ok) { if let Some(filename) = entry.path().file_name() { - filenameslock - .insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); + lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); } } + Ok(()) } + // A remote base is of no use for the wikilink checker, silently skip over it - Base::Remote(_remotebasename) => {} + Base::Remote(remote_base_name) => { + warn!("Error using remote base url for checking wililinks: {remote_base_name}"); + Ok(()) + } }, } } - - pub(crate) fn check(&self, path: &Path, uri: &Uri) -> Result { + /// Checks the index for a filename. 
Returning the absolute path if the name is found, + /// otherwise returning None + pub(crate) fn contains_path(&self, path: &Path) -> Option { match path.file_name() { - None => Err(ErrorKind::InvalidFilePath(uri.clone())), + None => None, Some(filename) => { - let filenamelock = self.filenames.lock().unwrap(); - if filenamelock.contains_key(&filename.to_ascii_lowercase()) { - Ok(filenamelock - .get(&filename.to_ascii_lowercase()) - .expect("Could not retrieve inserted Path for discovered Wikilink-Path")) + let filename_lock = self.filenames.lock().unwrap(); + if filename_lock.contains_key(&filename.to_ascii_lowercase()) { + Some( + filename_lock.get(&filename.to_ascii_lowercase()).expect( + "Could not retrieve inserted Path for discovered Wikilink-Path", + ), + ) .cloned() } else { - Err(ErrorKind::InvalidFilePath(uri.clone())) + None } } } From f4b06007e2c735c019ba5c34c352d5a8ba241c21 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Mon, 13 Oct 2025 14:10:16 +0200 Subject: [PATCH 10/27] refactor: wikilink cleanup --- lychee-lib/src/extract/markdown.rs | 98 ++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 31 deletions(-) diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 41d1840c5d..b8200f51d9 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -1,9 +1,11 @@ //! 
Extract links and fragments from markdown documents use std::collections::{HashMap, HashSet}; +use log::warn; use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset}; use crate::{ + ErrorKind, extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext}, types::uri::raw::{ OffsetSpanProvider, RawUri, RawUriSpan, SourceSpanProvider, SpanProvider as _, @@ -97,29 +99,17 @@ pub(crate) fn extract_markdown( return None; } - // Strip potholes (|) from wikilinks - let mut stripped_dest_url = if has_pothole { - pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('|').unwrap_or(dest_url.len())]) - } else { - dest_url.clone() - }; - - // Strip fragments (#) from wikilinks, according to the obsidian spec - // fragments come before potholes - if stripped_dest_url.contains('#') { - stripped_dest_url = pulldown_cmark::CowStr::Borrowed(&dest_url[0..dest_url.find('#').unwrap_or(dest_url.len())]); - } - - if stripped_dest_url.is_empty() { - None - } else { + if let Ok(wikilink) = clean_wikilink(&dest_url, has_pothole) { Some(vec![RawUri { - text: stripped_dest_url.to_string(), + text: wikilink.to_string(), element: Some("a".to_string()), attribute: Some("wikilink".to_string()), // wiki links start with `[[`, so offset the span by `2` span: span.start + 2 }]) + } else { + warn!("WARNING: The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments"); + None } } } @@ -333,6 +323,26 @@ pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet { out } +fn clean_wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { + // Strip potholes (|) from wikilinks + let mut stripped_input = if has_pothole { + pulldown_cmark::CowStr::Borrowed(&input[0..input.find('|').unwrap_or(input.len())]) + } else { + pulldown_cmark::CowStr::Borrowed(input) + }; + + // Strip fragments (#) from wikilinks, according to the obsidian spec + // fragments always come before potholes + 
if stripped_input.contains('#') { + stripped_input = + pulldown_cmark::CowStr::Borrowed(&input[0..input.find('#').unwrap_or(input.len())]); + } + if stripped_input.is_empty() { + return Err(ErrorKind::EmptyUrl); + } + Ok(stripped_input) +} + #[derive(Default)] struct HeadingIdGenerator { counter: HashMap, @@ -373,6 +383,7 @@ mod tests { use crate::types::uri::raw::span; use super::*; + use rstest::rstest; const MD_INPUT: &str = r#" # A Test @@ -700,15 +711,32 @@ Shortcut link: [link4] ); } } + #[test] - fn test_remove_wikilink_pothole() { - let markdown = r"[[foo|bar]]"; + fn test_clean_wikilink() { + let markdown = r" +[[foo|bar]] +[[foo#bar]] +[[foo#bar|baz]] +"; let uris = extract_markdown(markdown, true, true); - let expected = vec![RawUri { - text: "foo".to_string(), - element: Some("a".to_string()), - attribute: Some("wikilink".to_string()), - }]; + let expected = vec![ + RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), + }, + RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), + }, + RawUri { + text: "foo".to_string(), + element: Some("a".to_string()), + attribute: Some("wikilink".to_string()), + }, + ]; assert_eq!(uris, expected); } @@ -730,15 +758,16 @@ Shortcut link: [link4] let uris = extract_markdown(input, false, false); } #[test] - fn test_remove_wikilink_fragment() { + fn test_wikilink_extraction_returns_none_on_empty_links() { let markdown = r"[[foo#bar]]"; + let markdown = r" +[[|bar]] +[[#bar]] +[[#bar|baz]] +"; + let uris = extract_markdown(markdown, true, true); - let expected = vec![RawUri { - text: "foo".to_string(), - element: Some("a".to_string()), - attribute: Some("wikilink".to_string()), - }]; - assert_eq!(uris, expected); + assert!(uris.is_empty()); } #[test] @@ -811,5 +840,12 @@ Shortcut link: [link4] attribute: Some("wikilink".to_string()), }]; assert_eq!(uris, expected); + #[rstest] + #[case("|foo", true)] + 
#[case("|foo#bar", true)] + #[case("#baz", false)] + fn test_from_str(#[case] input: &str, #[case] has_pothole: bool) { + let result = clean_wikilink(input, has_pothole); + assert!(result.is_err()); } } From 070b7b2714b3b81a9adf3c8dd7530262ec011d45 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Mon, 13 Oct 2025 14:10:58 +0200 Subject: [PATCH 11/27] feat: WikilinkChecker as optional include span in tests fix: allow too many lines fix merge conflicts fix merge conflicts --- README.md | 32 ++++-------------------- lychee-bin/tests/cli.rs | 20 +++++++-------- lychee-lib/src/checker/file.rs | 19 +++++++++----- lychee-lib/src/extract/markdown.rs | 7 ++++-- lychee-lib/src/types/error.rs | 16 ++++++------ lychee-lib/src/utils/wikilink_checker.rs | 20 +++++++++++---- 6 files changed, 57 insertions(+), 57 deletions(-) diff --git a/README.md b/README.md index d51b882344..02129489a7 100644 --- a/README.md +++ b/README.md @@ -574,7 +574,7 @@ Options: Find links in verbatim sections like `pre`- and `code` blocks --include-wikilinks - Check WikiLinks in Markdown files + Check WikiLinks in Markdown files, this requires specifying --base-url --index-files When checking locally, resolves directory links to a separate index file. @@ -718,28 +718,6 @@ Options: User agent [default: lychee/0.20.1] - [default: color] - [possible values: plain, color, emoji, task] - - -f, --format - Output format of final status report - - [default: compact] - [possible values: compact, detailed, json, markdown, raw] - - --require-https - When HTTPS is available, treat HTTP links as errors - - --cookie-jar - Tell lychee to read cookies from the given file. Cookies will be stored in the - cookie jar and sent with requests. New cookies will be stored in the cookie jar - and existing cookies will be updated. - - --include-wikilinks - Check WikiLinks in Markdown files, this requires specifying --base-url - - -h, --help - Print help (see a summary with '-h') -v, --verbose... 
Set verbosity level; more output per occurrence (e.g. `-v` or `-vv`) @@ -755,13 +733,13 @@ Options: ### Exit codes -0 Success. The operation was completed successfully as instructed. +0 Success. The operation was completed successfully as instructed. -1 Missing inputs or any unexpected runtime failures or configuration errors +1 Missing inputs or any unexpected runtime failures or configuration errors -2 Link check failures. At least one non-excluded link failed the check. +2 Link check failures. At least one non-excluded link failed the check. -3 Encountered errors in the config file. +3 Encountered errors in the config file. ### Ignoring links diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 10e61f60c6..3a1bc10151 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -2587,7 +2587,7 @@ The config file should contain every possible key for documentation purposes." .arg("--dump") .arg("--include-wikilinks") .arg("--base-url") - .arg(fixtures_path()) + .arg(fixtures_path!()) .arg(test_path) .assert() .success() @@ -3050,16 +3050,16 @@ The config file should contain every possible key for documentation purposes." #[test] fn test_wikilink_fixture_obsidian_style() { - let input = fixtures_path().join("wiki/obsidian-style.md"); + let input = fixtures_path!().join("wiki/obsidian-style.md"); // testing without fragments should not yield failures - main_command() + main_command!() .arg(&input) .arg("--include-wikilinks") .arg("--fallback-extensions") .arg("md") .arg("--base-url") - .arg(fixtures_path()) + .arg(fixtures_path!()) .assert() .success() .stdout(contains("4 OK")); @@ -3067,16 +3067,16 @@ The config file should contain every possible key for documentation purposes." 
#[test] fn test_wikilink_fixture_with_fragments_obsidian_style_fixtures_excluded() { - let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); + let input = fixtures_path!().join("wiki/obsidian-style-plus-headers.md"); // fragments should resolve all headers - main_command() + main_command!() .arg(&input) .arg("--include-wikilinks") .arg("--fallback-extensions") .arg("md") .arg("--base-url") - .arg(fixtures_path()) + .arg(fixtures_path!()) .assert() .success() .stdout(contains("4 OK")); @@ -3084,17 +3084,17 @@ The config file should contain every possible key for documentation purposes." #[test] fn test_wikilink_fixture_with_fragments_obsidian_style() { - let input = fixtures_path().join("wiki/obsidian-style-plus-headers.md"); + let input = fixtures_path!().join("wiki/obsidian-style-plus-headers.md"); // fragments should resolve all headers - main_command() + main_command!() .arg(&input) .arg("--include-wikilinks") .arg("--include-fragments") .arg("--fallback-extensions") .arg("md") .arg("--base-url") - .arg(fixtures_path()) + .arg(fixtures_path!()) .assert() .success() .stdout(contains("4 OK")); diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index ddbff75bd6..0b80f79bdf 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -38,7 +38,7 @@ pub(crate) struct FileChecker { /// Utility for performing fragment checks in HTML files. 
fragment_checker: FragmentChecker, /// Utility for checking wikilinks, indexes files in a given directory - wikilink_checker: WikilinkChecker, + wikilink_checker: Option, } impl FileChecker { @@ -335,7 +335,12 @@ impl FileChecker { // Initializes the index of the wikilink checker fn setup_wikilinks(&self) -> Result<(), ErrorKind> { - self.wikilink_checker.setup_wikilinks_index() + match &self.wikilink_checker { + Some(checker) => checker.setup_wikilinks_index(), + None => Err(ErrorKind::WikilinkCheckerInit( + "Initialization failed, no checker instantiated".to_string(), + )), + } } // Tries to resolve a link by looking up the filename in the wikilink index @@ -343,11 +348,13 @@ impl FileChecker { let mut path_buf = path.to_path_buf(); for ext in &self.fallback_extensions { path_buf.set_extension(ext); - match self.wikilink_checker.contains_path(&path_buf) { - None => { - trace!("Tried to find wikilink {} at {}", uri, path_buf.display()); + if let Some(checker) = &self.wikilink_checker { + match checker.contains_path(&path_buf) { + None => { + trace!("Tried to find wikilink {} at {}", uri, path_buf.display()); + } + Some(resolved_path) => return Ok(resolved_path), } - Some(resolved_path) => return Ok(resolved_path), } } diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index b8200f51d9..662b7ebc0a 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -104,8 +104,8 @@ pub(crate) fn extract_markdown( text: wikilink.to_string(), element: Some("a".to_string()), attribute: Some("wikilink".to_string()), - // wiki links start with `[[`, so offset the span by `2` - span: span.start + 2 + // wiki links start with `[[`, so offset the span by `2` + span: span_provider.span(span.start + 2) }]) } else { warn!("WARNING: The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments"); @@ -725,16 +725,19 @@ Shortcut link: [link4] text: "foo".to_string(), element: 
Some("a".to_string()), attribute: Some("wikilink".to_string()), + span: span(2, 3), }, RawUri { text: "foo".to_string(), element: Some("a".to_string()), attribute: Some("wikilink".to_string()), + span: span(3, 3), }, RawUri { text: "foo".to_string(), element: Some("a".to_string()), attribute: Some("wikilink".to_string()), + span: span(4, 3), }, ]; assert_eq!(uris, expected); diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 45cf15c2b3..17ab632796 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -183,11 +183,9 @@ pub enum ErrorKind { #[error("Failed to lock a Mutex")] MutexPoisoned, - /// Test-only error variant for formatter tests - /// Available in both test and debug builds to support cross-crate testing - #[cfg(any(test, debug_assertions))] - #[error("Generic test error")] - TestError, + /// Error when initializing the Wikilink Checker + #[error("Failed to initialize Wikilink Checker")] + WikilinkCheckerInit(String), } impl ErrorKind { @@ -348,7 +346,10 @@ impl ErrorKind { ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. 
Check value of the preprocessor option")), ErrorKind::MutexPoisoned => Some ( "One or more threads failed and poisoned a Mutex".to_string() - ) + ), + ErrorKind::WikilinkCheckerInit(reason) => Some(format!( + "Error initializing the Wikilink Checker: {reason} ", + )), } } @@ -479,7 +480,8 @@ impl Hash for ErrorKind { Self::Cookies(e) => e.hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), - Self::MutexPoisoned => "Mutex Poisoned".to_string().hash(state), + Self::MutexPoisoned => "Mutex Poisoned".hash(state), + Self::WikilinkCheckerInit(e) => e.hash(state), } } } diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/utils/wikilink_checker.rs index 348d0561c4..fcd4d5f084 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/utils/wikilink_checker.rs @@ -19,10 +19,17 @@ pub(crate) struct WikilinkChecker { } impl WikilinkChecker { - pub(crate) fn new(base: Option) -> Self { - Self { - basedir: base, - ..Default::default() + pub(crate) fn new(base: Option) -> Option { + if base.is_none() { + None + } else { + warn!( + "The Wikilink Checker could not be initialized because the base directory is missing" + ); + Some(Self { + basedir: base, + ..Default::default() + }) } } @@ -68,7 +75,10 @@ impl WikilinkChecker { // A remote base is of no use for the wikilink checker, silently skip over it Base::Remote(remote_base_name) => { warn!("Error using remote base url for checking wililinks: {remote_base_name}"); - Ok(()) + Err(ErrorKind::WikilinkCheckerInit( + "Remote Base Directory found, only local directories are allowed" + .to_string(), + )) } }, } From c95c7658f31d9e862b39e1cde2aa9c03dcc7d23c Mon Sep 17 00:00:00 2001 From: Jakob <144204108+JayJayArr@users.noreply.github.com> Date: Sun, 16 Nov 2025 18:41:23 +0100 Subject: [PATCH 12/27] Apply suggestions from code review Co-authored-by: Matthias Endler --- lychee-lib/src/checker/file.rs | 
7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 0b80f79bdf..249b15d7e7 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -37,7 +37,7 @@ pub(crate) struct FileChecker { include_wikilinks: bool, /// Utility for performing fragment checks in HTML files. fragment_checker: FragmentChecker, - /// Utility for checking wikilinks, indexes files in a given directory + /// Utility for checking Wikilinks, indexes files in a given directory wikilink_checker: Option, } @@ -83,9 +83,8 @@ impl FileChecker { pub(crate) async fn check(&self, uri: &Uri) -> Status { // only populate the wikilink filenames if the feature is enabled if self.include_wikilinks { - match self.setup_wikilinks() { - Ok(()) => (), - Err(e) => return Status::Error(e), + if let Err(e) = self.setup_wikilinks() { + return Status::Error(e); } } let Ok(path) = uri.url.to_file_path() else { From 166b8706ff1a368259c85ad1773cd8c6711b7e81 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Thu, 20 Nov 2025 23:57:34 +0100 Subject: [PATCH 13/27] refactor: WikilinkResolver in own module fix remove unused Error Types feat: doc WikiLink Resolver --- lychee-bin/tests/cli.rs | 6 +- lychee-lib/src/checker/file.rs | 62 +++++-------------- lychee-lib/src/checker/mod.rs | 1 + .../wikilink/index.rs} | 59 +++++++----------- lychee-lib/src/checker/wikilink/mod.rs | 6 ++ lychee-lib/src/checker/wikilink/resolver.rs | 34 ++++++++++ lychee-lib/src/extract/markdown.rs | 6 +- lychee-lib/src/types/error.rs | 20 ++---- lychee-lib/src/utils/mod.rs | 1 - 9 files changed, 92 insertions(+), 103 deletions(-) rename lychee-lib/src/{utils/wikilink_checker.rs => checker/wikilink/index.rs} (61%) create mode 100644 lychee-lib/src/checker/wikilink/mod.rs create mode 100644 lychee-lib/src/checker/wikilink/resolver.rs diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 3a1bc10151..2cb2c85e08 100644 --- 
a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3053,7 +3053,7 @@ The config file should contain every possible key for documentation purposes." let input = fixtures_path!().join("wiki/obsidian-style.md"); // testing without fragments should not yield failures - main_command!() + cargo_bin_cmd!() .arg(&input) .arg("--include-wikilinks") .arg("--fallback-extensions") @@ -3070,7 +3070,7 @@ The config file should contain every possible key for documentation purposes." let input = fixtures_path!().join("wiki/obsidian-style-plus-headers.md"); // fragments should resolve all headers - main_command!() + cargo_bin_cmd!() .arg(&input) .arg("--include-wikilinks") .arg("--fallback-extensions") @@ -3087,7 +3087,7 @@ The config file should contain every possible key for documentation purposes." let input = fixtures_path!().join("wiki/obsidian-style-plus-headers.md"); // fragments should resolve all headers - main_command!() + cargo_bin_cmd!() .arg(&input) .arg("--include-wikilinks") .arg("--include-fragments") diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 249b15d7e7..c967b0a20f 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -1,9 +1,9 @@ use http::StatusCode; -use log::{trace, warn}; +use log::warn; use std::borrow::Cow; use std::path::{Path, PathBuf}; -use crate::utils::wikilink_checker::WikilinkChecker; +use crate::checker::wikilink::resolver::WikilinkResolver; use crate::{ Base, ErrorKind, Status, Uri, utils::fragment_checker::{FragmentChecker, FragmentInput}, @@ -37,8 +37,8 @@ pub(crate) struct FileChecker { include_wikilinks: bool, /// Utility for performing fragment checks in HTML files. 
fragment_checker: FragmentChecker, - /// Utility for checking Wikilinks, indexes files in a given directory - wikilink_checker: Option, + /// Utility for resolving Wikilinks, indexes files in a given directory + wikilink_resolver: WikilinkResolver, } impl FileChecker { @@ -59,12 +59,12 @@ impl FileChecker { ) -> Self { Self { base: base.clone(), - fallback_extensions, + fallback_extensions: fallback_extensions.clone(), index_files, include_fragments, include_wikilinks, fragment_checker: FragmentChecker::new(), - wikilink_checker: WikilinkChecker::new(base), + wikilink_resolver: WikilinkResolver::new(base, fallback_extensions), } } @@ -81,12 +81,6 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. pub(crate) async fn check(&self, uri: &Uri) -> Status { - // only populate the wikilink filenames if the feature is enabled - if self.include_wikilinks { - if let Err(e) = self.setup_wikilinks() { - return Status::Error(e); - } - } let Ok(path) = uri.url.to_file_path() else { return ErrorKind::InvalidFilePath(uri.clone()).into(); }; @@ -149,12 +143,16 @@ impl FileChecker { let path = match path.metadata() { // for non-existing paths, attempt fallback extensions // if fallback extensions don't help, try wikilinks - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - match self.apply_fallback_extensions(path, uri).map(Cow::Owned) { - Ok(val) => Ok(val), - Err(_) => self.apply_wikilink_check(path, uri).map(Cow::Owned), - } - } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => self + .apply_fallback_extensions(path, uri) + .or_else(|_| { + if self.include_wikilinks { + self.wikilink_resolver.resolve(path, uri) + } else { + Err(ErrorKind::InvalidFilePath(uri.clone())) + } + }) + .map(Cow::Owned), // other IO errors are unexpected and should fail the check Err(e) => Err(ErrorKind::ReadFileInput(e, path.to_path_buf())), @@ -331,34 +329,6 @@ impl FileChecker { } } } - - // Initializes the index of the wikilink checker - fn 
setup_wikilinks(&self) -> Result<(), ErrorKind> { - match &self.wikilink_checker { - Some(checker) => checker.setup_wikilinks_index(), - None => Err(ErrorKind::WikilinkCheckerInit( - "Initialization failed, no checker instantiated".to_string(), - )), - } - } - - // Tries to resolve a link by looking up the filename in the wikilink index - fn apply_wikilink_check(&self, path: &Path, uri: &Uri) -> Result { - let mut path_buf = path.to_path_buf(); - for ext in &self.fallback_extensions { - path_buf.set_extension(ext); - if let Some(checker) = &self.wikilink_checker { - match checker.contains_path(&path_buf) { - None => { - trace!("Tried to find wikilink {} at {}", uri, path_buf.display()); - } - Some(resolved_path) => return Ok(resolved_path), - } - } - } - - Err(ErrorKind::InvalidFilePath(uri.clone())) - } } #[cfg(test)] diff --git a/lychee-lib/src/checker/mod.rs b/lychee-lib/src/checker/mod.rs index bfbef9de51..803b1aa9ea 100644 --- a/lychee-lib/src/checker/mod.rs +++ b/lychee-lib/src/checker/mod.rs @@ -5,3 +5,4 @@ pub(crate) mod file; pub(crate) mod mail; pub(crate) mod website; +pub(crate) mod wikilink; diff --git a/lychee-lib/src/utils/wikilink_checker.rs b/lychee-lib/src/checker/wikilink/index.rs similarity index 61% rename from lychee-lib/src/utils/wikilink_checker.rs rename to lychee-lib/src/checker/wikilink/index.rs index fcd4d5f084..302abe9212 100644 --- a/lychee-lib/src/utils/wikilink_checker.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -1,5 +1,5 @@ -use crate::{Base, ErrorKind, Result}; -use log::{info, warn}; +use crate::Base; +use log::{info, trace, warn}; use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; @@ -9,45 +9,36 @@ use walkdir::WalkDir; /// Indexes a given directory mapping filenames to their corresponding path. 
/// -/// The `WikilinkChecker` Recursively checks all subdirectories of the given +/// The `WikilinkIndex` Recursively checks all subdirectories of the given /// base directory mapping any found files to the path where they can be found. /// Symlinks are ignored to prevent it from infinite loops. #[derive(Clone, Debug, Default)] -pub(crate) struct WikilinkChecker { +pub(crate) struct WikilinkIndex { filenames: Arc>>, basedir: Option, } -impl WikilinkChecker { - pub(crate) fn new(base: Option) -> Option { - if base.is_none() { - None - } else { - warn!( - "The Wikilink Checker could not be initialized because the base directory is missing" - ); - Some(Self { - basedir: base, - ..Default::default() - }) - } +impl WikilinkIndex { + pub(crate) fn new(base: Option) -> Self { + let index = Self { + basedir: base, + ..Default::default() + }; + index.start_indexing(); + index } - /// Populates the index of the `WikilinkChecker` unless it is already populated. + /// Populates the index of the `WikilinkIndex` on startup /// /// Recursively walks the base directory mapping each filename to an absolute filepath. 
- /// Errors if no base directory is given or if it is recognized as remote - pub(crate) fn setup_wikilinks_index(&self) -> Result<()> { - // Skip the indexing step in case the filenames are already populated - if !self.filenames.lock().unwrap().is_empty() { - return Ok(()); - } - match self.basedir { + /// The Index stays empty if no base directory is supplied or if the base directory is remote + pub(crate) fn start_indexing(&self) { + match &self.basedir { None => { - warn!("File indexing for Wikilinks aborted as no base directory is specified"); - Ok(()) + // The Empty Index returns no results in this case + trace!("File indexing for Wikilinks aborted as no base directory is specified"); } - Some(ref base_type) => match base_type { + Some(base_type) => match base_type { Base::Local(local_base_name) => { // Start file indexing only if the Base is valid and local info!( @@ -55,10 +46,7 @@ impl WikilinkChecker { local_base_name.display() ); - let mut lock = self - .filenames - .lock() - .map_err(|_| ErrorKind::MutexPoisoned)?; + let mut lock = self.filenames.lock().unwrap(); for entry in WalkDir::new::(local_base_name.into()) // actively ignore symlinks .follow_links(false) @@ -69,16 +57,11 @@ impl WikilinkChecker { lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); } } - Ok(()) } - // A remote base is of no use for the wikilink checker, silently skip over it + // A remote base is of no use for the wikilink checker, return an error to the user Base::Remote(remote_base_name) => { warn!("Error using remote base url for checking wililinks: {remote_base_name}"); - Err(ErrorKind::WikilinkCheckerInit( - "Remote Base Directory found, only local directories are allowed" - .to_string(), - )) } }, } diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs new file mode 100644 index 0000000000..19e11f8070 --- /dev/null +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -0,0 +1,6 @@ +//! `WikiLink` Module +//! +//! 
This module contains an Indexer and a Resolver for `WikiLinks` + +pub(crate) mod index; +pub(crate) mod resolver; diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs new file mode 100644 index 0000000000..0880526f04 --- /dev/null +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -0,0 +1,34 @@ +use crate::{Base, ErrorKind, Uri, checker::wikilink::index::WikilinkIndex}; +use log::trace; +use std::path::{Path, PathBuf}; + +#[derive(Clone, Debug)] +pub(crate) struct WikilinkResolver { + checker: WikilinkIndex, + fallback_extensions: Vec, +} + +/// Tries to resolve a `WikiLink` by searching for the filename in the `WikilinkIndex` +/// Returns the path of the found file if found, otherwise an Error +impl WikilinkResolver { + pub(crate) fn new(basedir: Option, fallback_extensions: Vec) -> Self { + Self { + checker: WikilinkIndex::new(basedir), + fallback_extensions, + } + } + /// Resolves a wikilink by searching the index with fallback extensions. 
+ pub(crate) fn resolve(&self, path: &Path, uri: &Uri) -> Result { + for ext in &self.fallback_extensions { + let mut candidate = path.to_path_buf(); + candidate.set_extension(ext); + + if let Some(resolved) = self.checker.contains_path(&candidate) { + return Ok(resolved); + } + trace!("Wikilink not found: {} at {}", uri, candidate.display()); + } + + Err(ErrorKind::WikilinkNotFound(uri.clone())) + } +} diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 662b7ebc0a..763a247f3a 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -759,10 +759,11 @@ Shortcut link: [link4] }]; let uris = extract_markdown(input, false, false); + + assert_eq!(uris, expected); } #[test] fn test_wikilink_extraction_returns_none_on_empty_links() { - let markdown = r"[[foo#bar]]"; let markdown = r" [[|bar]] [[#bar]] @@ -841,8 +842,11 @@ Shortcut link: [link4] text: "foo".to_string(), element: Some("a".to_string()), attribute: Some("wikilink".to_string()), + span: span(1, 3), }]; assert_eq!(uris, expected); + } + #[rstest] #[case("|foo", true)] #[case("|foo#bar", true)] diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 17ab632796..3c5788bde7 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -179,13 +179,9 @@ pub enum ErrorKind { reason: String, }, - /// Error locking a Mutex - #[error("Failed to lock a Mutex")] - MutexPoisoned, - - /// Error when initializing the Wikilink Checker + /// The extracted `WikiLink` could not be found by searching the directory #[error("Failed to initialize Wikilink Checker")] - WikilinkCheckerInit(String), + WikilinkNotFound(Uri), } impl ErrorKind { @@ -343,12 +339,9 @@ impl ErrorKind { [name] => format!("An index file ({name}) is required"), [init @ .., tail] => format!("An index file ({}, or {}) is required", init.join(", "), tail), }.into(), - ErrorKind::PreprocessorError{command, reason} => Some(format!("Command 
'{command}' failed {reason}. Check value of the preprocessor option")), - ErrorKind::MutexPoisoned => Some ( - "One or more threads failed and poisoned a Mutex".to_string() - ), - ErrorKind::WikilinkCheckerInit(reason) => Some(format!( - "Error initializing the Wikilink Checker: {reason} ", + ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. Check value of the pre option")), + ErrorKind::WikilinkNotFound(uri) => Some(format!( + "WikiLink could not be found: {uri} ", )), } } @@ -480,8 +473,7 @@ impl Hash for ErrorKind { Self::Cookies(e) => e.hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), - Self::MutexPoisoned => "Mutex Poisoned".hash(state), - Self::WikilinkCheckerInit(e) => e.hash(state), + Self::WikilinkNotFound(e) => e.hash(state), } } } diff --git a/lychee-lib/src/utils/mod.rs b/lychee-lib/src/utils/mod.rs index 0236b9de32..d75d20c064 100644 --- a/lychee-lib/src/utils/mod.rs +++ b/lychee-lib/src/utils/mod.rs @@ -3,4 +3,3 @@ pub(crate) mod path; pub(crate) mod request; pub(crate) mod reqwest; pub(crate) mod url; -pub(crate) mod wikilink_checker; From fcb87167ffce2ed491a995a37cb79b40030a7053 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Tue, 25 Nov 2025 14:37:22 +0100 Subject: [PATCH 14/27] Apply suggestion from @mre --- lychee-lib/src/checker/wikilink/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index 302abe9212..ccc7ef5a5e 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -9,7 +9,7 @@ use walkdir::WalkDir; /// Indexes a given directory mapping filenames to their corresponding path. 
/// -/// The `WikilinkIndex` Recursively checks all subdirectories of the given +/// The `WikilinkIndex` recursively checks all subdirectories of the given /// base directory mapping any found files to the path where they can be found. /// Symlinks are ignored to prevent it from infinite loops. #[derive(Clone, Debug, Default)] From 27b2787c6faed4516450f993d3b6cb830d551355 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Tue, 25 Nov 2025 14:39:06 +0100 Subject: [PATCH 15/27] Apply suggestion from @mre --- lychee-lib/src/checker/wikilink/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index ccc7ef5a5e..79cb77d3d5 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -30,7 +30,7 @@ impl WikilinkIndex { /// Populates the index of the `WikilinkIndex` on startup /// - /// Recursively walks the base directory mapping each filename to an absolute filepath. + /// Recursively walks the base directory, mapping each filename to an absolute filepath. /// The Index stays empty if no base directory is supplied or if the base directory is remote pub(crate) fn start_indexing(&self) { match &self.basedir { From b74cb5e52960ca30eb94a6495cd0db8b46ab39f9 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Tue, 25 Nov 2025 14:47:57 +0100 Subject: [PATCH 16/27] Apply suggestion from @mre --- lychee-lib/src/checker/wikilink/index.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index 79cb77d3d5..0173290b5c 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -66,6 +66,7 @@ impl WikilinkIndex { }, } } + /// Checks the index for a filename. 
Returning the absolute path if the name is found, /// otherwise returning None pub(crate) fn contains_path(&self, path: &Path) -> Option { From 93ea466bc94e341e48c984d80eb67d7b4ff3f5c5 Mon Sep 17 00:00:00 2001 From: Jakob <144204108+JayJayArr@users.noreply.github.com> Date: Tue, 25 Nov 2025 19:37:47 +0100 Subject: [PATCH 17/27] Apply suggestions from code review Co-authored-by: Matthias Endler --- lychee-lib/src/checker/wikilink/index.rs | 8 +++----- lychee-lib/src/checker/wikilink/resolver.rs | 4 ++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index 0173290b5c..4871c66648 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -15,7 +15,7 @@ use walkdir::WalkDir; #[derive(Clone, Debug, Default)] pub(crate) struct WikilinkIndex { filenames: Arc>>, - basedir: Option, + basedir: Base, } impl WikilinkIndex { @@ -51,7 +51,7 @@ impl WikilinkIndex { // actively ignore symlinks .follow_links(false) .into_iter() - .filter_map(std::result::Result::ok) + .filter_map(Result::ok) { if let Some(filename) = entry.path().file_name() { lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); @@ -76,9 +76,7 @@ impl WikilinkIndex { let filename_lock = self.filenames.lock().unwrap(); if filename_lock.contains_key(&filename.to_ascii_lowercase()) { Some( - filename_lock.get(&filename.to_ascii_lowercase()).expect( - "Could not retrieve inserted Path for discovered Wikilink-Path", - ), + filename_lock.get(&filename.to_ascii_lowercase()).ok(), ) .cloned() } else { diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs index 0880526f04..2d74bc0559 100644 --- a/lychee-lib/src/checker/wikilink/resolver.rs +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -11,7 +11,7 @@ pub(crate) struct WikilinkResolver { /// Tries to resolve a `WikiLink` by searching for the filename in the 
`WikilinkIndex` /// Returns the path of the found file if found, otherwise an Error impl WikilinkResolver { - pub(crate) fn new(basedir: Option, fallback_extensions: Vec) -> Self { + pub(crate) fn new(basedir: Base, fallback_extensions: Vec) -> Self { Self { checker: WikilinkIndex::new(basedir), fallback_extensions, @@ -26,7 +26,7 @@ impl WikilinkResolver { if let Some(resolved) = self.checker.contains_path(&candidate) { return Ok(resolved); } - trace!("Wikilink not found: {} at {}", uri, candidate.display()); + trace!("Wikilink {uri} not found at {candiate}", uri, candidate=candidate.display()); } Err(ErrorKind::WikilinkNotFound(uri.clone())) From b67a5791c8074be6e6370c7ccb70062b095e598f Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Wed, 26 Nov 2025 08:23:11 +0100 Subject: [PATCH 18/27] WikilinkResolver base non-optional --- lychee-lib/src/checker/file.rs | 23 ++++--- lychee-lib/src/checker/wikilink/index.rs | 70 +++++++++------------ lychee-lib/src/checker/wikilink/resolver.rs | 19 ++++-- lychee-lib/src/types/error.rs | 10 ++- 4 files changed, 70 insertions(+), 52 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index c967b0a20f..e5dcd43056 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -37,8 +37,9 @@ pub(crate) struct FileChecker { include_wikilinks: bool, /// Utility for performing fragment checks in HTML files. fragment_checker: FragmentChecker, - /// Utility for resolving Wikilinks, indexes files in a given directory - wikilink_resolver: WikilinkResolver, + /// Utility for resolving Wikilinks, indexes files in a given directory, only initialized when + /// `include_wikilinks` is set + wikilink_resolver: Option, } impl FileChecker { @@ -50,6 +51,7 @@ impl FileChecker { /// * `fallback_extensions` - List of extensions to try if the original file is not found. /// * `index_files` - Optional list of index file names to search for if the path is a directory. 
/// * `include_fragments` - Whether to check for fragment existence in HTML files. + /// * `include_wikilinks` - Whether to check the existence of Wikilinks found in Markdown files . pub(crate) fn new( base: Option, fallback_extensions: Vec, @@ -57,14 +59,19 @@ impl FileChecker { include_fragments: bool, include_wikilinks: bool, ) -> Self { + let wikilink_resolver = match base.clone() { + Some(basedir) => WikilinkResolver::new(basedir, fallback_extensions.clone()).ok(), + None => None, + }; + Self { - base: base.clone(), - fallback_extensions: fallback_extensions.clone(), + base, + fallback_extensions, index_files, include_fragments, include_wikilinks, fragment_checker: FragmentChecker::new(), - wikilink_resolver: WikilinkResolver::new(base, fallback_extensions), + wikilink_resolver, } } @@ -146,8 +153,10 @@ impl FileChecker { Err(e) if e.kind() == std::io::ErrorKind::NotFound => self .apply_fallback_extensions(path, uri) .or_else(|_| { - if self.include_wikilinks { - self.wikilink_resolver.resolve(path, uri) + if self.include_wikilinks + && let Some(resolver) = &self.wikilink_resolver + { + resolver.resolve(path, uri) } else { Err(ErrorKind::InvalidFilePath(uri.clone())) } diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index 4871c66648..d216398792 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -1,5 +1,5 @@ use crate::Base; -use log::{info, trace, warn}; +use log::{info, warn}; use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; @@ -12,17 +12,18 @@ use walkdir::WalkDir; /// The `WikilinkIndex` recursively checks all subdirectories of the given /// base directory mapping any found files to the path where they can be found. /// Symlinks are ignored to prevent it from infinite loops. 
-#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug)] pub(crate) struct WikilinkIndex { filenames: Arc>>, - basedir: Base, + /// local base directory + base: Base, } impl WikilinkIndex { - pub(crate) fn new(base: Option) -> Self { + pub(crate) fn new(local_base: Base) -> Self { let index = Self { - basedir: base, - ..Default::default() + base: local_base, + filenames: Arc::new(Mutex::new(HashMap::new())), }; index.start_indexing(); index @@ -30,43 +31,37 @@ impl WikilinkIndex { /// Populates the index of the `WikilinkIndex` on startup /// - /// Recursively walks the base directory, mapping each filename to an absolute filepath. + /// Recursively walks the local base directory, mapping each filename to an absolute filepath. /// The Index stays empty if no base directory is supplied or if the base directory is remote pub(crate) fn start_indexing(&self) { - match &self.basedir { - None => { - // The Empty Index returns no results in this case - trace!("File indexing for Wikilinks aborted as no base directory is specified"); - } - Some(base_type) => match base_type { - Base::Local(local_base_name) => { - // Start file indexing only if the Base is valid and local - info!( - "Starting file indexing for wikilinks in {}", - local_base_name.display() - ); + match &self.base { + Base::Local(local_base_name) => { + // Start file indexing only if the Base is valid and local + info!( + "Starting file indexing for wikilinks in {}", + local_base_name.display() + ); - let mut lock = self.filenames.lock().unwrap(); - for entry in WalkDir::new::(local_base_name.into()) - // actively ignore symlinks - .follow_links(false) - .into_iter() - .filter_map(Result::ok) - { - if let Some(filename) = entry.path().file_name() { - lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); - } + for entry in WalkDir::new::(local_base_name.into()) + // actively ignore symlinks + .follow_links(false) + .into_iter() + .filter_map(Result::ok) + { + if let Some(filename) = 
entry.path().file_name() { + let mut lock = self.filenames.lock().unwrap(); + lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); } } + } - // A remote base is of no use for the wikilink checker, return an error to the user - Base::Remote(remote_base_name) => { - warn!("Error using remote base url for checking wililinks: {remote_base_name}"); - } - }, + // A remote base is of no use for the wikilink checker, return an error to the user + Base::Remote(remote_base_name) => { + warn!("Error using remote base url for checking wililinks: {remote_base_name}"); + } } } - + /// Checks the index for a filename. Returning the absolute path if the name is found, /// otherwise returning None pub(crate) fn contains_path(&self, path: &Path) -> Option { @@ -75,10 +70,7 @@ impl WikilinkIndex { Some(filename) => { let filename_lock = self.filenames.lock().unwrap(); if filename_lock.contains_key(&filename.to_ascii_lowercase()) { - Some( - filename_lock.get(&filename.to_ascii_lowercase()).ok(), - ) - .cloned() + filename_lock.get(&filename.to_ascii_lowercase()).cloned() } else { None } diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs index 2d74bc0559..762bfe8002 100644 --- a/lychee-lib/src/checker/wikilink/resolver.rs +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -11,11 +11,17 @@ pub(crate) struct WikilinkResolver { /// Tries to resolve a `WikiLink` by searching for the filename in the `WikilinkIndex` /// Returns the path of the found file if found, otherwise an Error impl WikilinkResolver { - pub(crate) fn new(basedir: Base, fallback_extensions: Vec) -> Self { - Self { - checker: WikilinkIndex::new(basedir), - fallback_extensions, + pub(crate) fn new(base: Base, fallback_extensions: Vec) -> Result { + if let Base::Remote(_) = base { + return Err(ErrorKind::WikilinkResolverInit( + "The given base directory was recognized as Remote. A Local directory is needed." 
+ .to_string(), + )); } + Ok(Self { + checker: WikilinkIndex::new(base), + fallback_extensions, + }) } /// Resolves a wikilink by searching the index with fallback extensions. pub(crate) fn resolve(&self, path: &Path, uri: &Uri) -> Result { @@ -26,7 +32,10 @@ impl WikilinkResolver { if let Some(resolved) = self.checker.contains_path(&candidate) { return Ok(resolved); } - trace!("Wikilink {uri} not found at {candiate}", uri, candidate=candidate.display()); + trace!( + "Wikilink {uri} not found at {candidate}", + candidate = candidate.display() + ); } Err(ErrorKind::WikilinkNotFound(uri.clone())) diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 3c5788bde7..38f2a0b98a 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -180,8 +180,12 @@ pub enum ErrorKind { }, /// The extracted `WikiLink` could not be found by searching the directory - #[error("Failed to initialize Wikilink Checker")] + #[error("Could not find Wikilink by searching th given base directory")] WikilinkNotFound(Uri), + + /// Error on creation of the `WikilinkResolver` + #[error("Failed to initialize Wikilink Checker")] + WikilinkResolverInit(String), } impl ErrorKind { @@ -343,6 +347,9 @@ impl ErrorKind { ErrorKind::WikilinkNotFound(uri) => Some(format!( "WikiLink could not be found: {uri} ", )), + ErrorKind::WikilinkResolverInit(reason) => Some(format!( + "WikiLink Resolver could not be created: {reason} ", + )), } } @@ -474,6 +481,7 @@ impl Hash for ErrorKind { Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), Self::WikilinkNotFound(e) => e.hash(state), + Self::WikilinkResolverInit(e) => e.hash(state), } } } From 3a42e9fc0205250c99729799da4c3b9d751414a4 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Sat, 6 Dec 2025 11:04:10 +0100 Subject: [PATCH 19/27] refactor: move WikiLink cleaning to WikiLink Module --- lychee-lib/src/checker/wikilink/mod.rs | 72 
++++++++++++++++++++++++++ lychee-lib/src/extract/markdown.rs | 35 ++----------- 2 files changed, 75 insertions(+), 32 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index 19e11f8070..bec6952e20 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -1,6 +1,78 @@ //! `WikiLink` Module //! //! This module contains a Indexer and a Resolver for `WikiLinks` +//! The Indexer recursively indexes the subdirectories and files in a given base-directory mapping +//! the filename to the full path +//! The Resolver looks for found `WikiLinks` in the Index thus resolving the `WikiLink` to a full +//! filepath pub(crate) mod index; pub(crate) mod resolver; + +use crate::ErrorKind; +use pulldown_cmark::CowStr; + +/// In Markdown Links both '#' and '|' act as modifiers +/// '#' links to a specific Header in a file +/// '|' is used to modify the link name, a so called "pothole" +const MARKDOWN_FRAGMENT_MARKER: char = '#'; +const MARKDOWN_POTHOLE_MARKER: char = '|'; + +/// Clean a `WikiLink` by removing potholes and fragments from a `&str` +pub(crate) fn wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { + // Strip potholes (|) from wikilinks + let mut stripped_input = if has_pothole { + pulldown_cmark::CowStr::Borrowed( + &input[0..input.find(MARKDOWN_POTHOLE_MARKER).unwrap_or(input.len())], + ) + } else { + CowStr::Borrowed(input) + }; + + // Strip fragments (#) from wikilinks, according to the obsidian spec + // fragments always come before potholes + // https://help.obsidian.md/links#Change+the+link+display+text + if stripped_input.contains(MARKDOWN_FRAGMENT_MARKER) { + stripped_input = pulldown_cmark::CowStr::Borrowed( + // In theory a second '#' could be inserted into the pothole, so searching for the + // first occurence from the left should yield the correct result + &input[0..input.find(MARKDOWN_FRAGMENT_MARKER).unwrap_or(input.len())], + ); + } + if 
stripped_input.is_empty() { + return Err(ErrorKind::EmptyUrl); + } + Ok(stripped_input) +} + +#[cfg(test)] +mod tests { + use pulldown_cmark::CowStr; + use rstest::rstest; + + use crate::checker::wikilink::wikilink; + + #[rstest] + #[case("|foo", true)] + #[case("|foo#bar", true)] + #[case("#baz", false)] + fn test_empty_wikilinks_yield_no_errors(#[case] input: &str, #[case] has_pothole: bool) { + let result = wikilink(input, has_pothole); + assert!(result.is_err()); + } + + #[rstest] + #[case("foo|bar", true, "foo")] + #[case("foo#bar", true, "foo")] + #[case("foo#bar|baz", false, "foo")] + #[case("foo#bar|baz#hashtag_in_pothole", false, "foo")] + fn test_fragment_and_pothole_removal( + #[case] input: &str, + #[case] has_pothole: bool, + #[case] actual: &str, + ) { + let result = wikilink(input, has_pothole).unwrap(); + let actual = CowStr::Borrowed(actual); + assert_eq!(result, actual); + } +} diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 763a247f3a..1a0c434a0f 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -5,7 +5,7 @@ use log::warn; use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeWithOffset}; use crate::{ - ErrorKind, + checker::wikilink::wikilink, extract::{html::html5gum::extract_html_with_span, plaintext::extract_raw_uri_from_plaintext}, types::uri::raw::{ OffsetSpanProvider, RawUri, RawUriSpan, SourceSpanProvider, SpanProvider as _, @@ -99,7 +99,7 @@ pub(crate) fn extract_markdown( return None; } - if let Ok(wikilink) = clean_wikilink(&dest_url, has_pothole) { + if let Ok(wikilink) = wikilink(&dest_url, has_pothole) { Some(vec![RawUri { text: wikilink.to_string(), element: Some("a".to_string()), @@ -323,26 +323,6 @@ pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet { out } -fn clean_wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { - // Strip potholes (|) from wikilinks - let mut stripped_input = if 
has_pothole { - pulldown_cmark::CowStr::Borrowed(&input[0..input.find('|').unwrap_or(input.len())]) - } else { - pulldown_cmark::CowStr::Borrowed(input) - }; - - // Strip fragments (#) from wikilinks, according to the obsidian spec - // fragments always come before potholes - if stripped_input.contains('#') { - stripped_input = - pulldown_cmark::CowStr::Borrowed(&input[0..input.find('#').unwrap_or(input.len())]); - } - if stripped_input.is_empty() { - return Err(ErrorKind::EmptyUrl); - } - Ok(stripped_input) -} - #[derive(Default)] struct HeadingIdGenerator { counter: HashMap, @@ -383,7 +363,6 @@ mod tests { use crate::types::uri::raw::span; use super::*; - use rstest::rstest; const MD_INPUT: &str = r#" # A Test @@ -762,6 +741,7 @@ Shortcut link: [link4] assert_eq!(uris, expected); } + #[test] fn test_wikilink_extraction_returns_none_on_empty_links() { let markdown = r" @@ -846,13 +826,4 @@ Shortcut link: [link4] }]; assert_eq!(uris, expected); } - - #[rstest] - #[case("|foo", true)] - #[case("|foo#bar", true)] - #[case("#baz", false)] - fn test_from_str(#[case] input: &str, #[case] has_pothole: bool) { - let result = clean_wikilink(input, has_pothole); - assert!(result.is_err()); - } } From 3b390f3f77ca5c9f032e6bb44d07f8aea0396372 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Sat, 6 Dec 2025 11:07:00 +0100 Subject: [PATCH 20/27] fix type --- lychee-lib/src/checker/wikilink/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index bec6952e20..c4a143933c 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -35,7 +35,7 @@ pub(crate) fn wikilink(input: &str, has_pothole: bool) -> Result, Err if stripped_input.contains(MARKDOWN_FRAGMENT_MARKER) { stripped_input = pulldown_cmark::CowStr::Borrowed( // In theory a second '#' could be inserted into the pothole, so searching for the - // first occurence from the left should 
yield the correct result + // first occurrence from the left should yield the correct result &input[0..input.find(MARKDOWN_FRAGMENT_MARKER).unwrap_or(input.len())], ); } From 8f9c107eaca05c748ea6d83544efd1976f56865d Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 17 Dec 2025 10:54:42 +0100 Subject: [PATCH 21/27] Error handling for invalid base Removes cases where some errors were silently ignored or warnings were printed. --- lychee-lib/src/checker/file.rs | 63 +++++++++++---------- lychee-lib/src/checker/wikilink/index.rs | 54 +++++++----------- lychee-lib/src/checker/wikilink/resolver.rs | 28 ++++++--- lychee-lib/src/client.rs | 5 +- lychee-lib/src/types/error.rs | 6 +- 5 files changed, 80 insertions(+), 76 deletions(-) diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index e5dcd43056..f60f3bf9eb 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use crate::checker::wikilink::resolver::WikilinkResolver; use crate::{ - Base, ErrorKind, Status, Uri, + Base, ErrorKind, Result, Status, Uri, utils::fragment_checker::{FragmentChecker, FragmentInput}, }; @@ -33,12 +33,9 @@ pub(crate) struct FileChecker { index_files: Option>, /// Whether to check for the existence of fragments (e.g., `#section-id`) in HTML files. include_fragments: bool, - /// Whether to check for the existence of files linked to by Wikilinks - include_wikilinks: bool, /// Utility for performing fragment checks in HTML files. fragment_checker: FragmentChecker, - /// Utility for resolving Wikilinks, indexes files in a given directory, only initialized when - /// `include_wikilinks` is set + /// Utility for optionally resolving Wikilinks. wikilink_resolver: Option, } @@ -52,27 +49,34 @@ impl FileChecker { /// * `index_files` - Optional list of index file names to search for if the path is a directory. /// * `include_fragments` - Whether to check for fragment existence in HTML files. 
/// * `include_wikilinks` - Whether to check the existence of Wikilinks found in Markdown files . + /// + /// # Errors + /// + /// Fails if an invalid `base` is provided when including wikilinks. pub(crate) fn new( base: Option, fallback_extensions: Vec, index_files: Option>, include_fragments: bool, include_wikilinks: bool, - ) -> Self { - let wikilink_resolver = match base.clone() { - Some(basedir) => WikilinkResolver::new(basedir, fallback_extensions.clone()).ok(), - None => None, + ) -> Result { + let wikilink_resolver = if include_wikilinks { + Some(WikilinkResolver::new( + base.as_ref(), + fallback_extensions.clone(), + )?) + } else { + None }; - Self { + Ok(Self { base, fallback_extensions, index_files, include_fragments, - include_wikilinks, fragment_checker: FragmentChecker::new(), wikilink_resolver, - } + }) } /// Checks the given file URI for existence and validity. @@ -142,20 +146,14 @@ impl FileChecker { /// Returns `Ok` with the resolved path if it is valid, otherwise returns /// `Err` with an appropriate error. The returned path, if any, is guaranteed /// to exist and may be a file or a directory. - fn resolve_local_path<'a>( - &self, - path: &'a Path, - uri: &Uri, - ) -> Result, ErrorKind> { + fn resolve_local_path<'a>(&self, path: &'a Path, uri: &Uri) -> Result> { let path = match path.metadata() { // for non-existing paths, attempt fallback extensions // if fallback extensions don't help, try wikilinks Err(e) if e.kind() == std::io::ErrorKind::NotFound => self .apply_fallback_extensions(path, uri) .or_else(|_| { - if self.include_wikilinks - && let Some(resolver) = &self.wikilink_resolver - { + if let Some(resolver) = &self.wikilink_resolver { resolver.resolve(path, uri) } else { Err(ErrorKind::InvalidFilePath(uri.clone())) @@ -206,7 +204,7 @@ impl FileChecker { /// /// Returns `Ok(PathBuf)` with the resolved file path, or `Err` if no valid file is found. /// If `Ok` is returned, the contained `PathBuf` is guaranteed to exist and be a file. 
- fn apply_fallback_extensions(&self, path: &Path, uri: &Uri) -> Result { + fn apply_fallback_extensions(&self, path: &Path, uri: &Uri) -> Result { // If it's already a file, use it directly if path.is_file() { return Ok(path.to_path_buf()); @@ -246,7 +244,7 @@ impl FileChecker { /// is guaranteed to exist. In most cases, the returned path will be a file path. /// /// If index files are disabled, simply returns `Ok(dir_path)`. - fn apply_index_files(&self, dir_path: &Path) -> Result { + fn apply_index_files(&self, dir_path: &Path) -> Result { // this implements the "disabled" case by treating a directory as its // own index file. let index_names_to_try = match &self.index_files { @@ -397,7 +395,7 @@ mod tests { #[tokio::test] async fn test_default() { // default behaviour accepts dir links as long as the directory exists. - let checker = FileChecker::new(None, vec![], None, true, false); + let checker = FileChecker::new(None, vec![], None, true, false).unwrap(); assert_filecheck!(&checker, "filechecker/index_dir", Status::Ok(_)); @@ -456,7 +454,8 @@ mod tests { Some(vec!["index.html".to_owned(), "index.md".to_owned()]), true, false, - ); + ) + .unwrap(); assert_resolves!( &checker, @@ -495,7 +494,8 @@ mod tests { Some(vec!["index".to_owned()]), false, false, - ); + ) + .unwrap(); // this test case has a subdir 'same_name' and a file 'same_name.html'. 
// this shows that the index file resolving is applied in this case and @@ -519,7 +519,8 @@ mod tests { #[tokio::test] async fn test_empty_index_list_corner() { // empty index_files list will reject all directory links - let checker_no_indexes = FileChecker::new(None, vec![], Some(vec![]), false, false); + let checker_no_indexes = + FileChecker::new(None, vec![], Some(vec![]), false, false).unwrap(); assert_resolves!( &checker_no_indexes, "filechecker/index_dir", @@ -543,7 +544,8 @@ mod tests { "..".to_owned(), "/".to_owned(), ]; - let checker_dir_indexes = FileChecker::new(None, vec![], Some(dir_names), false, false); + let checker_dir_indexes = + FileChecker::new(None, vec![], Some(dir_names), false, false).unwrap(); assert_resolves!( &checker_dir_indexes, "filechecker/index_dir", @@ -565,7 +567,8 @@ mod tests { Some(vec!["../index_dir/index.html".to_owned()]), true, false, - ); + ) + .unwrap(); assert_resolves!( &checker_dotdot, "filechecker/empty_dir#fragment", @@ -579,7 +582,7 @@ mod tests { .expect("expected utf-8 fixtures path") .to_owned(); let checker_absolute = - FileChecker::new(None, vec![], Some(vec![absolute_html]), true, false); + FileChecker::new(None, vec![], Some(vec![absolute_html]), true, false).unwrap(); assert_resolves!( &checker_absolute, "filechecker/empty_dir#fragment", @@ -589,7 +592,7 @@ mod tests { #[tokio::test] async fn test_fallback_extensions_on_directories() { - let checker = FileChecker::new(None, vec!["html".to_owned()], None, true, false); + let checker = FileChecker::new(None, vec!["html".to_owned()], None, true, false).unwrap(); // fallback extensions should be applied when directory links are resolved // to directories (i.e., the default index_files behavior or if `.` diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index d216398792..a26f72fab2 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -1,5 +1,4 @@ -use crate::Base; 
-use log::{info, warn}; +use log::info; use std::collections::HashMap; use std::ffi::OsString; use std::path::Path; @@ -15,49 +14,38 @@ use walkdir::WalkDir; #[derive(Clone, Debug)] pub(crate) struct WikilinkIndex { filenames: Arc>>, - /// local base directory - base: Base, + /// Local base directory + local_base: PathBuf, } impl WikilinkIndex { - pub(crate) fn new(local_base: Base) -> Self { + pub(crate) fn new(local_base: PathBuf) -> Self { let index = Self { - base: local_base, + local_base, filenames: Arc::new(Mutex::new(HashMap::new())), }; index.start_indexing(); index } - /// Populates the index of the `WikilinkIndex` on startup - /// - /// Recursively walks the local base directory, mapping each filename to an absolute filepath. - /// The Index stays empty if no base directory is supplied or if the base directory is remote + /// Populates the index of the `WikilinkIndex` on startup by walking + /// the local base directory, mapping each filename to an absolute filepath. pub(crate) fn start_indexing(&self) { - match &self.base { - Base::Local(local_base_name) => { - // Start file indexing only if the Base is valid and local - info!( - "Starting file indexing for wikilinks in {}", - local_base_name.display() - ); + // Start file indexing only if the Base is valid and local + info!( + "Starting file indexing for wikilinks in {}", + self.local_base.display() + ); - for entry in WalkDir::new::(local_base_name.into()) - // actively ignore symlinks - .follow_links(false) - .into_iter() - .filter_map(Result::ok) - { - if let Some(filename) = entry.path().file_name() { - let mut lock = self.filenames.lock().unwrap(); - lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); - } - } - } - - // A remote base is of no use for the wikilink checker, return an error to the user - Base::Remote(remote_base_name) => { - warn!("Error using remote base url for checking wililinks: {remote_base_name}"); + for entry in WalkDir::new(&self.local_base) + // actively 
ignore symlinks + .follow_links(false) + .into_iter() + .filter_map(Result::ok) + { + if let Some(filename) = entry.path().file_name() { + let mut lock = self.filenames.lock().unwrap(); + lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); } } } diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs index 762bfe8002..9cc733f5c7 100644 --- a/lychee-lib/src/checker/wikilink/resolver.rs +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -11,15 +11,27 @@ pub(crate) struct WikilinkResolver { /// Tries to resolve a `WikiLink` by searching for the filename in the `WikilinkIndex` /// Returns the path of the found file if found, otherwise an Error impl WikilinkResolver { - pub(crate) fn new(base: Base, fallback_extensions: Vec) -> Result { - if let Base::Remote(_) = base { - return Err(ErrorKind::WikilinkResolverInit( - "The given base directory was recognized as Remote. A Local directory is needed." - .to_string(), - )); - } + /// # Errors + /// + /// Fails if `base` is not `Some(Base::Local(_))`. + pub(crate) fn new( + base: Option<&Base>, + fallback_extensions: Vec, + ) -> Result { + let base = match base { + None => Err(ErrorKind::WikilinkInvalidBase( + "Base must be specified for wikilink checking".into(), + ))?, + Some(base) => match base { + Base::Local(p) => p, + Base::Remote(_) => Err(ErrorKind::WikilinkInvalidBase( + "Base cannot be remote".to_string(), + ))?, + }, + }; + Ok(Self { - checker: WikilinkIndex::new(base), + checker: WikilinkIndex::new(base.clone()), fallback_extensions, }) } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 4ace6b9569..31c2480257 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -300,7 +300,8 @@ pub struct ClientBuilder { /// Enable the checking of fragments in links. include_fragments: bool, - /// Enable the checking of wikilinks in markdown files + /// Enable the checking of wikilinks in markdown files. 
+ /// Note that base must not be `None` if you set this `true`. include_wikilinks: bool, /// Requests run through this chain where each item in the chain @@ -402,7 +403,7 @@ impl ClientBuilder { self.index_files, self.include_fragments, self.include_wikilinks, - ), + )?, }) } diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 38f2a0b98a..8f846352cf 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -185,7 +185,7 @@ pub enum ErrorKind { /// Error on creation of the `WikilinkResolver` #[error("Failed to initialize Wikilink Checker")] - WikilinkResolverInit(String), + WikilinkInvalidBase(String), } impl ErrorKind { @@ -347,7 +347,7 @@ impl ErrorKind { ErrorKind::WikilinkNotFound(uri) => Some(format!( "WikiLink could not be found: {uri} ", )), - ErrorKind::WikilinkResolverInit(reason) => Some(format!( + ErrorKind::WikilinkInvalidBase(reason) => Some(format!( "WikiLink Resolver could not be created: {reason} ", )), } @@ -481,7 +481,7 @@ impl Hash for ErrorKind { Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), Self::WikilinkNotFound(e) => e.hash(state), - Self::WikilinkResolverInit(e) => e.hash(state), + Self::WikilinkInvalidBase(e) => e.hash(state), } } } From d3bac8d148d2771778e9a19e1dfe86ea87de3ce2 Mon Sep 17 00:00:00 2001 From: Jakob <144204108+JayJayArr@users.noreply.github.com> Date: Mon, 22 Dec 2025 15:55:13 +0100 Subject: [PATCH 22/27] Apply suggestions from @thomas-zahner Co-authored-by: Thomas Zahner --- lychee-lib/src/checker/wikilink/mod.rs | 2 +- lychee-lib/src/extract/markdown.rs | 2 +- lychee-lib/src/types/error.rs | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index c4a143933c..7a247c9a14 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -20,7 +20,7 
@@ const MARKDOWN_POTHOLE_MARKER: char = '|'; /// Clean a `WikiLink` by removing potholes and fragments from a `&str` pub(crate) fn wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { - // Strip potholes (|) from wikilinks + // Strip pothole marker (|) and pothole (text after marker) from wikilinks let mut stripped_input = if has_pothole { pulldown_cmark::CowStr::Borrowed( &input[0..input.find(MARKDOWN_POTHOLE_MARKER).unwrap_or(input.len())], diff --git a/lychee-lib/src/extract/markdown.rs b/lychee-lib/src/extract/markdown.rs index 1a0c434a0f..126d9faab4 100644 --- a/lychee-lib/src/extract/markdown.rs +++ b/lychee-lib/src/extract/markdown.rs @@ -108,7 +108,7 @@ pub(crate) fn extract_markdown( span: span_provider.span(span.start + 2) }]) } else { - warn!("WARNING: The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments"); + warn!("The wikilink destination url {dest_url} could not be cleaned by removing potholes and fragments"); None } } diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 8f846352cf..24373033d3 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -180,11 +180,11 @@ pub enum ErrorKind { }, /// The extracted `WikiLink` could not be found by searching the directory - #[error("Could not find Wikilink by searching th given base directory")] - WikilinkNotFound(Uri), + #[error("Wikilink {0} not found at {1}")] + WikilinkNotFound(Uri, PathBuf), /// Error on creation of the `WikilinkResolver` - #[error("Failed to initialize Wikilink Checker")] + #[error("Failed to initialize wikilink checker: {0}")] WikilinkInvalidBase(String), } From 335beb76457f3ffa1a81f3ba223c5a3d57e7a4bf Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Mon, 22 Dec 2025 16:20:35 +0100 Subject: [PATCH 23/27] Improve Error Handling for WikilinkNotFound --- lychee-lib/src/checker/wikilink/mod.rs | 2 +- lychee-lib/src/checker/wikilink/resolver.rs | 7 +------ 
lychee-lib/src/types/error.rs | 6 +++--- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index 7a247c9a14..3d759abff7 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -56,7 +56,7 @@ mod tests { #[case("|foo", true)] #[case("|foo#bar", true)] #[case("#baz", false)] - fn test_empty_wikilinks_yield_no_errors(#[case] input: &str, #[case] has_pothole: bool) { + fn test_empty_wikilinks_are_detected(#[case] input: &str, #[case] has_pothole: bool) { let result = wikilink(input, has_pothole); assert!(result.is_err()); } diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs index 9cc733f5c7..39957127f0 100644 --- a/lychee-lib/src/checker/wikilink/resolver.rs +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -1,5 +1,4 @@ use crate::{Base, ErrorKind, Uri, checker::wikilink::index::WikilinkIndex}; -use log::trace; use std::path::{Path, PathBuf}; #[derive(Clone, Debug)] @@ -44,12 +43,8 @@ impl WikilinkResolver { if let Some(resolved) = self.checker.contains_path(&candidate) { return Ok(resolved); } - trace!( - "Wikilink {uri} not found at {candidate}", - candidate = candidate.display() - ); } - Err(ErrorKind::WikilinkNotFound(uri.clone())) + Err(ErrorKind::WikilinkNotFound(uri.clone(), path.to_path_buf())) } } diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 24373033d3..a08f4143a6 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -344,8 +344,8 @@ impl ErrorKind { [init @ .., tail] => format!("An index file ({}, or {}) is required", init.join(", "), tail), }.into(), ErrorKind::PreprocessorError{command, reason} => Some(format!("Command '{command}' failed {reason}. 
Check value of the pre option")), - ErrorKind::WikilinkNotFound(uri) => Some(format!( - "WikiLink could not be found: {uri} ", + ErrorKind::WikilinkNotFound(uri, pathbuf) => Some(format!( + "WikiLink {uri} could not be found at {:}", pathbuf.display() )), ErrorKind::WikilinkInvalidBase(reason) => Some(format!( "WikiLink Resolver could not be created: {reason} ", @@ -480,7 +480,7 @@ impl Hash for ErrorKind { Self::Cookies(e) => e.hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, reason).hash(state), - Self::WikilinkNotFound(e) => e.hash(state), + Self::WikilinkNotFound(u, _p) => u.hash(state), Self::WikilinkInvalidBase(e) => e.hash(state), } } From dfeab262a29015ff488b0f888098fb2a96102a9d Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Mon, 22 Dec 2025 17:31:33 +0100 Subject: [PATCH 24/27] Check for Unsupported Characters in Wikilinks fix typo --- lychee-lib/src/checker/wikilink/mod.rs | 45 ++++++++++++++++++++++++++ lychee-lib/src/types/error.rs | 8 +++++ 2 files changed, 53 insertions(+) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index 3d759abff7..3a933350f7 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -17,9 +17,21 @@ use pulldown_cmark::CowStr; /// '|' is used to modify the link name, a so called "pothole" const MARKDOWN_FRAGMENT_MARKER: char = '#'; const MARKDOWN_POTHOLE_MARKER: char = '|'; +/// A Link containing one of the following characters may not work as a link: +/// `` +const MARKDOWN_OBSIDIAN_PROBLEMATIC_CHARACTERS: [char; 4] = ['^', '%', '[', ']']; /// Clean a `WikiLink` by removing potholes and fragments from a `&str` pub(crate) fn wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { + // Check for problematic characters + for char in MARKDOWN_OBSIDIAN_PROBLEMATIC_CHARACTERS { + if input.contains(char) { + return Err(ErrorKind::WikilinkUnsupportedCharacter( 
+ char, + input.to_string(), + )); + } + } // Strip pothole marker (|) and pothole (text after marker) from wikilinks let mut stripped_input = if has_pothole { pulldown_cmark::CowStr::Borrowed( @@ -52,20 +64,53 @@ mod tests { use crate::checker::wikilink::wikilink; + // All these Links are missing the targetname itself but contain valid fragment- and + // pothole-modifications. They would be parsed as an empty Link #[rstest] #[case("|foo", true)] #[case("|foo#bar", true)] + #[case("|foo#bar|foo#bar", true)] #[case("#baz", false)] + #[case("#baz#baz|foo", false)] fn test_empty_wikilinks_are_detected(#[case] input: &str, #[case] has_pothole: bool) { let result = wikilink(input, has_pothole); assert!(result.is_err()); } + #[rstest] + #[case("link with spaces", true, "link with spaces")] + #[case("foo.fileextension", true, "foo.fileextension")] + #[case("specialcharacters !_@$&(){}", true, "specialcharacters !_@$&(){}")] + fn test_valid_wikilinks(#[case] input: &str, #[case] has_pothole: bool, #[case] actual: &str) { + let result = wikilink(input, has_pothole).unwrap(); + let actual = CowStr::Borrowed(actual); + assert_eq!(result, actual); + } + + #[rstest] + #[case("foo^", false)] + #[case("foo%", false)] + #[case("foo[", false)] + #[case("foo]", false)] + fn test_invalid_characters_in_wikilinks_are_rejected( + #[case] input: &str, + #[case] has_pothole: bool, + ) { + let result = wikilink(input, has_pothole); + assert!(result.is_err()); + } + #[rstest] #[case("foo|bar", true, "foo")] #[case("foo#bar", true, "foo")] #[case("foo#bar|baz", false, "foo")] #[case("foo#bar|baz#hashtag_in_pothole", false, "foo")] + #[case("foo with spaces#bar|baz#hashtag_in_pothole", false, "foo with spaces")] + #[case( + "specialcharacters !_@$&(){}#bar|baz#hashtag_in_pothole", + true, + "specialcharacters !_@$&(){}" + )] fn test_fragment_and_pothole_removal( #[case] input: &str, #[case] has_pothole: bool, diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 
a08f4143a6..6a755455dc 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -186,6 +186,10 @@ pub enum ErrorKind { /// Error on creation of the `WikilinkResolver` #[error("Failed to initialize wikilink checker: {0}")] WikilinkInvalidBase(String), + + /// Unsupported Character found in `WikiLink` content + #[error("Unsupported character {0} found in {0}")] + WikilinkUnsupportedCharacter(char, String), } impl ErrorKind { @@ -350,6 +354,9 @@ impl ErrorKind { ErrorKind::WikilinkInvalidBase(reason) => Some(format!( "WikiLink Resolver could not be created: {reason} ", )), + ErrorKind::WikilinkUnsupportedCharacter(character, link) => Some(format!( + "WikiLink {link} contains an unsupported character: {character} ", + )), } } @@ -482,6 +489,7 @@ impl Hash for ErrorKind { Self::PreprocessorError { command, reason } => (command, reason).hash(state), Self::WikilinkNotFound(u, _p) => u.hash(state), Self::WikilinkInvalidBase(e) => e.hash(state), + Self::WikilinkUnsupportedCharacter(_c, u) => u.hash(state), } } } From 2afa74b61b3af73ffc5132cedddde461bd993d72 Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Tue, 23 Dec 2025 15:23:15 +0100 Subject: [PATCH 25/27] Remove Unsupported Character Check --- lychee-lib/src/checker/wikilink/mod.rs | 25 ------------------------- lychee-lib/src/types/error.rs | 10 +--------- 2 files changed, 1 insertion(+), 34 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/mod.rs b/lychee-lib/src/checker/wikilink/mod.rs index 3a933350f7..e11b2dcfa3 100644 --- a/lychee-lib/src/checker/wikilink/mod.rs +++ b/lychee-lib/src/checker/wikilink/mod.rs @@ -17,21 +17,9 @@ use pulldown_cmark::CowStr; /// '|' is used to modify the link name, a so called "pothole" const MARKDOWN_FRAGMENT_MARKER: char = '#'; const MARKDOWN_POTHOLE_MARKER: char = '|'; -/// A Link containing one of the following characters may not work as a link: -/// `` -const MARKDOWN_OBSIDIAN_PROBLEMATIC_CHARACTERS: [char; 4] = ['^', '%', '[', ']']; /// Clean a 
`WikiLink` by removing potholes and fragments from a `&str` pub(crate) fn wikilink(input: &str, has_pothole: bool) -> Result, ErrorKind> { - // Check for problematic characters - for char in MARKDOWN_OBSIDIAN_PROBLEMATIC_CHARACTERS { - if input.contains(char) { - return Err(ErrorKind::WikilinkUnsupportedCharacter( - char, - input.to_string(), - )); - } - } // Strip pothole marker (|) and pothole (text after marker) from wikilinks let mut stripped_input = if has_pothole { pulldown_cmark::CowStr::Borrowed( @@ -87,19 +75,6 @@ mod tests { assert_eq!(result, actual); } - #[rstest] - #[case("foo^", false)] - #[case("foo%", false)] - #[case("foo[", false)] - #[case("foo]", false)] - fn test_invalid_characters_in_wikilinks_are_rejected( - #[case] input: &str, - #[case] has_pothole: bool, - ) { - let result = wikilink(input, has_pothole); - assert!(result.is_err()); - } - #[rstest] #[case("foo|bar", true, "foo")] #[case("foo#bar", true, "foo")] diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 6a755455dc..8b70791af9 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -186,10 +186,6 @@ pub enum ErrorKind { /// Error on creation of the `WikilinkResolver` #[error("Failed to initialize wikilink checker: {0}")] WikilinkInvalidBase(String), - - /// Unsupported Character found in `WikiLink` content - #[error("Unsupported character {0} found in {0}")] - WikilinkUnsupportedCharacter(char, String), } impl ErrorKind { @@ -354,9 +350,6 @@ impl ErrorKind { ErrorKind::WikilinkInvalidBase(reason) => Some(format!( "WikiLink Resolver could not be created: {reason} ", )), - ErrorKind::WikilinkUnsupportedCharacter(character, link) => Some(format!( - "WikiLink {link} contains an unsupported character: {character} ", - )), } } @@ -487,9 +480,8 @@ impl Hash for ErrorKind { Self::Cookies(e) => e.hash(state), Self::StatusCodeSelectorError(e) => e.to_string().hash(state), Self::PreprocessorError { command, reason } => (command, 
reason).hash(state), - Self::WikilinkNotFound(u, _p) => u.hash(state), + Self::WikilinkNotFound(uri, pathbuf) => (uri, pathbuf).hash(state), Self::WikilinkInvalidBase(e) => e.hash(state), - Self::WikilinkUnsupportedCharacter(_c, u) => u.hash(state), } } } From 9824dbe80ce6098a434ce49aa44a30d39d0c39ce Mon Sep 17 00:00:00 2001 From: JayJayArr Date: Tue, 23 Dec 2025 16:18:22 +0100 Subject: [PATCH 26/27] Test for WikiLinkNotFound fix typo fix typo --- fixtures/wiki/Non-existent.md | 5 +++ lychee-bin/tests/cli.rs | 16 +++++++++ lychee-lib/src/checker/wikilink/resolver.rs | 38 +++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 fixtures/wiki/Non-existent.md diff --git a/fixtures/wiki/Non-existent.md b/fixtures/wiki/Non-existent.md new file mode 100644 index 0000000000..45ed48a5eb --- /dev/null +++ b/fixtures/wiki/Non-existent.md @@ -0,0 +1,5 @@ +# Links to non-existing Files + +[[Does not exist]] +[[Doesn't exist.md]] +[[Does_not_exist]] diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 2cb2c85e08..6f43752f75 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -3065,6 +3065,22 @@ The config file should contain every possible key for documentation purposes." 
.stdout(contains("4 OK")); } + #[test] + fn test_wikilink_fixture_wikilink_non_existent() { + let input = fixtures_path!().join("wiki/Non-existent.md"); + + cargo_bin_cmd!() + .arg(&input) + .arg("--include-wikilinks") + .arg("--fallback-extensions") + .arg("md") + .arg("--base-url") + .arg(fixtures_path!()) + .assert() + .failure() + .stdout(contains("3 Errors")); + } + #[test] fn test_wikilink_fixture_with_fragments_obsidian_style_fixtures_excluded() { let input = fixtures_path!().join("wiki/obsidian-style-plus-headers.md"); diff --git a/lychee-lib/src/checker/wikilink/resolver.rs b/lychee-lib/src/checker/wikilink/resolver.rs index 39957127f0..247f3cc674 100644 --- a/lychee-lib/src/checker/wikilink/resolver.rs +++ b/lychee-lib/src/checker/wikilink/resolver.rs @@ -48,3 +48,41 @@ impl WikilinkResolver { Err(ErrorKind::WikilinkNotFound(uri.clone(), path.to_path_buf())) } } + +#[cfg(test)] +mod tests { + use crate::{Base, ErrorKind, Uri, checker::wikilink::resolver::WikilinkResolver}; + use test_utils::{fixture_uri, fixtures_path}; + + #[test] + fn test_wikilink_resolves_to_filename() { + let resolver = WikilinkResolver::new( + Some(&Base::Local(fixtures_path!().join("wiki"))), + vec!["md".to_string()], + ) + .unwrap(); + let uri = Uri { + url: fixture_uri!("wiki/Usage"), + }; + let path = fixtures_path!().join("Usage"); + let expected_result = fixtures_path!().join("wiki/Usage.md"); + assert_eq!(resolver.resolve(&path, &uri), Ok(expected_result)); + } + + #[test] + fn test_wikilink_not_found() { + let resolver = WikilinkResolver::new( + Some(&Base::Local(fixtures_path!().join("wiki"))), + vec!["md".to_string()], + ) + .unwrap(); + let uri = Uri { + url: fixture_uri!("wiki/404"), + }; + let path = fixtures_path!().join("404"); + assert!(matches!( + resolver.resolve(&path, &uri), + Err(ErrorKind::WikilinkNotFound(..)) + )); + } +} From 0c3dcb53295fbc255d7511d78724e055437de2b8 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Tue, 23 Dec 2025 20:49:29 +0100 Subject: 
[PATCH 27/27] Simplify WikilinkIndex --- lychee-lib/src/checker/wikilink/index.rs | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lychee-lib/src/checker/wikilink/index.rs b/lychee-lib/src/checker/wikilink/index.rs index a26f72fab2..03ce53f78f 100644 --- a/lychee-lib/src/checker/wikilink/index.rs +++ b/lychee-lib/src/checker/wikilink/index.rs @@ -44,8 +44,10 @@ impl WikilinkIndex { .filter_map(Result::ok) { if let Some(filename) = entry.path().file_name() { - let mut lock = self.filenames.lock().unwrap(); - lock.insert(filename.to_ascii_lowercase(), entry.path().to_path_buf()); + self.filenames + .lock() + .unwrap() + .insert(filename.to_os_string(), entry.path().to_path_buf()); } } } @@ -53,16 +55,10 @@ impl WikilinkIndex { /// Checks the index for a filename. Returning the absolute path if the name is found, /// otherwise returning None pub(crate) fn contains_path(&self, path: &Path) -> Option { - match path.file_name() { - None => None, - Some(filename) => { - let filename_lock = self.filenames.lock().unwrap(); - if filename_lock.contains_key(&filename.to_ascii_lowercase()) { - filename_lock.get(&filename.to_ascii_lowercase()).cloned() - } else { - None - } - } - } + self.filenames + .lock() + .unwrap() + .get(path.file_name()?) + .cloned() } }