diff --git a/README.md b/README.md index dc06536538..f00901a336 100644 --- a/README.md +++ b/README.md @@ -452,12 +452,38 @@ Options: Remap URI matching pattern to different URI --fallback-extensions - Test the specified file extensions for URIs when checking files locally. - Multiple extensions can be separated by commas. Extensions will be checked in - order of appearance. + When checking locally, attempts to locate missing files by trying the given + fallback extensions. Multiple extensions can be separated by commas. Extensions + will be checked in order of appearance. Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi + Note: This option only takes effect on `file://` URIs which do not exist. + + --index-files + When checking locally, resolves directory links to a separate index file. + The argument is a comma-separated list of index file names to search for. Index + names are relative to the link's directory and attempted in the order given. + + If `--index-files` is specified, then at least one index file must exist in + order for a directory link to be considered valid. Additionally, the special + name `.` can be used in the list to refer to the directory itself. + + If unspecified (the default behavior), index files are disabled and directory + links are considered valid as long as the directory exists on disk. + + Example 1: `--index-files index.html,readme.md` looks for index.html or readme.md + and requires that at least one exists. + + Example 2: `--index-files index.html,.` will use index.html if it exists, but + still accept the directory link regardless. + + Example 3: `--index-files ''` will reject all directory links because there are + no valid index files. This will require every link to explicitly name + a file. + + Note: This option only takes effect on `file://` URIs which exist and point to a directory. 
+ -H, --header Set custom header for requests diff --git a/fixtures/filechecker/dir_links.md b/fixtures/filechecker/dir_links.md new file mode 100644 index 0000000000..8a7ef9789c --- /dev/null +++ b/fixtures/filechecker/dir_links.md @@ -0,0 +1,5 @@ +[a](empty_dir) +[a](empty_dir#fragment) + +[a](index_dir) +[a](index_dir#fragment) diff --git a/fixtures/filechecker/dir_with_extension.html/.gitkeep b/fixtures/filechecker/dir_with_extension.html/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/filechecker/empty_dir/.gitkeep b/fixtures/filechecker/empty_dir/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/filechecker/index_dir/index.html b/fixtures/filechecker/index_dir/index.html new file mode 100644 index 0000000000..0e56bdc3c7 --- /dev/null +++ b/fixtures/filechecker/index_dir/index.html @@ -0,0 +1 @@ +

boop

diff --git a/fixtures/filechecker/index_md/index.md b/fixtures/filechecker/index_md/index.md new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/filechecker/same_name.html b/fixtures/filechecker/same_name.html new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/filechecker/same_name/.gitkeep b/fixtures/filechecker/same_name/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/fragments-fallback-extensions/index.html b/fixtures/fragments-fallback-extensions/index.html index cb2977d165..184bb1df00 100644 --- a/fixtures/fragments-fallback-extensions/index.html +++ b/fixtures/fragments-fallback-extensions/index.html @@ -6,8 +6,8 @@ - 1 - 2 + 1 + 2 3 diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index e5592e1fc0..4c99f6fe7c 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -54,6 +54,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - .min_tls_version(cfg.min_tls.clone().map(Into::into)) .include_fragments(cfg.include_fragments) .fallback_extensions(cfg.fallback_extensions.clone()) + .index_files(cfg.index_files.clone()) .build() .client() .context("Failed to create request client") diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index aebf1c888a..74033425ac 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -544,19 +544,51 @@ and 501." #[arg(long)] pub(crate) remap: Vec, - /// Automatically append file extensions to `file://` URIs as needed + /// Automatically append file extensions to `file://` URIs for non-existing paths #[serde(default)] #[arg( long, value_delimiter = ',', - long_help = "Test the specified file extensions for URIs when checking files locally. -Multiple extensions can be separated by commas. Extensions will be checked in -order of appearance. + long_help = "When checking locally, attempts to locate missing files by trying the given +fallback extensions. 
Multiple extensions can be separated by commas. Extensions +will be checked in order of appearance. + +Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi -Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi" +Note: This option only takes effect on `file://` URIs which do not exist." )] pub(crate) fallback_extensions: Vec, + /// Resolve local directory links to specified index files within the directory + #[serde(default)] + #[arg( + long, + value_delimiter = ',', + long_help = "When checking locally, resolves directory links to a separate index file. +The argument is a comma-separated list of index file names to search for. Index +names are relative to the link's directory and attempted in the order given. + +If `--index-files` is specified, then at least one index file must exist in +order for a directory link to be considered valid. Additionally, the special +name `.` can be used in the list to refer to the directory itself. + +If unspecified (the default behavior), index files are disabled and directory +links are considered valid as long as the directory exists on disk. + +Example 1: `--index-files index.html,readme.md` looks for index.html or readme.md + and requires that at least one exists. + +Example 2: `--index-files index.html,.` will use index.html if it exists, but + still accept the directory link regardless. + +Example 3: `--index-files ''` will reject all directory links because there are + no valid index files. This will require every link to explicitly name + a file. + +Note: This option only takes effect on `file://` URIs which exist and point to a directory." 
+ )] + pub(crate) index_files: Option>, + /// Set custom header for requests #[arg( short = 'H', diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 510b680514..dc820d0136 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1908,7 +1908,6 @@ mod cli { "fixtures/fragments/file.html#top", "fixtures/fragments/file.html#Upper-%C3%84%C3%96%C3%B6", "fixtures/fragments/sub_dir", - "fixtures/fragments/sub_dir#a-link-inside-index-html-inside-sub-dir", "fixtures/fragments/zero.bin", "fixtures/fragments/zero.bin#", "fixtures/fragments/zero.bin#fragment", @@ -1922,6 +1921,7 @@ mod cli { let expected_failures = vec![ "fixtures/fragments/sub_dir_non_existing_1", "fixtures/fragments/sub_dir#non-existing-fragment-2", + "fixtures/fragments/sub_dir#a-link-inside-index-html-inside-sub-dir", "fixtures/fragments/empty_dir#non-existing-fragment-3", "fixtures/fragments/file2.md#missing-fragment", "fixtures/fragments/sub_dir#non-existing-fragment-1", @@ -2288,4 +2288,98 @@ mod cli { .success() .stdout(contains("https://www.example.com/smth.")); } + + #[test] + fn test_index_files_default() { + let input = fixtures_path().join("filechecker/dir_links.md"); + + // the dir links in this file all exist. + main_command() + .arg(&input) + .arg("--verbose") + .assert() + .success(); + + // ... but checking fragments will find none, because dirs + // have no fragments and no index file given. 
+ let dir_links_with_fragment = 2; + main_command() + .arg(&input) + .arg("--include-fragments") + .assert() + .failure() + .stdout(contains("Cannot find fragment").count(dir_links_with_fragment)) + .stdout(contains("#").count(dir_links_with_fragment)); + } + + #[test] + fn test_index_files_specified() { + let input = fixtures_path().join("filechecker/dir_links.md"); + + // passing `--index-files index.html` should reject all links + // to /empty_dir because it doesn't have the index file + let result = main_command() + .arg(input) + .arg("--index-files") + .arg("index.html") + .arg("--verbose") + .assert() + .failure(); + + let empty_dir_links = 2; + let index_dir_links = 2; + result + .stdout(contains("Cannot find index file").count(empty_dir_links)) + .stdout(contains("/empty_dir").count(empty_dir_links)) + .stdout(contains(format!("{index_dir_links} OK"))); + } + + #[test] + fn test_index_files_dot_in_list() { + let input = fixtures_path().join("filechecker/dir_links.md"); + + // passing `.` in the index files list should accept a directory + // even if no other index file is found. + main_command() + .arg(&input) + .arg("--index-files") + .arg("index.html,.") + .assert() + .success() + .stdout(contains("4 OK")); + + // checking fragments will accept the index_dir#fragment link, + // but reject empty_dir#fragment because empty_dir doesn’t have + // index.html. + main_command() + .arg(&input) + .arg("--index-files") + .arg("index.html,.") + .arg("--include-fragments") + .assert() + .failure() + .stdout(contains("Cannot find fragment").count(1)) + .stdout(contains("empty_dir#fragment").count(1)) + .stdout(contains("index_dir#fragment").count(0)) + .stdout(contains("3 OK")); + } + + #[test] + fn test_index_files_empty_list() { + let input = fixtures_path().join("filechecker/dir_links.md"); + + // passing an empty list to --index-files should reject /all/ + // directory links. 
+ let result = main_command() + .arg(input) + .arg("--index-files") + .arg("") + .assert() + .failure(); + + let num_dir_links = 4; + result + .stdout(contains("Cannot find index file").count(num_dir_links)) + .stdout(contains("0 OK")); + } } diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index f52310c4e0..236ba4d23d 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -1,5 +1,6 @@ use http::StatusCode; use log::warn; +use std::borrow::Cow; use std::path::{Path, PathBuf}; use crate::{ @@ -18,6 +19,17 @@ pub(crate) struct FileChecker { base: Option, /// List of file extensions to try if the original path doesn't exist. fallback_extensions: Vec, + /// If specified, resolves to one of the given index files if the original path + /// is a directory. + /// + /// If non-`None`, a directory must contain at least one of the file names + /// in order to be considered a valid link target. Index file names are + /// required to match regular files, aside from the special `.` name which + /// will match the directory itself. + /// + /// If `None`, index file checking is disabled and directory links are valid + /// as long as the directory exists on disk. + index_files: Option>, /// Whether to check for the existence of fragments (e.g., `#section-id`) in HTML files. include_fragments: bool, /// Utility for performing fragment checks in HTML files. @@ -31,15 +43,18 @@ impl FileChecker { /// /// * `base` - Optional base path or URL for resolving relative paths. /// * `fallback_extensions` - List of extensions to try if the original file is not found. + /// * `index_files` - Optional list of index file names to search for if the path is a directory. /// * `include_fragments` - Whether to check for fragment existence in HTML files. 
pub(crate) fn new( base: Option, fallback_extensions: Vec, + index_files: Option>, include_fragments: bool, ) -> Self { Self { base, fallback_extensions, + index_files, include_fragments, fragment_checker: FragmentChecker::new(), } @@ -74,7 +89,8 @@ impl FileChecker { /// /// # Returns /// - /// Returns the resolved path as a `PathBuf`. + /// Returns the resolved path as a `PathBuf`, or the original path + /// if no base path is defined. fn resolve_path(&self, path: &Path) -> PathBuf { if let Some(Base::Local(base_path)) = &self.base { if path.is_absolute() { @@ -96,132 +112,165 @@ impl FileChecker { /// Checks if the given path exists and performs additional checks if necessary. /// + /// First, the given path is resolved to a file by applying fallback extensions + /// and finding index files if needed. Then, the file is checked to make sure it + /// exists and passes any additional checks. + /// /// # Arguments /// /// * `path` - The path to check. - /// * `uri` - The original URI, used for error reporting. + /// * `uri` - The original URI, used for checking and error reporting. /// /// # Returns /// /// Returns a `Status` indicating the result of the check. async fn check_path(&self, path: &Path, uri: &Uri) -> Status { - let file_path = self.resolve_file_path(path); - let has_fragment = uri.url.fragment().is_some_and(|x| !x.is_empty()); + let path = match path.metadata() { + // for non-existing paths, attempt fallback extensions + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + self.apply_fallback_extensions(path, uri).map(Cow::Owned) + } - // If file_path exists, check this file - if let Some(file_path) = file_path { - return self.check_file(&file_path, uri).await; - } - // If path is a directory, and we cannot find an index file inside it, - // and we don't have a fragment, just return success. 
- else if path.is_dir() && !has_fragment { - return Status::Ok(StatusCode::OK); - } + // other IO errors are unexpected and should fail the check + Err(e) => Err(ErrorKind::ReadFileInput(e, path.to_path_buf())), - ErrorKind::InvalidFilePath(uri.clone()).into() + // existing directories are resolved via index files + Ok(meta) if meta.is_dir() => self.apply_index_files(path).map(Cow::Owned), + + // otherwise, path is an existing file - just return the path + Ok(_) => Ok(Cow::Borrowed(path)), + }; + + match path { + Ok(ref path) => self.check_file(path, uri).await, + Err(err) => err.into(), + } } - /// Resolves a path to an actual file, applying fallback extensions and directory index resolution. + /// Resolves a path to a file, applying fallback extensions if necessary. + /// + /// This function will try to find a file, first by attempting the given path + /// itself, then by attempting the path with each extension from + /// [`FileChecker::fallback_extensions`]. The first existing file (not directory), + /// if any, will be returned. /// /// # Arguments /// /// * `path` - The path to resolve. + /// * `uri` - The original URI, used for error reporting. /// /// # Returns /// - /// Returns `Some(PathBuf)` with the resolved file path, or `None` if no valid file is found. - fn resolve_file_path(&self, path: &Path) -> Option { + /// Returns `Ok(PathBuf)` with the resolved file path, or `Err` if no valid file is found. + /// If `Ok` is returned, the contained `PathBuf` is guaranteed to exist and be a file. 
+ fn apply_fallback_extensions(&self, path: &Path, uri: &Uri) -> Result { // If it's already a file, use it directly if path.is_file() { - return Some(path.to_path_buf()); + return Ok(path.to_path_buf()); } // Try fallback extensions let mut path_buf = path.to_path_buf(); for ext in &self.fallback_extensions { path_buf.set_extension(ext); - if path_buf.exists() && path_buf.is_file() { - return Some(path_buf); + if path_buf.is_file() { + return Ok(path_buf); } } - // If it's a directory, try to find an index file - if path.is_dir() { - return self.get_index_file_path(path); - } - - None + Err(ErrorKind::InvalidFilePath(uri.clone())) } /// Tries to find an index file in the given directory, returning the first match. + /// The index file behavior is specified by [`FileChecker::index_files`]. + /// + /// If this is non-`None`, index files must exist and resolved index files are + /// required to be files, aside from the special name `.` - this will match the + /// directory itself. /// - /// Searches for `index.{ext}` files using fallback extensions, defaulting to `index.html` - /// if no fallback extensions are configured. This encapsulates both the "index" filename - /// convention and the extension resolution logic. + /// If `None`, index file resolution is disabled and this function simply + /// returns the given path. /// /// # Arguments /// - /// * `dir_path` - The directory to search for index files + /// * `dir_path` - The directory within which to search for index files. + /// This is assumed to be an existing directory. /// /// # Returns /// - /// Returns `Some(PathBuf)` pointing to the first existing index file, or `None` if no index file is found. - fn get_index_file_path(&self, dir_path: &Path) -> Option { - // In this function, we hardcode the filename `index` and the extension - // `.html` since `index.html` is the most common scenario when serving a - // page from a directory. 
However, various servers may support other - // filenames and extensions, such as `README.md`. We could enhance this by - // giving users the option to configure the index filename and extension. - - let extensions_to_try = if self.fallback_extensions.is_empty() { - vec!["html".to_string()] - } else { - self.fallback_extensions.clone() + /// Returns `Ok(PathBuf)` pointing to the first existing index file, or + /// `Err` if no index file is found. If `Ok` is returned, the contained `PathBuf` + /// is guaranteed to exist. In most cases, the returned path will be a file path. + /// + /// If index files are disabled, simply returns `Ok(dir_path)`. + fn apply_index_files(&self, dir_path: &Path) -> Result { + // this implements the "disabled" case by treating a directory as its + // own index file. + let index_names_to_try = match &self.index_files { + Some(names) => &names[..], + None => &[".".to_owned()], }; - for ext in &extensions_to_try { - let index_path = dir_path.join(format!("index.{ext}")); - if index_path.is_file() { - return Some(index_path); - } - } - None + index_names_to_try + .iter() + .find_map(|filename| { + // for some special index file names, we accept directories as well + // as files. + let exists = match filename.as_str() { + "." => Path::exists, + _ => Path::is_file, + }; + + let path = dir_path.join(filename); + exists(&path).then_some(path) + }) + .ok_or_else(|| ErrorKind::InvalidIndexFile(dir_path.to_path_buf())) } /// Checks a resolved file, optionally verifying fragments for HTML files. /// /// # Arguments /// - /// * `file_path` - The resolved file path to check. + /// * `path` - The resolved path to check. /// * `uri` - The original URI, used for error reporting. /// /// # Returns /// /// Returns a `Status` indicating the result of the check. 
- async fn check_file(&self, file_path: &Path, uri: &Uri) -> Status { - if !file_path.is_file() { - return ErrorKind::InvalidFilePath(uri.clone()).into(); - } - - // Check if we need to verify fragments - if self.include_fragments && uri.url.fragment().is_some_and(|x| !x.is_empty()) { - self.check_fragment(file_path, uri).await + async fn check_file(&self, path: &Path, uri: &Uri) -> Status { + if self.include_fragments { + self.check_fragment(path, uri).await } else { Status::Ok(StatusCode::OK) } } - /// Checks for the existence of a fragment in an HTML file. + /// Checks for the existence of a fragment in a path. + /// + /// The given path may be a file or a directory. A directory + /// is treated as if it was an empty file with no fragments. /// /// # Arguments /// - /// * `path` - The path to the HTML file. + /// * `path` - The path to the file or directory. Assumed to exist. /// * `uri` - The original URI, containing the fragment to check. /// /// # Returns /// /// Returns a `Status` indicating the result of the fragment check. async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status { + // for absent or trivial fragments, always return success. + if uri.url.fragment().is_none_or(str::is_empty) { + return Status::Ok(StatusCode::OK); + } + + // directories are treated as if they were a file with no fragments. + // reaching here means we have a non-trivial fragment on a directory, + // so return error. 
+ if path.is_dir() { + return ErrorKind::InvalidFragment(uri.clone()).into(); + } + match FragmentInput::from_path(path).await { Ok(input) => match self.fragment_checker.check(input, &uri.url).await { Ok(true) => Status::Ok(StatusCode::OK), @@ -238,3 +287,206 @@ impl FileChecker { } } } + +#[cfg(test)] +mod tests { + + use super::FileChecker; + use crate::test_utils::{fixture_uri, fixtures_path}; + use crate::{ + ErrorKind::{InvalidFilePath, InvalidFragment, InvalidIndexFile}, + Status, + }; + + /// Calls [`FileChecker::check`] on the given [`FileChecker`] with given URL + /// path (relative to the fixtures directory). + /// + /// The result of checking the link is matched against the given pattern. + macro_rules! assert_filecheck { + ($checker:expr, $path:expr, $pattern:pat) => { + let uri = fixture_uri($path); + let result = $checker.check(&uri).await; + assert!( + matches!(result, $pattern), + "assertion failed: {} should be {} but was '{:?}'", + &uri, + stringify!($pattern), + &result + ); + }; + } + + #[tokio::test] + async fn test_default() { + // default behaviour accepts dir links as long as the directory exists. + let checker = FileChecker::new(None, vec![], None, true); + + assert_filecheck!(&checker, "filechecker/index_dir", Status::Ok(_)); + + // empty dir is accepted with '.' in index_files, but it contains no fragments. + assert_filecheck!(&checker, "filechecker/empty_dir", Status::Ok(_)); + assert_filecheck!(&checker, "filechecker/empty_dir#", Status::Ok(_)); + assert_filecheck!( + &checker, + "filechecker/empty_dir#fragment", + Status::Error(InvalidFragment(_)) + ); + + // even though index.html is present, it is not used because index_files is only + // '.', so no fragments are found. 
+ assert_filecheck!( + &checker, + "filechecker/index_dir#fragment", + Status::Error(InvalidFragment(_)) + ); + assert_filecheck!( + &checker, + "filechecker/index_dir#non-existingfragment", + Status::Error(InvalidFragment(_)) + ); + } + + #[tokio::test] + async fn test_index_files() { + let checker = FileChecker::new( + None, + vec![], + Some(vec!["index.html".to_owned(), "index.md".to_owned()]), + true, + ); + + assert_filecheck!(&checker, "filechecker/index_dir", Status::Ok(_)); + assert_filecheck!(&checker, "filechecker/index_md", Status::Ok(_)); + // empty is rejected because of no index.html + assert_filecheck!( + &checker, + "filechecker/empty_dir", + Status::Error(InvalidIndexFile(_)) + ); + assert_filecheck!( + &checker, + "filechecker/empty_dir#fragment", + Status::Error(InvalidIndexFile(_)) + ); + + // index.html is resolved and fragments are checked. + assert_filecheck!(&checker, "filechecker/index_dir#fragment", Status::Ok(_)); + assert_filecheck!( + &checker, + "filechecker/index_dir#non-existingfragment", + Status::Error(InvalidFragment(_)) + ); + + // directories which look like files should still have index files applied + assert_filecheck!( + &checker, + "filechecker/dir_with_extension.html", + Status::Error(InvalidIndexFile(_)) + ); + } + + #[tokio::test] + async fn test_both_fallback_and_index_corner() { + let checker = FileChecker::new( + None, + vec!["html".to_owned()], + Some(vec!["index".to_owned()]), + false, + ); + + // this test case has a subdir 'same_name' and a file 'same_name.html'. + // this shows that the index file resolving is applied in this case and + // fallback extensions are not applied. + assert_filecheck!( + &checker, + "filechecker/same_name", + Status::Error(InvalidIndexFile(_)) + ); + + // this directory has an index.html, but the index_files argument is only "index". this + // shows that fallback extensions are not applied to index file names, as the index.html is + // not found. 
+ assert_filecheck!( + &checker, + "filechecker/index_dir", + Status::Error(InvalidIndexFile(_)) + ); + + // a directory called 'dir_with_extension.html' exists. this test shows that fallback + // extensions must resolve to a file not a directory. + assert_filecheck!( + &checker, + "filechecker/dir_with_extension", + Status::Error(InvalidFilePath(_)) + ); + } + + #[tokio::test] + async fn test_empty_index_list_corner() { + // empty index_files list will reject all directory links + let checker_no_indexes = FileChecker::new(None, vec![], Some(vec![]), false); + assert_filecheck!( + &checker_no_indexes, + "filechecker/index_dir", + Status::Error(InvalidIndexFile(_)) + ); + assert_filecheck!( + &checker_no_indexes, + "filechecker/empty_dir", + Status::Error(InvalidIndexFile(_)) + ); + } + + #[tokio::test] + async fn test_index_list_of_directories_corner() { + // this test defines index_files to be a list of different names, all of which will + // resolve to an existing directory. however, because they are directories and not + // the special '.' name, these should not be accepted as valid index files. + let dir_names = vec![ + String::new(), + "./.".to_owned(), + "..".to_owned(), + "/".to_owned(), + ]; + let checker_dir_indexes = FileChecker::new(None, vec![], Some(dir_names), false); + assert_filecheck!( + &checker_dir_indexes, + "filechecker/index_dir", + Status::Error(InvalidIndexFile(_)) + ); + assert_filecheck!( + &checker_dir_indexes, + "filechecker/empty_dir", + Status::Error(InvalidIndexFile(_)) + ); + } + + #[tokio::test] + async fn test_index_file_traversal_corner() { + // index file names can contain path fragments and they will be traversed. 
+ let checker_dotdot = FileChecker::new( + None, + vec![], + Some(vec!["../index_dir/index.html".to_owned()]), + true, + ); + assert_filecheck!( + &checker_dotdot, + "filechecker/empty_dir#fragment", + Status::Ok(_) + ); + + // absolute paths to a file on disk should also work + let absolute_html = fixtures_path() + .join("filechecker/index_dir/index.html") + .to_str() + .expect("expected utf-8 fixtures path") + .to_owned(); + let checker_absolute = FileChecker::new(None, vec![], Some(vec![absolute_html]), true); + assert_filecheck!( + &checker_absolute, + "filechecker/empty_dir#fragment", + Status::Ok(_) + ); + } +} diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 48c8926bca..9213a30be8 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -89,8 +89,26 @@ pub struct ClientBuilder { remaps: Option, /// Automatically append file extensions to `file://` URIs as needed + /// + /// This option takes effect on `file://` URIs which do not exist. fallback_extensions: Vec, + /// Index file names to use when resolving `file://` URIs which point to + /// directories. + /// + /// For local directory links, if this is non-`None`, then at least one + /// index file from this list must exist in order for the link to be + /// considered valid. Index file names are required to match regular + /// files, aside from the special `.` name which will match the + /// directory itself. + /// + /// If `None`, index file checking is disabled and directory links are valid + /// as long as the directory exists on disk. + /// + /// In the [`ClientBuilder`], this defaults to `None`. + #[builder(default = None)] + index_files: Option>, + /// Links matching this set of regular expressions are **always** checked. 
/// /// This has higher precedence over [`ClientBuilder::excludes`], **but** @@ -409,6 +427,7 @@ impl ClientBuilder { file_checker: FileChecker::new( self.base, self.fallback_extensions, + self.index_files, self.include_fragments, ), }) diff --git a/lychee-lib/src/test_utils.rs b/lychee-lib/src/test_utils.rs index ff7be8e8c6..98d65ef3d4 100644 --- a/lychee-lib/src/test_utils.rs +++ b/lychee-lib/src/test_utils.rs @@ -1,4 +1,8 @@ -use std::{convert::TryFrom, fs, path::Path}; +use std::{ + convert::TryFrom, + fs, + path::{Path, PathBuf}, +}; use reqwest::Url; @@ -60,12 +64,37 @@ pub(crate) fn mail(address: &str) -> Uri { .into() } -/// Loads a fixture from the `fixtures` directory -pub(crate) fn load_fixture(filename: &str) -> String { - let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR")) +/// Returns the path to the `fixtures` directory. +/// +/// # Panics +/// +/// Panics if the fixtures directory could not be determined. +pub(crate) fn fixtures_path() -> PathBuf { + Path::new(env!("CARGO_MANIFEST_DIR")) .parent() .unwrap() .join("fixtures") - .join(filename); - fs::read_to_string(fixture_path).unwrap() +} + +/// Loads a fixture from the `fixtures` directory +pub(crate) fn load_fixture(filename: &str) -> String { + let path = fixtures_path().join(filename); + fs::read_to_string(path).unwrap() +} + +/// Constructs a [`Uri`] from a given subpath within the `fixtures` directory. +/// +/// The specified subpath may contain a fragment reference by ending with `#something`. +/// The subpath should not begin with a slash, otherwise it will be treated as an +/// absolute path. 
+pub(crate) fn fixture_uri(subpath: &str) -> Uri { + let fixture_url = + Url::from_directory_path(fixtures_path()).expect("fixture path should be a valid URL"); + + // joining subpath onto a Url allows the subpath to contain a fragment + let url = fixture_url + .join(subpath) + .expect("expected subpath to form a valid URL"); + + Uri::from(url) } diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 51953e45d9..372366da43 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -70,6 +70,10 @@ pub enum ErrorKind { #[error("Cannot find fragment")] InvalidFragment(Uri), + /// The given directory is missing a required index file + #[error("Cannot find index file within directory")] + InvalidIndexFile(PathBuf), + /// The given path cannot be converted to a URI #[error("Invalid path to URL conversion: {0}")] InvalidUrlFromPath(PathBuf), @@ -277,6 +281,7 @@ impl PartialEq for ErrorKind { (Self::InvalidFile(p1), Self::InvalidFile(p2)) => p1 == p2, (Self::InvalidFilePath(u1), Self::InvalidFilePath(u2)) => u1 == u2, (Self::InvalidFragment(u1), Self::InvalidFragment(u2)) => u1 == u2, + (Self::InvalidIndexFile(p1), Self::InvalidIndexFile(p2)) => p1 == p2, (Self::InvalidUrlFromPath(p1), Self::InvalidUrlFromPath(p2)) => p1 == p2, (Self::InvalidBase(b1, e1), Self::InvalidBase(b2, e2)) => b1 == b2 && e1 == e2, (Self::InvalidUrlRemap(r1), Self::InvalidUrlRemap(r2)) => r1 == r2, @@ -314,6 +319,7 @@ impl Hash for ErrorKind { Self::Utf8(e) => e.to_string().hash(state), Self::InvalidFilePath(u) => u.hash(state), Self::InvalidFragment(u) => u.hash(state), + Self::InvalidIndexFile(p) => p.hash(state), Self::UnreachableEmailAddress(u, ..) => u.hash(state), Self::InsecureURL(u, ..) => u.hash(state), Self::InvalidBase(base, e) => (base, e).hash(state),