From 30f8f1143d027a983e51a17ff0c57f34d433cad5 Mon Sep 17 00:00:00 2001 From: ocavue Date: Sun, 29 Jun 2025 20:01:18 +1000 Subject: [PATCH] fix: resolve index file inside a directory --- .../empty_dir/.gitkeep | 0 .../fragments-fallback-extensions/index.html | 14 +++ .../sub_dir/index.html | 12 ++ lychee-bin/tests/cli.rs | 18 +++ lychee-lib/src/checker/file.rs | 112 +++++++++++++----- 5 files changed, 124 insertions(+), 32 deletions(-) create mode 100644 fixtures/fragments-fallback-extensions/empty_dir/.gitkeep create mode 100644 fixtures/fragments-fallback-extensions/index.html create mode 100644 fixtures/fragments-fallback-extensions/sub_dir/index.html diff --git a/fixtures/fragments-fallback-extensions/empty_dir/.gitkeep b/fixtures/fragments-fallback-extensions/empty_dir/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/fixtures/fragments-fallback-extensions/index.html b/fixtures/fragments-fallback-extensions/index.html new file mode 100644 index 0000000000..cb2977d165 --- /dev/null +++ b/fixtures/fragments-fallback-extensions/index.html @@ -0,0 +1,14 @@ + + + + + Fallback Extensions Fragments + + + + 1 + 2 + 3 + + + diff --git a/fixtures/fragments-fallback-extensions/sub_dir/index.html b/fixtures/fragments-fallback-extensions/sub_dir/index.html new file mode 100644 index 0000000000..255a39337f --- /dev/null +++ b/fixtures/fragments-fallback-extensions/sub_dir/index.html @@ -0,0 +1,12 @@ + + + + + Fallback Extensions Fragments + + + +

ID

+ + + diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 569edf7831..de1c27ae41 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1907,6 +1907,24 @@ mod cli { .stdout(contains("0 Errors")); } + #[test] + fn test_fragments_fallback_extensions() { + let mut cmd = main_command(); + let input = fixtures_path().join("fragments-fallback-extensions"); + + cmd.arg("--include-fragments") + .arg("--fallback-extensions=html") + .arg("--no-progress") + .arg("--offline") + .arg("-v") + .arg(input) + .assert() + .failure() + .stdout(contains("3 Total")) + .stdout(contains("1 OK")) + .stdout(contains("2 Errors")); + } + /// Test relative paths /// /// Imagine a web server hosting a site with the following structure: diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 9ea05a0430..026a644f3e 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -105,63 +105,111 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. async fn check_path(&self, path: &Path, uri: &Uri) -> Status { - if path.exists() { - return self.check_existing_path(path, uri).await; + let file_path = self.resolve_file_path(path); + let has_fragment = uri.url.fragment().is_some_and(|x| !x.is_empty()); + + // If file_path exists, check this file + if file_path.is_some() { + return self.check_file(&file_path.unwrap(), uri).await; + } + // If path is a directory, and we cannot find an index file inside it, + // and we don't have a fragment, just return success. This is for + // backward compatibility. + else if path.is_dir() && !has_fragment { + return Status::Ok(StatusCode::OK); } - self.check_with_fallback_extensions(path, uri).await + ErrorKind::InvalidFilePath(uri.clone()).into() } - /// Checks an existing path, optionally verifying fragments for HTML files. + /// Resolves a path to an actual file, applying fallback extensions and directory index resolution. 
/// /// # Arguments /// - /// * `path` - The path to check. - /// * `uri` - The original URI, used for error reporting. + /// * `path` - The path to resolve. /// /// # Returns /// - /// Returns a `Status` indicating the result of the check. - async fn check_existing_path(&self, path: &Path, uri: &Uri) -> Status { - // Only files can contain content with fragments. - // Skip if the uri doesn't have the fragment. - if self.include_fragments - && path.is_file() - && uri.url.fragment().is_some_and(|x| !x.is_empty()) - { - self.check_fragment(path, uri).await + /// Returns `Some(PathBuf)` with the resolved file path, or `None` if no valid file is found. + fn resolve_file_path(&self, path: &Path) -> Option<PathBuf> { + // If it's already a file, use it directly + if path.is_file() { + return Some(path.to_path_buf()); + } + + // Try fallback extensions + let mut path_buf = path.to_path_buf(); + for ext in &self.fallback_extensions { + path_buf.set_extension(ext); + if path_buf.exists() && path_buf.is_file() { + return Some(path_buf); + } + } + + // If it's a directory, try to find an index file + if path.is_dir() { + return self.get_index_file_path(path); + } + + None + } + + /// Tries to find an index file in the given directory, returning the first match. + /// + /// Searches for `index.{ext}` files using fallback extensions, defaulting to `index.html` + /// if no fallback extensions are configured. This encapsulates both the "index" filename + /// convention and the extension resolution logic. + /// + /// # Arguments + /// + /// * `dir_path` - The directory to search for index files + /// + /// # Returns + /// + /// Returns `Some(PathBuf)` pointing to the first existing index file, or `None` if no index file is found. + fn get_index_file_path(&self, dir_path: &Path) -> Option<PathBuf> { + // In this function, we hardcode the filename `index` and the extension + // `.html` since `index.html` is the most common scenario when serving a + // page from a directory. 
However, various servers may support other + // filenames and extensions, such as `README.md`. We could enhance this by + // giving users the option to configure the index filename and extension. + + let extensions_to_try = if self.fallback_extensions.is_empty() { + vec!["html".to_string()] } else { - Status::Ok(StatusCode::OK) + self.fallback_extensions.clone() + }; + + for ext in &extensions_to_try { + let index_path = dir_path.join(format!("index.{ext}")); + if index_path.is_file() { + return Some(index_path); + } } + None } - /// Attempts to find a file by trying different extensions specified in `fallback_extensions`. + /// Checks a resolved file, optionally verifying fragments for HTML files. /// /// # Arguments /// - /// * `path` - The original path to check. + /// * `file_path` - The resolved file path to check. /// * `uri` - The original URI, used for error reporting. /// /// # Returns /// /// Returns a `Status` indicating the result of the check. - async fn check_with_fallback_extensions(&self, path: &Path, uri: &Uri) -> Status { - let mut path_buf = path.to_path_buf(); - - // If the path already has an extension, try it first - if path_buf.extension().is_some() && path_buf.exists() { - return self.check_existing_path(&path_buf, uri).await; + async fn check_file(&self, file_path: &Path, uri: &Uri) -> Status { + if !file_path.is_file() { + return ErrorKind::InvalidFilePath(uri.clone()).into(); } - // Try fallback extensions - for ext in &self.fallback_extensions { - path_buf.set_extension(ext); - if path_buf.exists() { - return self.check_existing_path(&path_buf, uri).await; - } + // Check if we need to verify fragments + if self.include_fragments && uri.url.fragment().is_some_and(|x| !x.is_empty()) { + self.check_fragment(file_path, uri).await + } else { + Status::Ok(StatusCode::OK) } - - ErrorKind::InvalidFilePath(uri.clone()).into() } /// Checks for the existence of a fragment in an HTML file.