Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
14 changes: 14 additions & 0 deletions fixtures/fragments-fallback-extensions/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html lang="en">

<head>
<title>Fallback Extensions Fragments</title>
</head>

<body>
<!-- Fixture for the fragments + fallback-extensions CLI test, which expects 3 links in total: 1 OK and 2 errors. -->
<!-- Link 1: extension-less directory target; the fragment matches the heading id in that directory's index page. -->
<a href="sub_dir#valid-id">1</a>
<!-- Link 2: same target as link 1, but the fragment matches no id in that index page. -->
<a href="sub_dir#invalid-id">2</a>
<!-- Link 3: NOTE(review): presumably this directory is missing or has no index page in the fixture tree; verify. -->
<a href="empty_dir#invalid-id">3</a>
</body>

</html>
12 changes: 12 additions & 0 deletions fixtures/fragments-fallback-extensions/sub_dir/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">

<head>
<title>Fallback Extensions Fragments</title>
</head>

<body>
<!-- The only fragment target on this page; the parent fixture page links to it once with this id and once with a non-existent id. -->
<h1 id="valid-id">ID</h1>
</body>

</html>
18 changes: 18 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1907,6 +1907,24 @@ mod cli {
.stdout(contains("0 Errors"));
}

/// Fragment checking must still work when a link is resolved through
/// `--fallback-extensions`.
///
/// The fixture page contains three fragment links; the run is expected to
/// fail overall, reporting one successful link and two errors.
#[test]
fn test_fragments_fallback_extensions() {
    let fixture_dir = fixtures_path().join("fragments-fallback-extensions");

    // All flags are fixed strings, so hand them over in one go.
    let flags = [
        "--include-fragments",
        "--fallback-extensions=html",
        "--no-progress",
        "--offline",
        "-v",
    ];

    let mut cmd = main_command();
    cmd.args(flags)
        .arg(fixture_dir)
        .assert()
        .failure()
        .stdout(contains("3 Total"))
        .stdout(contains("1 OK"))
        .stdout(contains("2 Errors"));
}

/// Test relative paths
///
/// Imagine a web server hosting a site with the following structure:
Expand Down
112 changes: 80 additions & 32 deletions lychee-lib/src/checker/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,63 +105,111 @@ impl FileChecker {
///
/// Returns a `Status` indicating the result of the check.
async fn check_path(&self, path: &Path, uri: &Uri) -> Status {
if path.exists() {
return self.check_existing_path(path, uri).await;
let file_path = self.resolve_file_path(path);
let has_fragment = uri.url.fragment().is_some_and(|x| !x.is_empty());

// If file_path exists, check this file
if file_path.is_some() {
return self.check_file(&file_path.unwrap(), uri).await;
}
// If path is a directory, and we cannot find an index file inside it,
// and we don't have a fragment, just return success. This is for
// backward compatibility.
else if path.is_dir() && !has_fragment {
return Status::Ok(StatusCode::OK);
Comment on lines +115 to +119
Copy link
Contributor Author

@ocavue ocavue Jun 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this logic to ensure that the following test can pass:

Skip the fragment check for directories like: [empty](empty_dir/).

While I personally prefer this test to fail, I may not be aware of all the context surrounding it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just found that I forgot to add the fragment to this test case. So it doesn't test the new code in #1713.

Instead, it has been treated as a success by lychee for a long time.

I'm not sure whether it's a common case for some scenarios or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@kemingy Hey 👋

If I take out the quoted code in this comment, in other words, treat [empty](empty_dir/#no_exist_fragment) as an error, will this cause the "Too many open files" error to reappear? I'm not entirely clear on how checking link fragments relates to the "Too many open files" error.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. I think it's safe to treat the empty_dir#fragment as an error.

Copy link
Contributor Author

@ocavue ocavue Jun 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I'll keep this PR as it is to limit its scope. I'll open another PR to make empty_dir#fragment an error.

}

self.check_with_fallback_extensions(path, uri).await
ErrorKind::InvalidFilePath(uri.clone()).into()
}

/// Checks an existing path, optionally verifying fragments for HTML files.
/// Resolves a path to an actual file, applying fallback extensions and directory index resolution.
///
/// # Arguments
///
/// * `path` - The path to check.
/// * `uri` - The original URI, used for error reporting.
/// * `path` - The path to resolve.
///
/// # Returns
///
/// Returns a `Status` indicating the result of the check.
async fn check_existing_path(&self, path: &Path, uri: &Uri) -> Status {
// Only files can contain content with fragments.
// Skip if the uri doesn't have the fragment.
if self.include_fragments
&& path.is_file()
&& uri.url.fragment().is_some_and(|x| !x.is_empty())
{
self.check_fragment(path, uri).await
/// Returns `Some(PathBuf)` with the resolved file path, or `None` if no valid file is found.
fn resolve_file_path(&self, path: &Path) -> Option<PathBuf> {
    // Resolution order:
    //   1. `path` itself, if it is a regular file;
    //   2. `path` with each configured fallback extension, in order;
    //   3. an index file inside `path`, if `path` is a directory.
    // `None` means none of the candidates is an existing regular file.

    // An existing regular file needs no further resolution.
    if path.is_file() {
        return Some(path.to_path_buf());
    }

    // Derive every candidate from the ORIGINAL path. Re-using one mutable
    // buffer would make each candidate depend on the previous extension
    // (e.g. after trying `tar.gz`, the next attempt would start from
    // `name.tar` instead of `name`).
    //
    // `is_file()` alone is sufficient: it already returns `false` for paths
    // that do not exist, so a separate `exists()` call is a wasted stat.
    let with_fallback_extension = self
        .fallback_extensions
        .iter()
        .map(|ext| path.with_extension(ext))
        .find(|candidate| candidate.is_file());
    if with_fallback_extension.is_some() {
        return with_fallback_extension;
    }

    // A directory may still be served through its index file.
    if path.is_dir() {
        return self.get_index_file_path(path);
    }

    None
}

/// Tries to find an index file in the given directory, returning the first match.
///
/// Searches for `index.{ext}` files using fallback extensions, defaulting to `index.html`
/// if no fallback extensions are configured. This encapsulates both the "index" filename
/// convention and the extension resolution logic.
///
/// # Arguments
///
/// * `dir_path` - The directory to search for index files
///
/// # Returns
///
/// Returns `Some(PathBuf)` pointing to the first existing index file, or `None` if no index file is found.
fn get_index_file_path(&self, dir_path: &Path) -> Option<PathBuf> {
// In this function, we hardcode the filename `index` and the extension
// `.html` since `index.html` is the most common scenario when serving a
// page from a directory. However, various servers may support other
// filenames and extensions, such as `README.md`. We could enhance this by
// giving users the option to configure the index filename and extension.

let extensions_to_try = if self.fallback_extensions.is_empty() {
vec!["html".to_string()]
} else {
Status::Ok(StatusCode::OK)
self.fallback_extensions.clone()
};

for ext in &extensions_to_try {
let index_path = dir_path.join(format!("index.{ext}"));
if index_path.is_file() {
return Some(index_path);
}
}
None
}

/// Attempts to find a file by trying different extensions specified in `fallback_extensions`.
/// Checks a resolved file, optionally verifying fragments for HTML files.
///
/// # Arguments
///
/// * `path` - The original path to check.
/// * `file_path` - The resolved file path to check.
/// * `uri` - The original URI, used for error reporting.
///
/// # Returns
///
/// Returns a `Status` indicating the result of the check.
async fn check_with_fallback_extensions(&self, path: &Path, uri: &Uri) -> Status {
let mut path_buf = path.to_path_buf();

// If the path already has an extension, try it first
if path_buf.extension().is_some() && path_buf.exists() {
return self.check_existing_path(&path_buf, uri).await;
async fn check_file(&self, file_path: &Path, uri: &Uri) -> Status {
if !file_path.is_file() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}

// Try fallback extensions
for ext in &self.fallback_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
return self.check_existing_path(&path_buf, uri).await;
}
// Check if we need to verify fragments
if self.include_fragments && uri.url.fragment().is_some_and(|x| !x.is_empty()) {
self.check_fragment(file_path, uri).await
} else {
Status::Ok(StatusCode::OK)
}

ErrorKind::InvalidFilePath(uri.clone()).into()
}

/// Checks for the existence of a fragment in an HTML file.
Expand Down