diff --git a/fixtures/fragments/file1.md b/fixtures/fragments/file1.md index d4ec6f5324..18d11a1cca 100644 --- a/fixtures/fragments/file1.md +++ b/fixtures/fragments/file1.md @@ -67,3 +67,33 @@ without related HTML element. Browser will scroll to the top of the page. A link to the non-existing fragment: [try](https://github.com/lycheeverse/lychee#non-existent-anchor). Skip the fragment check for directories like: [empty](empty_dir/). + +# Binary data URLs checks + +Fragment checking tries to scan the (whole) content/response body for HTML element IDs. +This fails for binary data and can cause unnecessary traffic for remote URLs. + +## Without fragment + +Fragment checking is skipped if the URL does not actually contain a fragment. +Even with fragment checking enabled, the following links must hence succeed: + +[Link to local binary file without fragment](zero.bin) +[Link to local binary file with empty fragment](zero.bin#) +[Link to remote binary file without fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin) +[Link to remote binary file with empty fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#) + +## Local file with fragment + +For local files URIs with fragment, the fragment checker is invoked and fails to read the content, +but the file checker emits a warning only. The following link hence must succeed as well: + +[Link to local binary file with fragment](zero.bin#fragment) + +## Remote URL with fragment + +Right now, there is not MIME/content type based exclusion for fragment checks in the website checker. +Also, other than the file checker, the website checker throws an error if reading the response body fails. +The following link hence must fail: + +[Link to remote binary file with fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#fragment) diff --git a/fixtures/fragments/zero.bin b/fixtures/fragments/zero.bin new file mode 100644 index 0000000000..abfba4e88e --- /dev/null +++ b/fixtures/fragments/zero.bin @@ -0,0 +1 @@ +âÔ \ No newline at end of file diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 1119a600df..0888dfed62 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1876,10 +1876,22 @@ mod cli { .stderr(contains( "https://github.com/lycheeverse/lychee#non-existent-anchor", )) - .stdout(contains("28 Total")) - .stdout(contains("23 OK")) - // 4 failures because of missing fragments - .stdout(contains("5 Errors")); + .stderr(contains("fixtures/fragments/zero.bin")) + .stderr(contains("fixtures/fragments/zero.bin#")) + .stderr(contains( + "https://raw.githubusercontent.com/MichaIng/lychee/skip-fragment-check-by-url/fixtures/fragments/zero.bin", + )) + .stderr(contains( + "https://raw.githubusercontent.com/MichaIng/lychee/skip-fragment-check-by-url/fixtures/fragments/zero.bin#", + )) + .stderr(contains("fixtures/fragments/zero.bin#fragment")) + .stderr(contains( + "https://raw.githubusercontent.com/MichaIng/lychee/skip-fragment-check-by-url/fixtures/fragments/zero.bin#fragment", + )) + .stdout(contains("34 Total")) + .stdout(contains("28 OK")) + // Failures because of missing fragments or failed binary body scan + .stdout(contains("6 Errors")); } #[test] diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 0e3c7d1c43..9ea05a0430 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -125,7 +125,10 @@ impl FileChecker { async fn check_existing_path(&self, path: &Path, uri: &Uri) -> Status { // Only files can contain content with fragments. // Skip if the uri doesn't have the fragment. - if self.include_fragments && path.is_file() && uri.url.fragment().is_some() { + if self.include_fragments + && path.is_file() + && uri.url.fragment().is_some_and(|x| !x.is_empty()) + { self.check_fragment(path, uri).await } else { Status::Ok(StatusCode::OK) diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs index de118bee28..f4393b2899 100644 --- a/lychee-lib/src/checker/website.rs +++ b/lychee-lib/src/checker/website.rs @@ -103,7 +103,11 @@ impl WebsiteChecker { match self.reqwest_client.execute(request).await { Ok(response) => { let status = Status::new(&response, &self.accepted); - if self.include_fragments && status.is_success() && method == Method::GET { + if self.include_fragments + && status.is_success() + && method == Method::GET + && response.url().fragment().is_some_and(|x| !x.is_empty()) + { self.check_html_fragment(status, response).await } else { status