From 1e37a13b2295425a82f20a6883431d268d312063 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 19 Jun 2025 11:06:02 +0200 Subject: [PATCH 01/10] Capture bug as failing test --- lychee-bin/tests/cli.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 569edf7831..c89726578c 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -24,7 +24,10 @@ mod cli { use serde_json::Value; use tempfile::NamedTempFile; use uuid::Uuid; - use wiremock::{Mock, ResponseTemplate, matchers::basic_auth}; + use wiremock::{ + Mock, ResponseTemplate, + matchers::{basic_auth, method}, + }; type Result = std::result::Result>; @@ -1673,8 +1676,14 @@ mod cli { let password = "password123"; let mock_server = wiremock::MockServer::start().await; - Mock::given(basic_auth(username, password)) - .respond_with(ResponseTemplate::new(200)) + + Mock::given(method("GET")) + .and(basic_auth(username, password)) + .respond_with(ResponseTemplate::new(200)) // Authenticated requests are accepted + .mount(&mock_server) + .await; + Mock::given(method("GET")) + .respond_with(|_: &_| panic!("Received unauthenticated request")) .mount(&mock_server) .await; @@ -1690,6 +1699,17 @@ mod cli { .stdout(contains("1 Total")) .stdout(contains("1 OK")); + // Websites as direct arguments must also be authenticated + main_command() + .arg(mock_server.uri()) + .arg("--verbose") + .arg("--basic-auth") + .arg(format!("{} {username}:{password}", mock_server.uri())) + .assert() + .success() + .stdout(contains("1 Total")) + .stdout(contains("1 OK")); + Ok(()) } From 5a233954e0fb93f73f7206b8bb23bbcc3c8f204e Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 19 Jun 2025 16:43:18 +0200 Subject: [PATCH 02/10] Add basic auth credentials for website extraction requests via RequestChain & remove headers from Input --- lychee-bin/src/options.rs | 10 +-- lychee-bin/tests/cli.rs | 5 +- lychee-lib/src/basic_auth/mod.rs | 2 +- lychee-lib/src/checker/website.rs | 2 + lychee-lib/src/collector.rs | 46 +++++------ .../src/types/basic_auth/credentials.rs | 5 +- lychee-lib/src/types/input.rs | 77 +++++++++++++------ lychee-lib/src/utils/request.rs | 2 +- 8 files changed, 80 insertions(+), 69 deletions(-) diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 93b61a0092..041eca9238 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -340,15 +340,7 @@ impl LycheeOptions { self.raw_inputs .iter() - .map(|s| { - Input::new( - s, - None, - self.config.glob_ignore_case, - excluded.clone(), - headers.clone(), - ) - }) + .map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone())) .collect::>() .context("Cannot parse inputs from arguments") } diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index c89726578c..c39749ec55 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1699,7 +1699,7 @@ mod cli { .stdout(contains("1 Total")) .stdout(contains("1 OK")); - // Websites as direct arguments must also be authenticated + // Websites as direct arguments must also use authentication main_command() .arg(mock_server.uri()) .arg("--verbose") @@ -1707,8 +1707,7 @@ mod cli { .arg(format!("{} {username}:{password}", mock_server.uri())) .assert() .success() - .stdout(contains("1 Total")) - .stdout(contains("1 OK")); + .stdout(contains("0 Total")); // Mock server returns no body, so there are no URLs to check Ok(()) } diff --git a/lychee-lib/src/basic_auth/mod.rs b/lychee-lib/src/basic_auth/mod.rs index 0cd4de9de1..f7c90c600c 100644 --- a/lychee-lib/src/basic_auth/mod.rs +++ b/lychee-lib/src/basic_auth/mod.rs @@ -62,7 +62,7 @@ impl BasicAuthExtractor { /// [`BasicAuthCredentials`] if the a match was found. It should be noted /// that only the first match will be used to return the appropriate /// credentials. - pub(crate) fn matches(&self, uri: &Uri) -> Option { + pub fn matches(&self, uri: &Uri) -> Option { let matches: Vec<_> = self.regex_set.matches(uri.as_str()).into_iter().collect(); if matches.is_empty() { diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs index f4393b2899..cefc878140 100644 --- a/lychee-lib/src/checker/website.rs +++ b/lychee-lib/src/checker/website.rs @@ -99,6 +99,8 @@ impl WebsiteChecker { /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). async fn check_default(&self, request: Request) -> Status { + // todo!("Handling in chain: {request:?}"); + let method = request.method().clone(); match self.reqwest_client.execute(request).await { Ok(response) => { diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 935220c606..3a3ffea888 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -141,24 +141,34 @@ impl Collector { let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; let global_base = self.base; + let basic_auth_extractor = self.basic_auth_extractor.clone(); // TODO: not as ugly + stream::iter(inputs) .par_then_unordered(None, move |input| { let default_base = global_base.clone(); let extensions = extensions.clone(); + let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { let base = match &input.source { InputSource::RemoteUrl(url) => Base::try_from(url.as_str()).ok(), _ => default_base, }; + input - .get_contents(skip_missing_inputs, skip_hidden, skip_ignored, extensions) + .get_contents( + skip_missing_inputs, + skip_hidden, + skip_ignored, + extensions, + basic_auth_extractor, + ) .map(move |content| (content, base.clone())) } }) .flatten() .par_then_unordered(None, move |(content, base)| { let root_dir = self.root_dir.clone(); - let basic_auth_extractor = self.basic_auth_extractor.clone(); + let basic_auth_extractor = basic_auth_extractor.clone(); async move { let content = content?; let extractor = Extractor::new(self.use_html5ever, self.include_verbatim); @@ -229,15 +239,9 @@ mod tests { // Treat as plaintext file (no extension) let file_path = temp_dir.path().join("README"); let _file = File::create(&file_path).unwrap(); - let input = Input::new( - &file_path.as_path().display().to_string(), - None, - true, - None, - HeaderMap::new(), - )?; + let input = Input::new(&file_path.as_path().display().to_string(), None, true, None)?; let contents: Vec<_> = input - .get_contents(true, true, true, FileType::default_extensions()) + .get_contents(true, true, true, FileType::default_extensions(), None) .collect::>() .await; @@ -248,9 +252,9 @@ mod tests { #[tokio::test] async fn test_url_without_extension_is_html() -> Result<()> { - let input = Input::new("https://example.com/", None, true, None, HeaderMap::new())?; + let input = Input::new("https://example.com/", None, true, None)?; let contents: Vec<_> = input - .get_contents(true, true, true, FileType::default_extensions()) + .get_contents(true, true, true, FileType::default_extensions(), None) .collect::>() .await; @@ -283,7 +287,6 @@ mod tests { source: InputSource::String(TEST_STRING.to_owned()), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, Input { source: InputSource::RemoteUrl(Box::new( @@ -293,13 +296,11 @@ mod tests { )), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, Input { source: InputSource::FsPath(file_path), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, Input { source: InputSource::FsGlob { @@ -308,7 +309,6 @@ mod tests { }, file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, ]; @@ -337,7 +337,6 @@ mod tests { source: InputSource::String("This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)".to_string()), file_type_hint: Some(FileType::Markdown), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); @@ -364,7 +363,6 @@ mod tests { ), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); @@ -394,7 +392,6 @@ mod tests { ), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); @@ -421,7 +418,6 @@ mod tests { ), file_type_hint: Some(FileType::Markdown), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); @@ -445,7 +441,6 @@ mod tests { source: InputSource::String(input), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); @@ -478,7 +473,6 @@ mod tests { source: InputSource::RemoteUrl(Box::new(server_uri.clone())), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, None).await.ok().unwrap(); @@ -499,7 +493,6 @@ mod tests { ), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, None).await.ok().unwrap(); @@ -530,7 +523,6 @@ mod tests { )), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }, Input { source: InputSource::RemoteUrl(Box::new( @@ -542,7 +534,6 @@ mod tests { )), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }, ]; @@ -571,14 +562,13 @@ mod tests { source: InputSource::String( r#" Index - About - Another + About + Another "# .into(), ), file_type_hint: Some(FileType::Html), excluded_paths: None, - headers: HeaderMap::new(), }; let links = collect(vec![input], None, Some(base)).await.ok().unwrap(); diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index 5435dc6641..fb5f02fd6d 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -8,7 +8,6 @@ use reqwest::Request; use serde::Deserialize; use thiserror::Error; -use crate::Status; use crate::chain::{ChainResult, Handler}; #[derive(Copy, Clone, Debug, Error, PartialEq)] @@ -77,8 +76,8 @@ impl BasicAuthCredentials { } #[async_trait] -impl Handler for Option { - async fn handle(&mut self, mut request: Request) -> ChainResult { +impl Handler for Option { + async fn handle(&mut self, mut request: Request) -> ChainResult { if let Some(credentials) = self { request .headers_mut() diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index 7e02b84910..39dc2a1ebe 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -1,11 +1,14 @@ +use super::file::FileExtensions; +use crate::chain::Chain; use crate::types::FileType; -use crate::{ErrorKind, Result, utils}; +use crate::utils::request; +use crate::{BasicAuthExtractor, ChainResult, ErrorKind, Handler, Result, Uri, utils}; use async_stream::try_stream; +use async_trait::async_trait; use futures::stream::Stream; use glob::glob_with; -use http::HeaderMap; use ignore::WalkBuilder; -use reqwest::Url; +use reqwest::{Client, Request, Url}; use serde::{Deserialize, Serialize}; use shellexpand::tilde; use std::fmt::Display; @@ -13,10 +16,10 @@ use std::fs; use std::path::{Path, PathBuf}; use tokio::io::{AsyncReadExt, stdin}; -use super::file::FileExtensions; - const STDIN: &str = "-"; +type RequestChain = Chain; + #[derive(Debug)] /// Encapsulates the content for a given input pub struct InputContent { @@ -110,8 +113,6 @@ pub struct Input { pub file_type_hint: Option, /// Excluded paths that will be skipped when reading content pub excluded_paths: Option>, - /// Custom headers to be used when fetching remote URLs - pub headers: reqwest::header::HeaderMap, } impl Input { @@ -128,7 +129,6 @@ impl Input { file_type_hint: Option, glob_ignore_case: bool, excluded_paths: Option>, - headers: reqwest::header::HeaderMap, ) -> Result { let source = if value == STDIN { InputSource::Stdin @@ -194,7 +194,6 @@ impl Input { source, file_type_hint, excluded_paths, - headers, }) } @@ -205,7 +204,7 @@ impl Input { /// Returns an error if the input does not exist (i.e. invalid path) /// and the input cannot be parsed as a URL. pub fn from_value(value: &str) -> Result { - Self::new(value, None, false, None, HeaderMap::new()) + Self::new(value, None, false, None) } /// Retrieve the contents from the input @@ -226,11 +225,19 @@ impl Input { // If `Input` is a file path, try the given file extensions in order. // Stop on the first match. file_extensions: FileExtensions, + basic_auth_extractor: Option, ) -> impl Stream> { try_stream! { match self.source { InputSource::RemoteUrl(ref url) => { - let content = Self::url_contents(url, &self.headers).await; + let credentials = request::extract_credentials(basic_auth_extractor.as_ref(), &Uri{ url: *url.clone() }); + + let chain: RequestChain= Chain::new(vec![ + Box::new(credentials), + Box::new(self.clone()), + ]); + + let content = Self::url_contents(url, chain).await; match content { Err(_) if skip_missing => (), Err(e) => Err(e)?, @@ -328,7 +335,7 @@ impl Input { } } - async fn url_contents(url: &Url, headers: &HeaderMap) -> Result { + async fn url_contents(url: &Url, request_chain: RequestChain) -> Result { // Assume HTML for default paths let file_type = if url.path().is_empty() || url.path() == "/" { FileType::Html @@ -336,18 +343,23 @@ impl Input { FileType::from(url.as_str()) }; - let client = reqwest::Client::new(); + // TODO: Don't create a new Client for every call + let request = Client::builder() + .build() + .map_err(ErrorKind::BuildRequestClient)? + .request(reqwest::Method::GET, url.clone()) + .build() + .map_err(ErrorKind::BuildRequestClient)?; + + let content = match request_chain.traverse(request).await { + ChainResult::Next(_) => todo!(), + ChainResult::Done(r) => r, + }; - let res = client - .get(url.clone()) - .headers(headers.clone()) - .send() - .await - .map_err(ErrorKind::NetworkRequest)?; let input_content = InputContent { source: InputSource::RemoteUrl(Box::new(url.clone())), file_type, - content: res.text().await.map_err(ErrorKind::ReadResponseBody)?, + content, }; Ok(input_content) @@ -454,10 +466,28 @@ fn is_excluded_path(excluded_paths: &[PathBuf], path: &PathBuf) -> bool { false } +#[async_trait] +impl Handler for Input { + async fn handle(&mut self, input: Request) -> ChainResult { + let client = reqwest::Client::new(); + + let result = client + .execute(input) + .await + .map_err(ErrorKind::NetworkRequest) + .expect("todo") // todo + .text() + .await + .map_err(ErrorKind::ReadResponseBody) + .expect("todo"); // todo + // .headers(headers.clone()) // todo: add headers again + + ChainResult::Done(result) + } +} + #[cfg(test)] mod tests { - use http::HeaderMap; - use super::*; #[test] @@ -468,7 +498,7 @@ mod tests { assert!(path.exists()); assert!(path.is_relative()); - let input = Input::new(test_file, None, false, None, HeaderMap::new()); + let input = Input::new(test_file, None, false, None); assert!(input.is_ok()); assert!(matches!( input, @@ -476,7 +506,6 @@ mod tests { source: InputSource::FsPath(PathBuf { .. }), file_type_hint: None, excluded_paths: None, - headers: _, }) )); } diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 779ced469d..207f8a0de9 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -14,7 +14,7 @@ use crate::{ }; /// Extract basic auth credentials for a given URL. -fn extract_credentials( +pub fn extract_credentials( extractor: Option<&BasicAuthExtractor>, uri: &Uri, ) -> Option { From 2c676f4a8d45d9be75a55c442d2d51fa1e46d589 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 25 Jun 2025 16:34:03 +0200 Subject: [PATCH 03/10] Create UrlExtractor and add back headers --- examples/collect_links/collect_links.rs | 2 - lychee-bin/src/main.rs | 4 +- lychee-bin/src/options.rs | 1 - lychee-lib/src/checker/website.rs | 2 - lychee-lib/src/collector.rs | 45 +++++++++++++-- lychee-lib/src/types/input.rs | 73 ++----------------------- lychee-lib/src/types/mod.rs | 1 + lychee-lib/src/types/url_extractor.rs | 73 +++++++++++++++++++++++++ lychee-lib/src/utils/request.rs | 2 +- 9 files changed, 125 insertions(+), 78 deletions(-) create mode 100644 lychee-lib/src/types/url_extractor.rs diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index a7266b026e..6d639e81b2 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -14,13 +14,11 @@ async fn main() -> Result<()> { )), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, Input { source: InputSource::FsPath(PathBuf::from("fixtures/TEST.md")), file_type_hint: None, excluded_paths: None, - headers: HeaderMap::new(), }, ]; diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index b4af97925d..fb9ef5a49c 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -67,12 +67,13 @@ use anyhow::{Context, Error, Result, bail}; use clap::Parser; use commands::CommandParams; use formatters::{get_stats_formatter, log::init_logging}; +use http::HeaderMap; use log::{error, info, warn}; #[cfg(feature = "native-tls")] use openssl_sys as _; // required for vendored-openssl feature -use options::LYCHEE_CONFIG_FILE; +use options::{HeaderMapExt, LYCHEE_CONFIG_FILE}; use ring as _; // required for apple silicon use lychee_lib::BasicAuthExtractor; @@ -319,6 +320,7 @@ async fn run(opts: &LycheeOptions) -> Result { .skip_hidden(!opts.config.hidden) .skip_ignored(!opts.config.no_ignore) .include_verbatim(opts.config.include_verbatim) + .headers(HeaderMap::from_header_pairs(&opts.config.header)?) // File a bug if you rely on this envvar! It's going to go away eventually. .use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").is_ok_and(|x| x == "1")); diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 041eca9238..924d0c77e7 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -336,7 +336,6 @@ impl LycheeOptions { } else { Some(self.config.exclude_path.clone()) }; - let headers = HeaderMap::from_header_pairs(&self.config.header)?; self.raw_inputs .iter() diff --git a/lychee-lib/src/checker/website.rs b/lychee-lib/src/checker/website.rs index cefc878140..f4393b2899 100644 --- a/lychee-lib/src/checker/website.rs +++ b/lychee-lib/src/checker/website.rs @@ -99,8 +99,6 @@ impl WebsiteChecker { /// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest). async fn check_default(&self, request: Request) -> Status { - // todo!("Handling in chain: {request:?}"); - let method = request.method().clone(); match self.reqwest_client.execute(request).await { Ok(response) => { diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 3a3ffea888..73ab70f3e7 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,5 +1,6 @@ use crate::ErrorKind; use crate::InputSource; +use crate::types::url_extractor::UrlExtractor; use crate::{ Base, Input, Request, Result, basic_auth::BasicAuthExtractor, extract::Extractor, types::FileExtensions, types::uri::raw::RawUri, utils::request, @@ -9,7 +10,9 @@ use futures::{ StreamExt, stream::{self, Stream}, }; +use http::HeaderMap; use par_stream::ParStreamExt; +use reqwest::Client; use std::path::PathBuf; /// Collector keeps the state of link collection @@ -25,6 +28,7 @@ pub struct Collector { use_html5ever: bool, root_dir: Option, base: Option, + headers: HeaderMap, } impl Default for Collector { @@ -36,6 +40,7 @@ impl Default for Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, + headers: HeaderMap::new(), root_dir: None, base: None, } @@ -61,6 +66,7 @@ impl Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, + headers: HeaderMap::new(), root_dir, base, }) @@ -87,6 +93,13 @@ impl Collector { self } + /// Skip files that are ignored + #[must_use] + pub fn headers(mut self, headers: HeaderMap) -> Self { + self.headers = headers; + self + } + /// Use `html5ever` to parse HTML instead of `html5gum`. #[must_use] pub const fn use_html5ever(mut self, yes: bool) -> Self { @@ -148,19 +161,31 @@ impl Collector { let default_base = global_base.clone(); let extensions = extensions.clone(); let basic_auth_extractor = self.basic_auth_extractor.clone(); + let headers = self.headers.clone(); async move { let base = match &input.source { InputSource::RemoteUrl(url) => Base::try_from(url.as_str()).ok(), _ => default_base, }; + let client = Client::builder() + .build() + .map_err(ErrorKind::BuildRequestClient) + .unwrap(); // TODO + + let extractor = UrlExtractor { + basic_auth_extractor, + headers, + client, + }; + input .get_contents( skip_missing_inputs, skip_hidden, skip_ignored, extensions, - basic_auth_extractor, + extractor, ) .map(move |content| (content, base.clone())) } @@ -191,7 +216,7 @@ impl Collector { mod tests { use std::{collections::HashSet, convert::TryFrom, fs::File, io::Write}; - use http::{HeaderMap, StatusCode}; + use http::StatusCode; use reqwest::Url; use super::*; @@ -241,7 +266,13 @@ mod tests { let _file = File::create(&file_path).unwrap(); let input = Input::new(&file_path.as_path().display().to_string(), None, true, None)?; let contents: Vec<_> = input - .get_contents(true, true, true, FileType::default_extensions(), None) + .get_contents( + true, + true, + true, + FileType::default_extensions(), + UrlExtractor::default(), + ) .collect::>() .await; @@ -254,7 +285,13 @@ mod tests { async fn test_url_without_extension_is_html() -> Result<()> { let input = Input::new("https://example.com/", None, true, None)?; let contents: Vec<_> = input - .get_contents(true, true, true, FileType::default_extensions(), None) + .get_contents( + true, + true, + true, + FileType::default_extensions(), + UrlExtractor::default(), + ) .collect::>() .await; diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index 39dc2a1ebe..1988eb3c8b 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -1,14 +1,12 @@ use super::file::FileExtensions; -use crate::chain::Chain; +use super::url_extractor::UrlExtractor; use crate::types::FileType; -use crate::utils::request; -use crate::{BasicAuthExtractor, ChainResult, ErrorKind, Handler, Result, Uri, utils}; +use crate::{ErrorKind, Result, utils}; use async_stream::try_stream; -use async_trait::async_trait; use futures::stream::Stream; use glob::glob_with; use ignore::WalkBuilder; -use reqwest::{Client, Request, Url}; +use reqwest::Url; use serde::{Deserialize, Serialize}; use shellexpand::tilde; use std::fmt::Display; @@ -18,8 +16,6 @@ use tokio::io::{AsyncReadExt, stdin}; const STDIN: &str = "-"; -type RequestChain = Chain; - #[derive(Debug)] /// Encapsulates the content for a given input pub struct InputContent { @@ -225,19 +221,12 @@ impl Input { // If `Input` is a file path, try the given file extensions in order. // Stop on the first match. file_extensions: FileExtensions, - basic_auth_extractor: Option, + url_extractor: UrlExtractor, ) -> impl Stream> { try_stream! { match self.source { - InputSource::RemoteUrl(ref url) => { - let credentials = request::extract_credentials(basic_auth_extractor.as_ref(), &Uri{ url: *url.clone() }); - - let chain: RequestChain= Chain::new(vec![ - Box::new(credentials), - Box::new(self.clone()), - ]); - - let content = Self::url_contents(url, chain).await; + InputSource::RemoteUrl(url) => { + let content = url_extractor.url_contents(*url).await; match content { Err(_) if skip_missing => (), Err(e) => Err(e)?, @@ -335,36 +324,6 @@ impl Input { } } - async fn url_contents(url: &Url, request_chain: RequestChain) -> Result { - // Assume HTML for default paths - let file_type = if url.path().is_empty() || url.path() == "/" { - FileType::Html - } else { - FileType::from(url.as_str()) - }; - - // TODO: Don't create a new Client for every call - let request = Client::builder() - .build() - .map_err(ErrorKind::BuildRequestClient)? - .request(reqwest::Method::GET, url.clone()) - .build() - .map_err(ErrorKind::BuildRequestClient)?; - - let content = match request_chain.traverse(request).await { - ChainResult::Next(_) => todo!(), - ChainResult::Done(r) => r, - }; - - let input_content = InputContent { - source: InputSource::RemoteUrl(Box::new(url.clone())), - file_type, - content, - }; - - Ok(input_content) - } - fn glob_contents( &self, pattern: &str, @@ -466,26 +425,6 @@ fn is_excluded_path(excluded_paths: &[PathBuf], path: &PathBuf) -> bool { false } -#[async_trait] -impl Handler for Input { - async fn handle(&mut self, input: Request) -> ChainResult { - let client = reqwest::Client::new(); - - let result = client - .execute(input) - .await - .map_err(ErrorKind::NetworkRequest) - .expect("todo") // todo - .text() - .await - .map_err(ErrorKind::ReadResponseBody) - .expect("todo"); // todo - // .headers(headers.clone()) // todo: add headers again - - ChainResult::Done(result) - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index ab1c9e7c65..43f772c960 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -14,6 +14,7 @@ mod response; mod status; mod status_code; pub(crate) mod uri; +pub(crate) mod url_extractor; pub use accept::*; pub use base::Base; diff --git a/lychee-lib/src/types/url_extractor.rs b/lychee-lib/src/types/url_extractor.rs new file mode 100644 index 0000000000..6f70353fc5 --- /dev/null +++ b/lychee-lib/src/types/url_extractor.rs @@ -0,0 +1,73 @@ +use super::{FileType, InputContent, InputSource}; +use crate::chain::Chain; +use crate::utils::request; +use crate::{BasicAuthExtractor, ChainResult, ErrorKind, Handler, Result, Uri}; +use async_trait::async_trait; +use http::HeaderMap; +use reqwest::{Request, Url}; + +#[derive(Debug, Default, Clone)] +pub struct UrlExtractor { + pub basic_auth_extractor: Option, + pub headers: HeaderMap, + pub client: reqwest::Client, +} + +type RequestChain = Chain; + +impl UrlExtractor { + pub async fn url_contents(&self, url: Url) -> Result { + // Assume HTML for default paths + let file_type = if url.path().is_empty() || url.path() == "/" { + FileType::Html + } else { + FileType::from(url.as_str()) + }; + + let credentials = request::extract_credentials( + self.basic_auth_extractor.as_ref(), + &Uri { url: url.clone() }, + ); + + let chain: RequestChain = Chain::new(vec![Box::new(credentials), Box::new(self.clone())]); + + let request = self + .client + .request(reqwest::Method::GET, url.clone()) + .build() + .map_err(ErrorKind::BuildRequestClient)?; + + let content = match chain.traverse(request).await { + ChainResult::Next(_) => todo!(), + ChainResult::Done(r) => r, + }; + + let input_content = InputContent { + source: InputSource::RemoteUrl(Box::new(url.clone())), + file_type, + content, + }; + + Ok(input_content) + } +} + +#[async_trait] +impl Handler for UrlExtractor { + async fn handle(&mut self, mut input: Request) -> ChainResult { + *input.headers_mut() = self.headers.clone(); + + let result = self + .client + .execute(input) + .await + .map_err(ErrorKind::NetworkRequest) + .expect("todo") // todo + .text() + .await + .map_err(ErrorKind::ReadResponseBody) + .expect("todo"); // todo + + ChainResult::Done(result) + } +} diff --git a/lychee-lib/src/utils/request.rs b/lychee-lib/src/utils/request.rs index 207f8a0de9..6b57b9ee8c 100644 --- a/lychee-lib/src/utils/request.rs +++ b/lychee-lib/src/utils/request.rs @@ -14,7 +14,7 @@ use crate::{ }; /// Extract basic auth credentials for a given URL. -pub fn extract_credentials( +pub(crate) fn extract_credentials( extractor: Option<&BasicAuthExtractor>, uri: &Uri, ) -> Option { From f188da81538e0fd16c65c77d442bc7206c8a21b3 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 25 Jun 2025 17:19:18 +0200 Subject: [PATCH 04/10] Improve UrlExtractor --- lychee-lib/src/types/url_extractor.rs | 40 +++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/lychee-lib/src/types/url_extractor.rs b/lychee-lib/src/types/url_extractor.rs index 6f70353fc5..11b7e7657a 100644 --- a/lychee-lib/src/types/url_extractor.rs +++ b/lychee-lib/src/types/url_extractor.rs @@ -4,7 +4,7 @@ use crate::utils::request; use crate::{BasicAuthExtractor, ChainResult, ErrorKind, Handler, Result, Uri}; use async_trait::async_trait; use http::HeaderMap; -use reqwest::{Request, Url}; +use reqwest::{Client, Request, Url}; #[derive(Debug, Default, Clone)] pub struct UrlExtractor { @@ -13,7 +13,7 @@ pub struct UrlExtractor { pub client: reqwest::Client, } -type RequestChain = Chain; +type RequestChain = Chain>; impl UrlExtractor { pub async fn url_contents(&self, url: Url) -> Result { @@ -38,9 +38,11 @@ impl UrlExtractor { .map_err(ErrorKind::BuildRequestClient)?; let content = match chain.traverse(request).await { - ChainResult::Next(_) => todo!(), + ChainResult::Next(_) => unreachable!( + "ChainResult::Done is unconditionally returned from the last chain element" + ), ChainResult::Done(r) => r, - }; + }?; let input_content = InputContent { source: InputSource::RemoteUrl(Box::new(url.clone())), @@ -53,21 +55,19 @@ impl UrlExtractor { } #[async_trait] -impl Handler for UrlExtractor { - async fn handle(&mut self, mut input: Request) -> ChainResult { - *input.headers_mut() = self.headers.clone(); - - let result = self - .client - .execute(input) - .await - .map_err(ErrorKind::NetworkRequest) - .expect("todo") // todo - .text() - .await - .map_err(ErrorKind::ReadResponseBody) - .expect("todo"); // todo - - ChainResult::Done(result) +impl Handler> for UrlExtractor { + async fn handle(&mut self, mut request: Request) -> ChainResult> { + *request.headers_mut() = self.headers.clone(); + ChainResult::Done(execute_request(&self.client, request).await) } } + +async fn execute_request(client: &Client, request: Request) -> Result { + Ok(client + .execute(request) + .await + .map_err(ErrorKind::NetworkRequest)? + .text() + .await + .map_err(ErrorKind::ReadResponseBody)?) +} From aa0ed01be9b15a05c9555e872d3b45ac4564ac1e Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Wed, 25 Jun 2025 17:43:18 +0200 Subject: [PATCH 05/10] Fix bug: extend headers instead of setting them --- lychee-lib/src/types/url_extractor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lychee-lib/src/types/url_extractor.rs b/lychee-lib/src/types/url_extractor.rs index 11b7e7657a..e9d1a15040 100644 --- a/lychee-lib/src/types/url_extractor.rs +++ b/lychee-lib/src/types/url_extractor.rs @@ -57,7 +57,7 @@ impl UrlExtractor { #[async_trait] impl Handler> for UrlExtractor { async fn handle(&mut self, mut request: Request) -> ChainResult> { - *request.headers_mut() = self.headers.clone(); + request.headers_mut().extend(self.headers.clone()); ChainResult::Done(execute_request(&self.client, request).await) } } From 3a0e39d4ead35a645e044063a1b2a8698cea0a47 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 27 Jun 2025 10:04:02 +0200 Subject: [PATCH 06/10] Clean up --- examples/collect_links/collect_links.rs | 1 - lychee-lib/src/basic_auth/mod.rs | 2 +- lychee-lib/src/collector.rs | 37 +++++++++---------- lychee-lib/src/types/input.rs | 6 +-- lychee-lib/src/types/mod.rs | 2 +- .../types/{url_extractor.rs => resolver.rs} | 14 ++++--- 6 files changed, 32 insertions(+), 30 deletions(-) rename lychee-lib/src/types/{url_extractor.rs => resolver.rs} (84%) diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index 6d639e81b2..57edd5fcdb 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -1,4 +1,3 @@ -use http::HeaderMap; use lychee_lib::{Collector, Input, InputSource, Result}; use reqwest::Url; use std::path::PathBuf; diff --git a/lychee-lib/src/basic_auth/mod.rs b/lychee-lib/src/basic_auth/mod.rs index f7c90c600c..0cd4de9de1 100644 --- a/lychee-lib/src/basic_auth/mod.rs +++ b/lychee-lib/src/basic_auth/mod.rs @@ -62,7 +62,7 @@ impl BasicAuthExtractor { /// [`BasicAuthCredentials`] if the a match was found. It should be noted /// that only the first match will be used to return the appropriate /// credentials. - pub fn matches(&self, uri: &Uri) -> Option { + pub(crate) fn matches(&self, uri: &Uri) -> Option { let matches: Vec<_> = self.regex_set.matches(uri.as_str()).into_iter().collect(); if matches.is_empty() { diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 73ab70f3e7..fdca148b65 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,6 +1,6 @@ use crate::ErrorKind; use crate::InputSource; -use crate::types::url_extractor::UrlExtractor; +use crate::types::resolver::UrlContentResolver; use crate::{ Base, Input, Request, Result, basic_auth::BasicAuthExtractor, extract::Extractor, types::FileExtensions, types::uri::raw::RawUri, utils::request, @@ -29,6 +29,7 @@ pub struct Collector { root_dir: Option, base: Option, headers: HeaderMap, + client: Client, } impl Default for Collector { @@ -43,6 +44,7 @@ impl Default for Collector { headers: HeaderMap::new(), root_dir: None, base: None, + client: Client::new(), } } } @@ -67,6 +69,9 @@ impl Collector { skip_hidden: true, skip_ignored: true, headers: HeaderMap::new(), + client: Client::builder() + .build() + .map_err(ErrorKind::BuildRequestClient)?, root_dir, base, }) @@ -154,38 +159,32 @@ impl Collector { let skip_hidden = self.skip_hidden; let skip_ignored = self.skip_ignored; let global_base = self.base; - let basic_auth_extractor = self.basic_auth_extractor.clone(); // TODO: not as ugly + + let resolver = UrlContentResolver { + basic_auth_extractor: self.basic_auth_extractor.clone(), + headers: self.headers.clone(), + client: self.client, + }; stream::iter(inputs) .par_then_unordered(None, move |input| { let default_base = global_base.clone(); let extensions = extensions.clone(); - let basic_auth_extractor = self.basic_auth_extractor.clone(); - let headers = self.headers.clone(); + let resolver = resolver.clone(); + async move { let base = match &input.source { InputSource::RemoteUrl(url) => Base::try_from(url.as_str()).ok(), _ => default_base, }; - let client = Client::builder() - .build() - .map_err(ErrorKind::BuildRequestClient) - .unwrap(); // TODO - - let extractor = UrlExtractor { - basic_auth_extractor, - headers, - client, - }; - input .get_contents( skip_missing_inputs, skip_hidden, skip_ignored, extensions, - extractor, + resolver, ) .map(move |content| (content, base.clone())) } @@ -193,7 +192,7 @@ impl Collector { .flatten() .par_then_unordered(None, move |(content, base)| { let root_dir = self.root_dir.clone(); - let basic_auth_extractor = basic_auth_extractor.clone(); + let basic_auth_extractor = self.basic_auth_extractor.clone(); async move { let content = content?; let extractor = Extractor::new(self.use_html5ever, self.include_verbatim); @@ -271,7 +270,7 @@ mod tests { true, true, FileType::default_extensions(), - UrlExtractor::default(), + UrlContentResolver::default(), ) .collect::>() .await; @@ -290,7 +289,7 @@ mod tests { true, true, FileType::default_extensions(), - UrlExtractor::default(), + UrlContentResolver::default(), ) .collect::>() .await; diff --git a/lychee-lib/src/types/input.rs b/lychee-lib/src/types/input.rs index 1988eb3c8b..66650855e3 100644 --- a/lychee-lib/src/types/input.rs +++ b/lychee-lib/src/types/input.rs @@ -1,5 +1,5 @@ use super::file::FileExtensions; -use super::url_extractor::UrlExtractor; +use super::resolver::UrlContentResolver; use crate::types::FileType; use crate::{ErrorKind, Result, utils}; use async_stream::try_stream; @@ -221,12 +221,12 @@ impl Input { // If `Input` is a file path, try the given file extensions in order. // Stop on the first match. file_extensions: FileExtensions, - url_extractor: UrlExtractor, + resolver: UrlContentResolver, ) -> impl Stream> { try_stream! { match self.source { InputSource::RemoteUrl(url) => { - let content = url_extractor.url_contents(*url).await; + let content = resolver.url_contents(*url).await; match content { Err(_) if skip_missing => (), Err(e) => Err(e)?, diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index 43f772c960..53093fae6d 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -10,11 +10,11 @@ mod file; mod input; pub(crate) mod mail; mod request; +pub(crate) mod resolver; mod response; mod status; mod status_code; pub(crate) mod uri; -pub(crate) mod url_extractor; pub use accept::*; pub use base::Base; diff --git a/lychee-lib/src/types/url_extractor.rs b/lychee-lib/src/types/resolver.rs similarity index 84% rename from lychee-lib/src/types/url_extractor.rs rename to lychee-lib/src/types/resolver.rs index e9d1a15040..b3f1629b9e 100644 --- a/lychee-lib/src/types/url_extractor.rs +++ b/lychee-lib/src/types/resolver.rs @@ -6,8 +6,9 @@ use async_trait::async_trait; use http::HeaderMap; use reqwest::{Client, Request, Url}; +/// Structure to fetch remote content. #[derive(Debug, Default, Clone)] -pub struct UrlExtractor { +pub struct UrlContentResolver { pub basic_auth_extractor: Option, pub headers: HeaderMap, pub client: reqwest::Client, @@ -15,7 +16,10 @@ pub struct UrlExtractor { type RequestChain = Chain>; -impl UrlExtractor { +impl UrlContentResolver { + /// Fetch remote content by URL. + /// This method is not intended to check if a URL is functional but + /// to get a URL's content and process the content. pub async fn url_contents(&self, url: Url) -> Result { // Assume HTML for default paths let file_type = if url.path().is_empty() || url.path() == "/" { @@ -55,7 +59,7 @@ impl UrlExtractor { } #[async_trait] -impl Handler> for UrlExtractor { +impl Handler> for UrlContentResolver { async fn handle(&mut self, mut request: Request) -> ChainResult> { request.headers_mut().extend(self.headers.clone()); ChainResult::Done(execute_request(&self.client, request).await) @@ -63,11 +67,11 @@ impl Handler> for UrlExtractor { } async fn execute_request(client: &Client, request: Request) -> Result { - Ok(client + client .execute(request) .await .map_err(ErrorKind::NetworkRequest)? .text() .await - .map_err(ErrorKind::ReadResponseBody)?) + .map_err(ErrorKind::ReadResponseBody) } From 65eaadac11452d72c125b93270ce9687918ad352 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 27 Jun 2025 10:12:23 +0200 Subject: [PATCH 07/10] Minor adjustments --- lychee-lib/src/collector.rs | 11 +++++++++-- lychee-lib/src/types/resolver.rs | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index fdca148b65..f40bae5b7d 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -41,9 +41,9 @@ impl Default for Collector { use_html5ever: false, skip_hidden: true, skip_ignored: true, - headers: HeaderMap::new(), root_dir: None, base: None, + headers: HeaderMap::new(), client: Client::new(), } } @@ -98,13 +98,20 @@ impl Collector { self } - /// Skip files that are ignored + /// Set headers to use when resolving input URLs #[must_use] pub fn headers(mut self, headers: HeaderMap) -> Self { self.headers = headers; self } + /// Set client to use when resolving input URLs + #[must_use] + pub fn client(mut self, client: Client) -> Self { + self.client = client; + self + } + /// Use `html5ever` to parse HTML instead of `html5gum`. #[must_use] pub const fn use_html5ever(mut self, yes: bool) -> Self { diff --git a/lychee-lib/src/types/resolver.rs b/lychee-lib/src/types/resolver.rs index b3f1629b9e..f0e343f27b 100644 --- a/lychee-lib/src/types/resolver.rs +++ b/lychee-lib/src/types/resolver.rs @@ -14,7 +14,7 @@ pub struct UrlContentResolver { pub client: reqwest::Client, } -type RequestChain = Chain>; +type RequestChain = Chain>; impl UrlContentResolver { /// Fetch remote content by URL. From 8518e7f8937e9d8875b968ba7d6ba77db0858667 Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 27 Jun 2025 13:43:26 +0200 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: Matthias Endler --- lychee-lib/src/collector.rs | 2 +- lychee-lib/src/types/resolver.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index f40bae5b7d..49f078a9f3 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -105,7 +105,7 @@ impl Collector { self } - /// Set client to use when resolving input URLs + /// Set client to use for checking input URLs #[must_use] pub fn client(mut self, client: Client) -> Self { self.client = client; diff --git a/lychee-lib/src/types/resolver.rs b/lychee-lib/src/types/resolver.rs index f0e343f27b..14ecbd9272 100644 --- a/lychee-lib/src/types/resolver.rs +++ b/lychee-lib/src/types/resolver.rs @@ -18,14 +18,14 @@ type RequestChain = Chain>; impl UrlContentResolver { /// Fetch remote content by URL. + /// /// This method is not intended to check if a URL is functional but /// to get a URL's content and process the content. pub async fn url_contents(&self, url: Url) -> Result { // Assume HTML for default paths - let file_type = if url.path().is_empty() || url.path() == "/" { - FileType::Html - } else { - FileType::from(url.as_str()) + let file_type = match url.path() { + path if path.is_empty() || path == "/" => FileType::Html, + _ => FileType::from(url.as_str()), }; let credentials = request::extract_credentials( @@ -62,11 +62,11 @@ impl UrlContentResolver { impl Handler> for UrlContentResolver { async fn handle(&mut self, mut request: Request) -> ChainResult> { request.headers_mut().extend(self.headers.clone()); - ChainResult::Done(execute_request(&self.client, request).await) + ChainResult::Done(get_request_body_text(&self.client, request).await) } } -async fn execute_request(client: &Client, request: Request) -> Result { +async fn get_request_body_text(client: &Client, request: Request) -> Result { client .execute(request) .await From 89e9a18eeaf771d6cc0465d62bf55b8fa289a09e Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Fri, 27 Jun 2025 14:23:45 +0200 Subject: [PATCH 09/10] Mention in doc comment how the method might panic --- lychee-lib/src/collector.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 49f078a9f3..8ce6861f0f 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -33,6 +33,10 @@ pub struct Collector { } impl Default for Collector { + /// # Panics + /// + /// We call `Client::new()` which can panic in certain scenarios. + /// Use `Collector::new()` to handle `ClientBuilder` errors gracefully. fn default() -> Self { Collector { basic_auth_extractor: None, From a3e822f1c367b82c347c9d0ec95fb0d536cf3d2e Mon Sep 17 00:00:00 2001 From: Thomas Zahner Date: Thu, 3 Jul 2025 09:06:31 +0200 Subject: [PATCH 10/10] Remove use of chain for more simplicity --- .../src/types/basic_auth/credentials.rs | 11 +++-- lychee-lib/src/types/resolver.rs | 43 ++++++++----------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/lychee-lib/src/types/basic_auth/credentials.rs b/lychee-lib/src/types/basic_auth/credentials.rs index fb5f02fd6d..82a630870f 100644 --- a/lychee-lib/src/types/basic_auth/credentials.rs +++ b/lychee-lib/src/types/basic_auth/credentials.rs @@ -73,15 +73,20 @@ impl BasicAuthCredentials { pub fn to_authorization(&self) -> Authorization { Authorization::basic(&self.username, &self.password) } + + /// Append the credentials as headers to a `Request` + pub fn append_to_request(&self, request: &mut Request) { + request + .headers_mut() + .append(AUTHORIZATION, self.to_authorization().0.encode()); + } } #[async_trait] impl Handler for Option { async fn handle(&mut self, mut request: Request) -> ChainResult { if let Some(credentials) = self { - request - .headers_mut() - .append(AUTHORIZATION, credentials.to_authorization().0.encode()); + credentials.append_to_request(&mut request); } ChainResult::Next(request) diff --git a/lychee-lib/src/types/resolver.rs b/lychee-lib/src/types/resolver.rs index 14ecbd9272..550583fcfb 100644 --- a/lychee-lib/src/types/resolver.rs +++ b/lychee-lib/src/types/resolver.rs @@ -1,8 +1,6 @@ use super::{FileType, InputContent, InputSource}; -use crate::chain::Chain; use crate::utils::request; -use crate::{BasicAuthExtractor, ChainResult, ErrorKind, Handler, Result, Uri}; -use async_trait::async_trait; +use crate::{BasicAuthExtractor, ErrorKind, Result, Uri}; use http::HeaderMap; use reqwest::{Client, Request, Url}; @@ -14,8 +12,6 @@ pub struct UrlContentResolver { pub client: reqwest::Client, } -type RequestChain = Chain>; - impl UrlContentResolver { /// Fetch remote content by URL. /// @@ -33,20 +29,8 @@ impl UrlContentResolver { &Uri { url: url.clone() }, ); - let chain: RequestChain = Chain::new(vec![Box::new(credentials), Box::new(self.clone())]); - - let request = self - .client - .request(reqwest::Method::GET, url.clone()) - .build() - .map_err(ErrorKind::BuildRequestClient)?; - - let content = match chain.traverse(request).await { - ChainResult::Next(_) => unreachable!( - "ChainResult::Done is unconditionally returned from the last chain element" - ), - ChainResult::Done(r) => r, - }?; + let request = self.build_request(&url, credentials)?; + let content = get_request_body_text(&self.client, request).await?; let input_content = InputContent { source: InputSource::RemoteUrl(Box::new(url.clone())), @@ -56,13 +40,24 @@ impl UrlContentResolver { Ok(input_content) } -} -#[async_trait] -impl Handler> for UrlContentResolver { - async fn handle(&mut self, mut request: Request) -> ChainResult> { + fn build_request( + &self, + url: &Url, + credentials: Option, + ) -> Result { + let mut request = self + .client + .request(reqwest::Method::GET, url.clone()) + .build() + .map_err(ErrorKind::BuildRequestClient)?; + request.headers_mut().extend(self.headers.clone()); - ChainResult::Done(get_request_body_text(&self.client, request).await) + if let Some(credentials) = credentials { + credentials.append_to_request(&mut request); + } + + Ok(request) } }